No OneTemporary
Actions

Size

678 KB

Referenced Files

None

Subscribers

None

View Options

This file is larger than 256 KB, so syntax highlighting was skipped.

	diff --git a/.gitignore b/.gitignore
	index 7e046a0204..89bbad891a 100644
	--- a/.gitignore
	+++ b/.gitignore
	@@ -1,359 +1,361 @@
	#
	-# Copyright 2011-2023 the Pacemaker project contributors
	+# Copyright 2011-2025 the Pacemaker project contributors
	#
	# The version control history for this file may have further details.
	#
	# This source code is licensed under the GNU General Public License version 2
	# or later (GPLv2+) WITHOUT ANY WARRANTY.
	#

	# Common conventions for files that should be ignored
	*~
	*.bz2
	*.diff
	*.orig
	*.patch
	*.rej
	*.sed
	*.swp
	*.tar.gz
	*.tgz
	\#*
	.\#*
	logs

	# libtool artifacts
	*.la
	*.lo
	.libs
	libltdl
	libtool
	libtool.m4
	ltdl.m4
	/m4/argz.m4
	/m4/ltargz.m4
	/m4/ltoptions.m4
	/m4/ltsugar.m4
	/m4/ltversion.m4
	/m4/lt~obsolete.m4

	# autotools artifacts
	.deps
	.dirstamp
	Makefile
	Makefile.in
	aclocal.m4
	autoconf
	autoheader
	autom4te.cache/
	automake
	/confdefs.h
	config.log
	config.status
	configure
	/conftest*

	# gettext artifacts
	/ABOUT-NLS
	/m4/codeset.m4
	/m4/fcntl-o.m4
	/m4/gettext.m4
	/m4/glibc2.m4
	/m4/glibc21.m4
	/m4/iconv.m4
	/m4/intdiv0.m4
	/m4/intl.m4
	/m4/intldir.m4
	/m4/intlmacosx.m4
	/m4/intmax.m4
	/m4/inttypes-pri.m4
	/m4/inttypes_h.m4
	/m4/lcmessage.m4
	/m4/lib-ld.m4
	/m4/lib-link.m4
	/m4/lib-prefix.m4
	/m4/lock.m4
	/m4/longlong.m4
	/m4/nls.m4
	/m4/po.m4
	/m4/printf-posix.m4
	/m4/progtest.m4
	/m4/size_max.m4
	/m4/stdint_h.m4
	/m4/threadlib.m4
	/m4/uintmax_t.m4
	/m4/visibility.m4
	/m4/wchar_t.m4
	/m4/wint_t.m4
	/m4/xsize.m4
	/po/*.gmo
	/po/*.header
	/po/*.pot
	/po/*.sin
	/po/Makefile.in.in
	/po/Makevars.template
	/po/POTFILES
	/po/Rules-quot
	/po/stamp-po

	# configure targets
	/agents/ocf/ClusterMon
	/agents/ocf/Dummy
	/agents/ocf/HealthCPU
	/agents/ocf/HealthIOWait
	/agents/ocf/HealthSMART
	/agents/ocf/Stateful
	/agents/ocf/SysInfo
	/agents/ocf/attribute
	/agents/ocf/controld
	/agents/ocf/ifspeed
	/agents/ocf/o2cb
	/agents/ocf/ping
	/agents/ocf/remote
	/agents/stonith/fence_legacy
	/agents/stonith/fence_watchdog
	/cts/benchmark/clubench
	/cts/cluster_test
	/cts/cts
	/cts/cts-attrd
	/cts/cts-cli
	/cts/cts-exec
	/cts/cts-fencing
	/cts/cts-lab
	/cts/cts-log-watcher
	/cts/cts-regression
	/cts/cts-scheduler
	/cts/lab/CTS.py
	/cts/support/LSBDummy
	/cts/support/cts-support
	/cts/support/fence_dummy
	/cts/support/pacemaker-cts-dummyd
	/cts/support/pacemaker-cts-dummyd@.service
	/daemons/execd/pacemaker_remote
	/daemons/execd/pacemaker_remote.service
	/daemons/fenced/fence_legacy
	/daemons/fenced/fence_watchdog
	/daemons/pacemakerd/pacemaker.combined.upstart
	/daemons/pacemakerd/pacemaker.service
	/daemons/pacemakerd/pacemaker.upstart
	/doc/Doxyfile
	/etc/init.d/pacemaker
	/etc/logrotate.d/pacemaker
	/etc/sysconfig/pacemaker
	/include/config.h
	/include/config.h.in
	/include/crm_config.h
	/maint/bumplibs
	/python/pacemaker/buildoptions.py
	/python/setup.py
	/tools/cluster-clean
	/tools/cluster-helper
	/tools/cluster-init
	/tools/cibsecret
	/tools/crm_error
	/tools/crm_failcount
	/tools/crm_master
	/tools/crm_mon.service
	/tools/crm_mon.upstart
	/tools/crm_report
	/tools/crm_rule
	/tools/crm_standby
	/tools/pcmk_simtimes
	/tools/report.collector
	/tools/report.common
	/xml/rng-helper

	# Compiled targets and intermediary files
	*.o
	*.pc
	*.pyc
	/daemons/attrd/pacemaker-attrd
	/daemons/based/pacemaker-based
	/daemons/controld/pacemaker-controld
	/daemons/execd/cts-exec-helper
	/daemons/execd/pacemaker-execd
	/daemons/execd/pacemaker-remoted
	/daemons/fenced/cts-fence-helper
	/daemons/fenced/pacemaker-fenced
	/daemons/pacemakerd/pacemakerd
	/daemons/schedulerd/pacemaker-schedulerd
	/devel/scratch
	/lib/gnu/stdalign.h
	/tools/attrd_updater
	/tools/cibadmin
	/tools/crmadmin
	/tools/crm_attribute
	/tools/crm_diff
	/tools/crm_mon
	/tools/crm_node
	/tools/crm_resource
	/tools/crm_shadow
	/tools/crm_simulate
	/tools/crm_ticket
	/tools/crm_verify
	/tools/iso8601
	/tools/stonith_admin

	# Generated XML schema files
	/xml/crm_mon.rng
	/xml/pacemaker*.rng
	/xml/versions.rng
	/xml/api/api-result*.rng

	# Working directories for make dist and make export
	/pacemaker-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]

	# Documentation build targets and intermediary files
	*.7
	*.7.xml
	*.7.html
	*.8
	*.8.xml
	*.8.html
	GPATH
	GRTAGS
	GTAGS
	TAGS
	/daemons/fenced/pacemaker-fenced.xml
	/daemons/schedulerd/pacemaker-schedulerd.xml
	/doc/.ABI-build
	/doc/HTML
	/doc/abi_dumps
	/doc/abi-check
	/doc/api/
	/doc/compat_reports
	/doc/crm_fencing.html
	/doc/sphinx/*/_build
	/doc/sphinx/*/conf.py
	/doc/sphinx/*/generated
	-/doc/sphinx/build-2.1.txt
	+/doc/sphinx/build-[0-9]*.txt
	/doc/sphinx/shared/images/*.png

	# Test artifacts (from unit tests, regression tests, static analysis, etc.)
	*.coverity
	*.gcda
	*.gcno
	coverity-*
	pacemaker_*.info
	/coverage
	/cppcheck.out
	/cts/scheduler/*.ref
	/cts/scheduler/*.up
	/cts/scheduler/*.up.err
	/cts/scheduler/bug-rh-1097457.log
	/cts/scheduler/bug-rh-1097457.trs
	/cts/scheduler/shadow.*
	/cts/test-suite.log
	/lib/*/tests/*/*.log
	/lib/*/tests/*/*_test
	/lib/*/tests/*/*.trs
	/lib/common/tests/schemas/schemas
	/xml/test-*/*.up
	/xml/test-*/*.up.err
	/xml/assets/*.rng
	/xml/assets/diffview.js
	/xml/assets/xmlcatalog
	/test/_test_file.c

	# Packaging artifacts
	*.rpm
	/pacemaker.spec
	/rpm/[A-LN-Z]*
	/rpm/build.counter
	/rpm/mock

	# Project maintainer artifacts
	/maint/gnulib
	/maint/mocked/based
	/maint/testcc_helper.cc
	/maint/testcc_*_h

	# Files built by main branch (helps when jumping back and forth in checkout)
	/cts/cts-schemas
	+/cts/schemas/test-*/
	+/lib/*/fuzzers/*/*_fuzzer

	# Formerly built files (helps when jumping back and forth in checkout)
	/.ABI-build
	/Doxyfile
	/HTML
	/abi_dumps
	/abi-check
	/build.counter
	/compat_reports
	/compile
	/cts/.regression.failed.diff
	/attrd
	/cib
	/config.guess
	/config.sub
	/coverage.sh
	/crmd
	/cts/CTS.py
	/cts/CTSlab.py
	/cts/CTSvars.py
	/cts/HBDummy
	/cts/LSBDummy
	/cts/OCFIPraTest.py
	/cts/cts-coverage
	/cts/cts-support
	/cts/fence_dummy
	/cts/lab/CTSlab.py
	/cts/lab/CTSvars.py
	/cts/lab/OCFIPraTest.py
	/cts/lab/cluster_test
	/cts/lab/cts
	/cts/lab/cts-log-watcher
	/cts/lxc_autogen.sh
	/cts/pacemaker-cts-dummyd
	/cts/pacemaker-cts-dummyd@.service
	/daemons/based/cibmon
	/daemons/fenced/fence_legacy
	/daemons/fenced/fence_watchdog
	/daemons/pacemakerd/pacemaker
	/depcomp
	/doc/*.build
	/doc/*/en-US/Ap-*.xml
	/doc/*/en-US/Ch-*.xml
	/doc/*/publican.cfg
	/doc/*/publish
	/doc/*/tmp/**
	/doc/Clusters_from_Scratch.txt
	/doc/Pacemaker_Explained.txt
	/doc/acls.html
	/doc/publican-catalog*
	/doc/shared/en-US/*.xml
	/doc/shared/en-US/images/pcmk-*.png
	/doc/shared/en-US/images/Policy-Engine-*.png
	/extra/*/*
	/fencing
	/include/stamp-*
	/install-sh
	/lib/common/md5.c
	/lib/common/tests/flags/pcmk__clear_flags_as
	/lib/common/tests/flags/pcmk__set_flags_as
	/lib/common/tests/flags/pcmk_all_flags_set
	/lib/common/tests/flags/pcmk_any_flags_set
	/lib/common/tests/operations/parse_op_key
	/lib/common/tests/strings/pcmk__btoa
	/lib/common/tests/strings/pcmk__parse_ll_range
	/lib/common/tests/strings/pcmk__scan_double
	/lib/common/tests/strings/pcmk__str_any_of
	/lib/common/tests/strings/pcmk__strcmp
	/lib/common/tests/strings/pcmk__char_in_any_str
	/lib/common/tests/utils/pcmk_str_is_infinity
	/lib/common/tests/utils/pcmk_str_is_minus_infinity
	/lib/gnu/libgnu.a
	/lib/pengine/tests/rules/
	/lrmd
	/ltmain.sh
	/mcp
	/missing
	/mock
	/pacemaker-*.spec
	/pengine
	/py-compile
	/scratch
	/tools/cluster-init
	/test-driver
	/xml/crm.dtd
	/xml/version-diff.sh
	ylwrap
	diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am
	index f8d8bc91d6..3212efa289 100644
	--- a/daemons/attrd/Makefile.am
	+++ b/daemons/attrd/Makefile.am
	@@ -1,49 +1,50 @@
	#
	# Copyright 2004-2023 the Pacemaker project contributors
	#
	# The version control history for this file may have further details.
	#
	# This source code is licensed under the GNU General Public License version 2
	# or later (GPLv2+) WITHOUT ANY WARRANTY.
	#

	include $(top_srcdir)/mk/common.mk

	halibdir = $(CRM_DAEMON_DIR)

	halib_PROGRAMS = pacemaker-attrd

	noinst_HEADERS = pacemaker-attrd.h

	pacemaker_attrd_CFLAGS = $(CFLAGS_HARDENED_EXE)
	pacemaker_attrd_LDFLAGS = $(LDFLAGS_HARDENED_EXE)

	pacemaker_attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la
	pacemaker_attrd_LDADD += $(top_builddir)/lib/cib/libcib.la
	pacemaker_attrd_LDADD += $(top_builddir)/lib/pengine/libpe_rules.la
	pacemaker_attrd_LDADD += $(top_builddir)/lib/lrmd/liblrmd.la
	pacemaker_attrd_LDADD += $(top_builddir)/lib/common/libcrmcommon.la
	pacemaker_attrd_LDADD += $(CLUSTERLIBS)

	pacemaker_attrd_SOURCES = attrd_alerts.c \
	attrd_attributes.c \
	attrd_cib.c \
	attrd_corosync.c \
	attrd_elections.c \
	attrd_ipc.c \
	attrd_messages.c \
	+ attrd_nodes.c \
	attrd_sync.c \
	attrd_utils.c \
	pacemaker-attrd.c

	.PHONY: install-exec-hook
	install-exec-hook:
	if BUILD_LEGACY_LINKS
	cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f attrd && $(LN_S) pacemaker-attrd attrd
	endif

	.PHONY: uninstall-hook
	uninstall-hook:
	if BUILD_LEGACY_LINKS
	cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f attrd
	endif
	diff --git a/daemons/attrd/attrd_alerts.c b/daemons/attrd/attrd_alerts.c
	index 4e97743008..3e351c5df4 100644
	--- a/daemons/attrd/attrd_alerts.c
	+++ b/daemons/attrd/attrd_alerts.c
	@@ -1,136 +1,146 @@
	/*
	* Copyright 2015-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>
	#include <crm/crm.h>
	#include <crm/cib/internal.h>
	#include <crm/cluster/internal.h>
	#include <crm/cluster/election_internal.h>
	#include <crm/common/alerts_internal.h>
	#include <crm/common/cib_internal.h>
	#include <crm/common/xml.h>
	#include <crm/pengine/rules_internal.h>
	#include <crm/lrmd_internal.h>
	#include "pacemaker-attrd.h"

	static GList *attrd_alert_list = NULL;

	/*!
	 * \internal
	 * \brief React to an event delivered on the executor connection
	 *
	 * Only disconnect events are acted on (the local executor connection is
	 * torn down); all other event types are ignored.
	 *
	 * \param[in] op  Executor event (nothing is done if NULL)
	 */
	static void
	attrd_lrmd_callback(lrmd_event_data_t *op)
	{
	    CRM_CHECK(op != NULL, return);

	    if (op->type != lrmd_event_disconnect) {
	        return;
	    }

	    crm_info("Lost connection to executor");
	    attrd_lrmd_disconnect();
	}

	/*!
	 * \internal
	 * \brief Get the executor connection, creating and connecting it if needed
	 *
	 * The API object is created on first use and registered with
	 * attrd_lrmd_callback(). If it is not currently connected, up to 10
	 * connection attempts are made back to back.
	 *
	 * \return The executor connection (\c the_lrmd), which is NULL if every
	 *         connection attempt failed (the object is then discarded via
	 *         attrd_lrmd_disconnect())
	 */
	static lrmd_t *
	attrd_lrmd_connect(void)
	{
	    if (the_lrmd == NULL) {
	        the_lrmd = lrmd_api_new();
	        the_lrmd->cmds->set_callback(the_lrmd, attrd_lrmd_callback);
	    }

	    if (!the_lrmd->cmds->is_connected(the_lrmd)) {
	        const unsigned int max_attempts = 10;
	        int ret = -ENOTCONN;

	        // Counter is unsigned to match max_attempts (no mixed-sign compare)
	        for (unsigned int fails = 0; fails < max_attempts; ++fails) {
	            ret = the_lrmd->cmds->connect(the_lrmd, PCMK__VALUE_ATTRD, NULL);
	            if (ret == pcmk_ok) {
	                break;
	            }

	            // The attempt that just failed is used up, hence the "- 1"
	            crm_debug("Could not connect to executor, %u tries remaining",
	                      (max_attempts - fails - 1));
	            /* @TODO We don't want to block here with sleep, but we should wait
	             * some time between connection attempts. We could possibly add a
	             * timer with a callback, but then we'd likely need an alert queue.
	             */
	        }

	        if (ret != pcmk_ok) {
	            attrd_lrmd_disconnect();
	        }
	    }

	    return the_lrmd;
	}

	/*!
	 * \internal
	 * \brief Drop the executor connection object, if there is one
	 */
	void
	attrd_lrmd_disconnect(void)
	{
	    lrmd_t *old_conn = the_lrmd;

	    if (old_conn == NULL) {
	        return;
	    }

	    // Clear the global first, in case we're called recursively
	    the_lrmd = NULL;

	    // Will disconnect if necessary
	    lrmd_api_delete(old_conn);
	}

	/*!
	 * \internal
	 * \brief Process the result of the CIB query for the alerts section
	 *
	 * On success, the cached alert list is freed and rebuilt from the alerts
	 * element found in the query output.
	 *
	 * \param[in] msg        Ignored
	 * \param[in] call_id    Ignored
	 * \param[in] rc         Result of the CIB query
	 * \param[in] output     Query result (the alerts element or a parent of it)
	 * \param[in] user_data  Ignored
	 */
	static void
	config_query_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
	                      void *user_data)
	{
	    xmlNode *alerts_xml = output;

	    if (rc == -ENXIO) {
	        crm_debug("Local CIB has no alerts section");
	        return;
	    }
	    if (rc != pcmk_ok) {
	        crm_notice("Could not query local CIB: %s", pcmk_strerror(rc));
	        return;
	    }

	    // The output may be a wrapper element; if so, look one level down
	    if ((alerts_xml != NULL) && !pcmk__xe_is(alerts_xml, PCMK_XE_ALERTS)) {
	        alerts_xml = pcmk__xe_first_child(alerts_xml, PCMK_XE_ALERTS, NULL,
	                                          NULL);
	    }
	    if (alerts_xml == NULL) {
	        crm_notice("CIB query result has no " PCMK_XE_ALERTS " section");
	        return;
	    }

	    pe_free_alert_list(attrd_alert_list);
	    attrd_alert_list = pe_unpack_alerts(alerts_xml);
	}

	/*!
	 * \internal
	 * \brief Asynchronously query the local CIB for the alerts section
	 *
	 * The query result is handled by config_query_callback().
	 *
	 * \param[in] user_data  Ignored
	 *
	 * \return Always TRUE
	 */
	gboolean
	attrd_read_options(gpointer user_data)
	{
	    int call_id;

	    CRM_CHECK(the_cib != NULL, return TRUE);

	    call_id = the_cib->cmds->query(the_cib,
	                                   pcmk__cib_abs_xpath_for(PCMK_XE_ALERTS),
	                                   NULL, cib_xpath|cib_scope_local);

	    the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, NULL,
	                                          "config_query_callback",
	                                          config_query_callback, free);

	    crm_trace("Querying the CIB... call %d", call_id);
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Send an alert for a node attribute via the executor
	 *
	 * When no alerts are configured (empty alert list), nothing is sent and
	 * pcmk_ok is returned. Otherwise the alert is handed to
	 * lrmd_send_attribute_alert() over the connection returned by
	 * attrd_lrmd_connect().
	 *
	 * NOTE(review): this function is shown as a diff hunk. In the "+" version the
	 * numeric node ID is no longer a parameter; it is looked up in the node
	 * caches by node name and node XML ID, and stays 0 when no entry is found.
	 * The pointer declarators and "|" operators on the "+" lines look mangled
	 * by the rendering of this diff -- confirm against the real patch.
	 *
	 * \return pcmk_ok if there are no alerts, otherwise the return value of
	 *         lrmd_send_attribute_alert()
	 */
	int
	-attrd_send_attribute_alert(const char *node, int nodeid,
	+attrd_send_attribute_alert(const char node, const char node_xml_id,
	const char attr, const char value)
	{
	+ uint32_t nodeid = 0U;
	+ crm_node_t *node_status = NULL;
	+
	if (attrd_alert_list == NULL) {
	return pcmk_ok;
	}
	+ node_status = pcmk__search_node_caches(0, node, node_xml_id,
	+ pcmk__node_search_remote
	+ \|pcmk__node_search_cluster_member
	+ \|pcmk__node_search_cluster_cib);
	+ if (node_status != NULL) {
	+ nodeid = node_status->id;
	+ }
	return lrmd_send_attribute_alert(attrd_lrmd_connect(), attrd_alert_list,
	node, nodeid, attr, value);
	}
	diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
	index f0f46ff2d9..fe7e642f0e 100644
	--- a/daemons/attrd/attrd_attributes.c
	+++ b/daemons/attrd/attrd_attributes.c
	@@ -1,275 +1,283 @@
	/*
	* Copyright 2013-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <errno.h>
	#include <stdbool.h>
	#include <stdlib.h>
	#include <glib.h>

	#include <crm/common/logging.h>
	#include <crm/common/results.h>
	#include <crm/common/strings_internal.h>
	#include <crm/common/xml.h>

	#include "pacemaker-attrd.h"

	/*!
	 * \internal
	 * \brief Create an attribute entry from request XML and add it to the table
	 *
	 * The set type defaults to PCMK_XE_INSTANCE_ATTRIBUTES when the request does
	 * not name one. A positive dampening value creates a write-delay timer.
	 *
	 * \param[in] xml  Request XML describing the attribute
	 *
	 * \return Newly created attribute (already stored in \c attributes), or NULL
	 *         if a non-private attribute has an unsupported set type
	 */
	static attribute_t *
	attrd_create_attribute(xmlNode *xml)
	{
	    const char *name = crm_element_value(xml, PCMK__XA_ATTR_NAME);
	    const char *set_type = crm_element_value(xml, PCMK__XA_ATTR_SET_TYPE);
	    const char *dampen_s = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING);
	    attribute_t *new_attr = NULL;
	    long long delay_ms = 0;
	    int private_flag = 0;

	    if (set_type == NULL) {
	        set_type = PCMK_XE_INSTANCE_ATTRIBUTES;
	    }

	    /* Set type is meaningful only when writing to the CIB. Private
	     * attributes are not written, so their set type is not validated.
	     */
	    crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &private_flag);
	    if (!private_flag) {
	        if (!pcmk__str_any_of(set_type, PCMK_XE_INSTANCE_ATTRIBUTES,
	                              PCMK_XE_UTILIZATION, NULL)) {
	            crm_warn("Ignoring attribute %s with invalid set type %s",
	                     pcmk__s(name, "(unidentified)"), set_type);
	            return NULL;
	        }
	    }

	    new_attr = pcmk__assert_alloc(1, sizeof(attribute_t));

	    new_attr->id = pcmk__str_copy(name);
	    new_attr->set_type = pcmk__str_copy(set_type);
	    new_attr->set_id = crm_element_value_copy(xml, PCMK__XA_ATTR_SET);
	    new_attr->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER);
	    new_attr->values = pcmk__strikey_table(NULL, attrd_free_attribute_value);

	    if (private_flag) {
	        attrd_set_attr_flags(new_attr, attrd_attr_is_private);
	    }

	    if (dampen_s != NULL) {
	        delay_ms = crm_get_msec(dampen_s);
	    }

	    if (delay_ms < 0) {
	        crm_warn("Ignoring invalid delay %s for attribute %s", dampen_s,
	                 new_attr->id);

	    } else if (delay_ms > 0) {
	        // Clamp to what the int timeout field can hold
	        new_attr->timeout_ms = (int) QB_MIN(delay_ms, INT_MAX);
	        new_attr->timer = attrd_add_timer(new_attr->id, new_attr->timeout_ms,
	                                          new_attr);
	    }

	    crm_trace("Created attribute %s with %s write delay and %s CIB user",
	              new_attr->id,
	              ((delay_ms > 0)? pcmk__readable_interval(new_attr->timeout_ms) : "no"),
	              pcmk__s(new_attr->user, "default"));

	    g_hash_table_replace(attributes, new_attr->id, new_attr);
	    return new_attr;
	}

	/*!
	 * \internal
	 * \brief Update an attribute's dampening (write delay) from request XML
	 *
	 * If the requested delay differs from the current one, the existing timer
	 * is deleted, a new one is created for a positive delay, and the attribute
	 * is written out (or an election is triggered) immediately.
	 *
	 * \param[in,out] a     Attribute to update
	 * \param[in]     xml   Request XML carrying the new dampening value
	 * \param[in]     attr  Attribute name (used for the timer and in logs)
	 *
	 * \return pcmk_rc_ok on success, or EINVAL if the request has no dampening
	 *         value or the value parses as negative
	 */
	static int
	attrd_update_dampening(attribute_t *a, xmlNode *xml, const char *attr)
	{
	    const char *dvalue = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING);
	    long long dampen = 0;

	    if (dvalue == NULL) {
	        crm_warn("Could not update %s: peer did not specify value for delay",
	                 attr);
	        return EINVAL;
	    }

	    dampen = crm_get_msec(dvalue);
	    if (dampen < 0) {
	        // dampen is a long long, so it must be formatted with %lld
	        crm_warn("Could not update %s: invalid delay value %lldms (%s)",
	                 attr, dampen, dvalue);
	        return EINVAL;
	    }

	    if (a->timeout_ms != dampen) {
	        mainloop_timer_del(a->timer);
	        a->timeout_ms = (int) QB_MIN(dampen, INT_MAX);
	        if (dampen > 0) {
	            a->timer = attrd_add_timer(attr, a->timeout_ms, a);
	            crm_info("Update attribute %s delay to %lldms (%s)",
	                     attr, dampen, dvalue);
	        } else {
	            a->timer = NULL;
	            crm_info("Update attribute %s to remove delay", attr);
	        }

	        /* If dampening changed, do an immediate write-out,
	         * otherwise repeated dampening changes would prevent write-outs
	         */
	        attrd_write_or_elect_attribute(a);
	    }

	    return pcmk_rc_ok;
	}

	GHashTable *attributes = NULL;

	/*!
	* \internal
	* \brief Create an XML representation of an attribute for use in peer messages
	*
	* The new element is created as a child of \p parent and is named after this
	* function. It carries the attribute's name, set type, set ID and user, the
	* value's node name and current value, the dampening in seconds
	* (timeout_ms / 1000), and the private and force-write flags.
	*
	* NOTE(review): this function is shown as a diff hunk ("-" lines removed,
	* "+" lines added), and pointer declarators in the signature look stripped
	* by the rendering of this diff -- confirm against the real patch.
	*
	* \param[in,out] parent Create attribute XML as child element of this
	* \param[in] a Attribute to represent
	* \param[in] v Attribute value to represent
	* \param[in] force_write If true, value should be written even if unchanged
	*
	* \return XML representation of attribute
	*/
	xmlNode *
	attrd_add_value_xml(xmlNode parent, const attribute_t a,
	const attribute_value_t *v, bool force_write)
	{
	xmlNode *xml = pcmk__xe_create(parent, __func__);

	crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
	crm_xml_add(xml, PCMK__XA_ATTR_SET_TYPE, a->set_type);
	crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id);
	crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
	- pcmk__xe_add_node(xml, v->nodename, v->nodeid);
	- if (pcmk_is_set(v->flags, attrd_value_remote)) {
	- crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1);
	- }
	+ crm_xml_add(xml, PCMK__XA_ATTR_HOST, v->nodename);
	+
	+ /* @COMPAT Prior to 2.1.10 and 3.0.1, the node's cluster ID was added
	+ * instead of its XML ID. For Corosync and Pacemaker Remote nodes, those are
	+ * the same, but if we ever support node XML IDs that differ from their
	+ * cluster IDs, we will have to drop support for rolling upgrades from
	+ * versions before those.
	+ */
	+ crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, attrd_get_node_xml_id(v->nodename));
	+
	crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
	crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
	crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE,
	pcmk_is_set(a->flags, attrd_attr_is_private));
	+ crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE,
	+ pcmk_is_set(v->flags, attrd_value_remote));
	crm_xml_add_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, force_write);

	return xml;
	}

	/*!
	 * \internal
	 * \brief Clear the "from peer" flag on every value of every attribute
	 */
	void
	attrd_clear_value_seen(void)
	{
	    GHashTableIter attr_iter;
	    attribute_t *attr;

	    g_hash_table_iter_init(&attr_iter, attributes);
	    while (g_hash_table_iter_next(&attr_iter, NULL, (gpointer *) &attr)) {
	        GHashTableIter value_iter;
	        attribute_value_t *value = NULL;

	        g_hash_table_iter_init(&value_iter, attr->values);
	        while (g_hash_table_iter_next(&value_iter, NULL,
	                                      (gpointer *) &value)) {
	            attrd_clear_value_flags(value, attrd_value_from_peer);
	        }
	    }
	}

	/*!
	 * \internal
	 * \brief Look up (or create) the attribute named in a request and apply any
	 *        dampening change the request asks for
	 *
	 * A missing attribute is created only for "update" and "update both"
	 * requests (a request without a task is treated as "update both").
	 *
	 * \param[in] xml   Request XML
	 * \param[in] attr  Name of the attribute to look up
	 *
	 * \return The attribute whose value the caller should go on to update, or
	 *         NULL if there is nothing more to do: the attribute does not exist
	 *         and could not be created, the dampening update failed, or the
	 *         request was a delay-only update
	 */
	attribute_t *
	attrd_populate_attribute(xmlNode *xml, const char *attr)
	{
	    attribute_t *a = NULL;
	    bool update_both = false;

	    const char *op = crm_element_value(xml, PCMK_XA_TASK);

	    // NULL because PCMK__ATTRD_CMD_SYNC_RESPONSE has no PCMK_XA_TASK
	    update_both = pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_BOTH,
	                               pcmk__str_null_matches);

	    // Look up or create attribute entry
	    a = g_hash_table_lookup(attributes, attr);
	    if (a == NULL) {
	        if (update_both
	            || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE, pcmk__str_none)) {
	            a = attrd_create_attribute(xml);
	            if (a == NULL) {
	                return NULL;
	            }

	        } else {
	            crm_warn("Could not update %s: attribute not found", attr);
	            return NULL;
	        }
	    }

	    // Update attribute dampening
	    if (update_both
	        || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
	        int rc = attrd_update_dampening(a, xml, attr);

	        // A delay-only request is finished here, even on success
	        if ((rc != pcmk_rc_ok) || !update_both) {
	            return NULL;
	        }
	    }

	    return a;
	}

	/*!
	* \internal
	* \brief Get the XML ID used to write out an attribute set
	*
	* \param[in] attr Attribute to get set ID for
	* \param[in] node_state_id XML ID of node state that attribute value is for
	*
	* \return Newly allocated string with XML ID to use for \p attr set
	*/
	char *
	attrd_set_id(const attribute_t attr, const char node_state_id)
	{
	char *set_id = NULL;

	pcmk__assert((attr != NULL) && (node_state_id != NULL));

	if (attr->set_id == NULL) {
	/* @COMPAT This should really take the set type into account. Currently
	* we use the same XML ID for transient attributes and utilization
	* attributes. It doesn't cause problems because the status section is
	* not limited by the schema in any way, but it's still unfortunate.
	* For backward compatibility reasons, we can't change this.
	*/
	set_id = crm_strdup_printf("%s-%s", PCMK_XE_STATUS, node_state_id);
	} else {
	/* @COMPAT When the user specifies a set ID for an attribute, it is the
	* same for every node. That is less than ideal, but again, the schema
	* doesn't enforce anything for the status section. We couldn't change
	* it without allowing the set ID to vary per value rather than per
	* attribute, which would break backward compatibility, pose design
	* challenges, and potentially cause problems in rolling upgrades.
	*/
	set_id = pcmk__str_copy(attr->set_id);
	}
	crm_xml_sanitize_id(set_id);
	return set_id;
	}

	/*!
	* \internal
	* \brief Get the XML ID used to write out an attribute value
	*
	* \param[in] attr Attribute to get value XML ID for
	* \param[in] node_state_id UUID of node that attribute value is for
	*
	* \return Newly allocated string with XML ID of \p attr value
	*/
	char *
	attrd_nvpair_id(const attribute_t attr, const char node_state_id)
	{
	char *nvpair_id = NULL;

	if (attr->set_id != NULL) {
	nvpair_id = crm_strdup_printf("%s-%s", attr->set_id, attr->id);

	} else {
	nvpair_id = crm_strdup_printf(PCMK_XE_STATUS "-%s-%s",
	node_state_id, attr->id);
	}
	crm_xml_sanitize_id(nvpair_id);
	return nvpair_id;
	}
	diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
	index 2537ade54a..7f697e5048 100644
	--- a/daemons/attrd/attrd_cib.c
	+++ b/daemons/attrd/attrd_cib.c
	@@ -1,682 +1,704 @@
	/*
	* Copyright 2013-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <errno.h>
	-#include <inttypes.h> // PRIu32
	#include <stdbool.h>
	#include <stdlib.h>
	#include <glib.h>

	#include <crm/cib/internal.h> // cib__*
	#include <crm/common/logging.h>
	#include <crm/common/results.h>
	#include <crm/common/strings_internal.h>
	#include <crm/common/xml.h>
	#include <crm/cluster/internal.h> // pcmk__get_node()

	#include "pacemaker-attrd.h"

	static int last_cib_op_done = 0;

	static void write_attribute(attribute_t *a, bool ignore_delay);

	/*!
	 * \internal
	 * \brief Handle loss of the CIB manager connection
	 *
	 * Signs off from the CIB. Unless a shutdown is already in progress, the
	 * loss is treated as fatal and a shutdown is started with exit status
	 * CRM_EX_DISCONNECT.
	 *
	 * \param[in,out] user_data  CIB connection object (cib_t *)
	 */
	static void
	attrd_cib_destroy_cb(gpointer user_data)
	{
	    cib_t *conn = user_data;

	    conn->cmds->signoff(conn);

	    if (attrd_shutting_down(false)) {
	        crm_info("Disconnected from the CIB manager");
	        return;
	    }

	    // @TODO This should trigger a reconnect, not a shutdown
	    crm_crit("Lost connection to the CIB manager, shutting down");
	    attrd_exit_status = CRM_EX_DISCONNECT;
	    attrd_shutdown(0);
	}

	/*!
	 * \internal
	 * \brief Handle a CIB diff notification
	 *
	 * A change to the alerts section triggers a re-read of the alert
	 * configuration. If the change came from a client that can leave the CIB
	 * out of sync with our attribute table, touched the nodes or status
	 * section, and we are the elected writer, all attributes are written out.
	 *
	 * \param[in] event  Notification event name (used for logging only)
	 * \param[in] msg    Notification XML containing the patchset
	 */
	static void
	attrd_cib_updated_cb(const char *event, xmlNode *msg)
	{
	    const xmlNode *patchset = NULL;
	    const char *client_name = NULL;
	    bool status_changed = false;

	    if (attrd_shutting_down(true)) {
	        crm_debug("Ignoring CIB change during shutdown");
	        return;
	    }

	    if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) {
	        return;
	    }

	    if (cib__element_in_patchset(patchset, PCMK_XE_ALERTS)) {
	        mainloop_set_trigger(attrd_config_read);
	    }

	    status_changed = cib__element_in_patchset(patchset, PCMK_XE_STATUS);

	    client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTNAME);
	    if (!cib__client_triggers_refresh(client_name)) {
	        /* This change came from a source that ensured the CIB is consistent
	         * with our attributes table, so we don't need to write anything out.
	         */
	        return;
	    }

	    if (!attrd_election_won()) {
	        // Don't write attributes if we're not the writer
	        return;
	    }

	    if (status_changed || cib__element_in_patchset(patchset, PCMK_XE_NODES)) {
	        /* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS
	         * section. Write transient attributes to ensure they're up-to-date in
	         * the CIB.
	         */
	        if (client_name == NULL) {
	            // Fall back to the client ID for the log message
	            client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTID);
	        }
	        crm_notice("Updating all attributes after %s event triggered by %s",
	                   event, pcmk__s(client_name, "(unidentified client)"));

	        attrd_write_attributes(attrd_write_all);
	    }
	}

	/*!
	 * \internal
	 * \brief Create the CIB connection object and sign on to the CIB manager
	 *
	 * Sign-on is retried until it succeeds or the attempt counter reaches
	 * \p max_retry; before each attempt after the first, the function sleeps
	 * for as many seconds as attempts made so far. The attempt counter is
	 * static, so it carries over between calls.
	 *
	 * \param[in] max_retry  Give up once this many attempts have been made
	 *
	 * \return pcmk_ok on success, or -ENOTCONN on any failure (in which case
	 *         the connection object is cleaned up)
	 */
	int
	attrd_cib_connect(int max_retry)
	{
	    static int attempts = 0;

	    int rc = -ENOTCONN;

	    the_cib = cib_new();
	    if (the_cib == NULL) {
	        return -ENOTCONN;
	    }

	    for (;;) {
	        if (attempts > 0) {
	            sleep(attempts);
	        }
	        ++attempts;
	        crm_debug("Connection attempt %d to the CIB manager", attempts);
	        rc = the_cib->cmds->signon(the_cib, PCMK__VALUE_ATTRD, cib_command);

	        if ((rc == pcmk_ok) || (attempts >= max_retry)) {
	            break;
	        }
	    }

	    if (rc != pcmk_ok) {
	        crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d",
	                pcmk_strerror(rc), rc);
	        goto cleanup;
	    }

	    crm_debug("Connected to the CIB manager after %d attempts", attempts);

	    // Get told when the connection goes away
	    rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb);
	    if (rc != pcmk_ok) {
	        crm_err("Could not set disconnection callback");
	        goto cleanup;
	    }

	    // Get told about CIB changes
	    rc = the_cib->cmds->add_notify_callback(the_cib,
	                                            PCMK__VALUE_CIB_DIFF_NOTIFY,
	                                            attrd_cib_updated_cb);
	    if (rc != pcmk_ok) {
	        crm_err("Could not set CIB notification callback");
	        goto cleanup;
	    }

	    return pcmk_ok;

	cleanup:
	    cib__clean_up_connection(&the_cib);
	    return -ENOTCONN;
	}

	void
	attrd_cib_disconnect(void)
	{
	CRM_CHECK(the_cib != NULL, return);
	the_cib->cmds->del_notify_callback(the_cib, PCMK__VALUE_CIB_DIFF_NOTIFY,
	attrd_cib_updated_cb);
	cib__clean_up_connection(&the_cib);
	}

	/*!
	 * \internal
	 * \brief Log the result of erasing a node's transient attributes
	 *
	 * \param[in] msg        Ignored
	 * \param[in] call_id    Ignored
	 * \param[in] rc         Result of the CIB removal request
	 * \param[in] output     Ignored
	 * \param[in] user_data  Node name as a string ("a node" is logged if NULL)
	 */
	static void
	attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output,
	               void *user_data)
	{
	    const char *node = pcmk__s((const char *) user_data, "a node");

	    if (rc == pcmk_ok) {
	        crm_info("Cleared transient node attributes for %s from CIB", node);
	    } else {
	        crm_err("Unable to clear transient node attributes for %s from CIB: %s",
	                node, pcmk_strerror(rc));
	    }
	}

	// XPath (printf-style; takes a node name) for a node's transient attributes
	#define XPATH_TRANSIENT "//" PCMK__XE_NODE_STATE    \
	                        "[@" PCMK_XA_UNAME "='%s']" \
	                        "/" PCMK__XE_TRANSIENT_ATTRIBUTES

	/*!
	 * \internal
	 * \brief Wipe all transient node attributes for a node from the CIB
	 *
	 * The removal is asynchronous; its result is logged by attrd_erase_cb().
	 *
	 * \param[in] node  Node to clear attributes for (nothing is done if NULL)
	 */
	void
	attrd_cib_erase_transient_attrs(const char *node)
	{
	    char *node_xpath = NULL;
	    char *node_copy = NULL;
	    int request_id = 0;

	    CRM_CHECK(node != NULL, return);

	    node_xpath = crm_strdup_printf(XPATH_TRANSIENT, node);

	    crm_debug("Clearing transient node attributes for %s from CIB using %s",
	              node, node_xpath);

	    request_id = the_cib->cmds->remove(the_cib, node_xpath, NULL, cib_xpath);
	    free(node_xpath);

	    // The callback gets its own copy of the name, released with free()
	    node_copy = pcmk__str_copy(node);
	    the_cib->cmds->register_callback_full(the_cib, request_id, 120, FALSE,
	                                          node_copy, "attrd_erase_cb",
	                                          attrd_erase_cb, free);
	}

	/*!
	 * \internal
	 * \brief Prepare the CIB after cluster is connected
	 *
	 * Erases the local node's transient attributes from the CIB, then sets up
	 * and fires the trigger that reads the alerts configuration.
	 */
	void
	attrd_cib_init(void)
	{
	    const char *local_node = attrd_cluster->uname;

	    /* We have no attribute values in memory, so wipe the CIB to match. This is
	     * normally done by the DC's controller when this node leaves the cluster,
	     * but this handles the case where the node restarted so quickly that the
	     * cluster layer didn't notice.
	     *
	     * \todo If pacemaker-attrd respawns after crashing (see
	     * PCMK_ENV_RESPAWNED), ideally we'd skip this and sync our attributes
	     * from the writer. However, currently we reject any values for us that
	     * the writer has, in attrd_peer_update().
	     */
	    attrd_cib_erase_transient_attrs(local_node);

	    // Set a trigger for reading the CIB (for the alerts section)
	    attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH,
	                                             attrd_read_options, NULL);

	    // Always read the CIB at start-up
	    mainloop_set_trigger(attrd_config_read);
	}

	/*!
	 * \internal
	 * \brief Timer callback run when an attribute's dampening interval expires
	 *
	 * \param[in,out] data  Attribute (attribute_t *) whose timer expired
	 *
	 * \return Always FALSE
	 */
	static gboolean
	attribute_timer_cb(gpointer data)
	{
	    attribute_t *expired = (attribute_t *) data;

	    crm_trace("Dampen interval expired for %s", expired->id);
	    attrd_write_or_elect_attribute(expired);
	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Process the result of a CIB write for an attribute
	 *
	 * Logs the overall result and the per-node outcome. On success, each
	 * value's record of what was requested is cleared. On failure, the
	 * attribute is flagged as changed so the write is attempted again. If the
	 * attribute is flagged as changed and we are the writer, it is written
	 * again: at once after a success, or after a delay after a failure.
	 *
	 * \param[in] msg        Ignored
	 * \param[in] call_id    CIB call ID (a negative value is treated as the
	 *                       result code when \p rc reports success)
	 * \param[in] rc         Result of the CIB operation
	 * \param[in] output     Ignored
	 * \param[in] user_data  Name of the attribute that was written (char *)
	 */
	static void
	attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
	                   void *user_data)
	{
	    int level = LOG_ERR;
	    GHashTableIter iter;
	    const char *peer = NULL;
	    attribute_value_t *v = NULL;

	    char *name = user_data;
	    attribute_t *a = g_hash_table_lookup(attributes, name);

	    if (a == NULL) {
	        crm_info("Attribute %s no longer exists", name);
	        return;
	    }

	    a->update = 0;
	    if (rc == pcmk_ok && call_id < 0) {
	        rc = call_id;
	    }

	    switch (rc) {
	        case pcmk_ok:
	            level = LOG_INFO;
	            last_cib_op_done = call_id;
	            if (a->timer && !a->timeout_ms) {
	                // Remove temporary dampening for failed writes
	                mainloop_timer_del(a->timer);
	                a->timer = NULL;
	            }
	            break;

	        case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */
	        case -ETIME: /* When an attr changes while there is a DC election */
	        case -ENXIO: /* When an attr changes while the CIB is syncing a
	                      * newer config from a node that just came up
	                      */
	            level = LOG_WARNING;
	            break;
	    }

	    do_crm_log(level, "CIB update %d result for %s: %s " CRM_XS " rc=%d",
	               call_id, a->id, pcmk_strerror(rc), rc);

	    g_hash_table_iter_init(&iter, a->values);
	    while (g_hash_table_iter_next(&iter, (gpointer *) &peer, (gpointer *) &v)) {
	        if (rc == pcmk_ok) {
	            crm_info("* Wrote %s[%s]=%s",
	                     a->id, peer, pcmk__s(v->requested, "(unset)"));
	            pcmk__str_update(&(v->requested), NULL);
	        } else {
	            do_crm_log(level, "* Could not write %s[%s]=%s",
	                       a->id, peer, pcmk__s(v->requested, "(unset)"));
	            /* Reattempt write below if we are still the writer */
	            attrd_set_attr_flags(a, attrd_attr_changed);
	        }
	    }

	    if (pcmk_is_set(a->flags, attrd_attr_changed) && attrd_election_won()) {
	        if (rc == pcmk_ok) {
	            /* We deferred a write of a new update because this update was in
	             * progress. Write out the new value without additional delay.
	             */
	            crm_debug("Pending update for %s can be written now", a->id);
	            write_attribute(a, false);

	        } else if (a->timer) {
	            /* We're re-attempting a write because the original failed; delay
	             * the next attempt so we don't potentially flood the CIB manager
	             * and logs with a zillion attempts per second.
	             *
	             * @TODO We could elect a new writer instead. However, we'd have to
	             * somehow downgrade our vote, and we'd still need something like
	             * this if all peers similarly fail to write this attribute (which
	             * may indicate a corrupted attribute entry rather than a CIB
	             * issue).
	             */

	            // Attribute has a dampening value, so use that as delay
	            if (!mainloop_timer_running(a->timer)) {
	                crm_trace("Delayed re-attempted write for %s by %s",
	                          name, pcmk__readable_interval(a->timeout_ms));
	                mainloop_timer_start(a->timer);
	            }
	        } else {
	            /* Set a temporary dampening of 2 seconds (timer will continue
	             * to exist until the attribute's dampening gets set or the
	             * write succeeds).
	             */
	            a->timer = attrd_add_timer(a->id, 2000, a);
	            mainloop_timer_start(a->timer);
	        }
	    }
	}

	/*!
	* \internal
	* \brief Add a set-attribute update request to the current CIB transaction
	*
	* \param[in] attr Attribute to update
	* \param[in] attr_id ID of attribute to update
	* \param[in] node_id ID of node for which to update attribute value
	* \param[in] set_id ID of attribute set
	* \param[in] value New value for attribute
	*
	* \return Standard Pacemaker return code
	*/
	static int
	add_set_attr_update(const attribute_t attr, const char attr_id,
	const char node_id, const char set_id, const char *value)
	{
	xmlNode *update = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
	xmlNode *child = update;
	int rc = ENOMEM;

	crm_xml_add(child, PCMK_XA_ID, node_id);

	child = pcmk__xe_create(child, PCMK__XE_TRANSIENT_ATTRIBUTES);
	crm_xml_add(child, PCMK_XA_ID, node_id);

	child = pcmk__xe_create(child, attr->set_type);
	crm_xml_add(child, PCMK_XA_ID, set_id);

	child = pcmk__xe_create(child, PCMK_XE_NVPAIR);
	crm_xml_add(child, PCMK_XA_ID, attr_id);
	crm_xml_add(child, PCMK_XA_NAME, attr->id);
	crm_xml_add(child, PCMK_XA_VALUE, value);

	rc = the_cib->cmds->modify(the_cib, PCMK_XE_STATUS, update,
	cib_can_create\|cib_transaction);
	rc = pcmk_legacy2rc(rc);

	free_xml(update);
	return rc;
	}

	/*!
	 * \internal
	 * \brief Add an unset-attribute update request to the current CIB transaction
	 *
	 * \param[in] attr     Attribute to update
	 * \param[in] attr_id  ID of attribute to update
	 * \param[in] node_id  ID of node for which to update attribute value
	 * \param[in] set_id   ID of attribute set
	 *
	 * \return Standard Pacemaker return code
	 */
	static int
	add_unset_attr_update(const attribute_t *attr, const char *attr_id,
	                      const char *node_id, const char *set_id)
	{
	    // XPath selecting the single nvpair (matched by both ID and name)
	    char *xpath = crm_strdup_printf("/" PCMK_XE_CIB
	                                    "/" PCMK_XE_STATUS
	                                    "/" PCMK__XE_NODE_STATE
	                                        "[@" PCMK_XA_ID "='%s']"
	                                    "/" PCMK__XE_TRANSIENT_ATTRIBUTES
	                                        "[@" PCMK_XA_ID "='%s']"
	                                    "/%s[@" PCMK_XA_ID "='%s']"
	                                    "/" PCMK_XE_NVPAIR
	                                        "[@" PCMK_XA_ID "='%s' "
	                                         "and @" PCMK_XA_NAME "='%s']",
	                                    node_id, node_id, attr->set_type, set_id,
	                                    attr_id, attr->id);

	    int rc = the_cib->cmds->remove(the_cib, xpath, NULL,
	                                   cib_xpath|cib_transaction);

	    free(xpath);

	    // remove() yields a legacy code; convert it to a standard return code
	    return pcmk_legacy2rc(rc);
	}

	/*!
	 * \internal
	 * \brief Add an attribute update request to the current CIB transaction
	 *
	 * \param[in] attr     Attribute to update
	 * \param[in] value    New value for attribute (NULL to unset it)
	 * \param[in] node_id  ID of node for which to update attribute value
	 *
	 * \return Standard Pacemaker return code
	 */
	static int
	add_attr_update(const attribute_t *attr, const char *value, const char *node_id)
	{
	    char *set_id = attrd_set_id(attr, node_id);
	    char *nvpair_id = attrd_nvpair_id(attr, node_id);
	    int rc = pcmk_rc_ok;

	    // A NULL value means the attribute should be removed rather than set
	    if (value == NULL) {
	        rc = add_unset_attr_update(attr, nvpair_id, node_id, set_id);
	    } else {
	        rc = add_set_attr_update(attr, nvpair_id, node_id, set_id, value);
	    }
	    free(set_id);
	    free(nvpair_id);
	    return rc;
	}

	/* Send an attribute alert for every value saved in table t, tracing the
	 * result of each send.
	 */
	static void
	send_alert_attributes_value(attribute_t *a, GHashTable *t)
	{
	int rc = 0;
	attribute_value_t *at = NULL;
	GHashTableIter vIter;

	g_hash_table_iter_init(&vIter, t);

	while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) {
	- rc = attrd_send_attribute_alert(at->nodename, at->nodeid,
	+ const char *node_xml_id = attrd_get_node_xml_id(at->nodename);
	+
	+ rc = attrd_send_attribute_alert(at->nodename, node_xml_id,
	a->id, at->current);
	- crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d",
	- a->id, at->nodename, at->current, at->nodeid, rc);
	+ crm_trace("Sent alerts for %s[%s]=%s with node XML ID %s "
	+ "(%s agents failed)",
	+ a->id, at->nodename, at->current,
	+ pcmk__s(node_xml_id, "unknown"),
	+ ((rc == 0)? "no" : ((rc == -1)? "some" : "all")));
	}
	}

	/* Store a newly allocated copy of v's node name and current value in table t,
	 * keyed by the copied node name.
	 */
	static void
	set_alert_attribute_value(GHashTable *t, attribute_value_t *v)
	{
	attribute_value_t *a_v = pcmk__assert_alloc(1, sizeof(attribute_value_t));

	- a_v->nodeid = v->nodeid;
	a_v->nodename = pcmk__str_copy(v->nodename);
	a_v->current = pcmk__str_copy(v->current);

	g_hash_table_replace(t, a_v->nodename, a_v);
	}

	/* Create a mainloop timer named id with the given timeout that invokes
	 * attribute_timer_cb with attr as its argument. The timer is only created
	 * here; the callers visible in this file start it themselves.
	 */
	mainloop_timer_t *
	attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr)
	{
	    return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr);
	}

	/*!
	* \internal
	* \brief Write an attribute's values to the CIB if appropriate
	*
	* \param[in,out] a Attribute to write
	* \param[in] ignore_delay If true, write attribute now regardless of any
	* configured delay
	*/
	static void
	write_attribute(attribute_t *a, bool ignore_delay)
	{
	int private_updates = 0, cib_updates = 0;
	attribute_value_t *v = NULL;
	GHashTableIter iter;
	GHashTable *alert_attribute_value = NULL;
	int rc = pcmk_ok;
	+ bool should_write = true;

	if (a == NULL) {
	return;
	}

	+ // Private attributes (or any in standalone mode) are not written to the CIB
	+ if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
	+ should_write = false;
	+ }
	+
	/* If this attribute will be written to the CIB ... */
	- if (!stand_alone && !pcmk_is_set(a->flags, attrd_attr_is_private)) {
	+ if (should_write) {
	/* Defer the write if now's not a good time */
	if (a->update && (a->update < last_cib_op_done)) {
	crm_info("Write out of '%s' continuing: update %d considered lost",
	a->id, a->update);
	a->update = 0; // Don't log this message again

	} else if (a->update) {
	crm_info("Write out of '%s' delayed: update %d in progress",
	a->id, a->update);
	goto done;

	} else if (mainloop_timer_running(a->timer)) {
	if (ignore_delay) {
	mainloop_timer_stop(a->timer);
	crm_debug("Overriding '%s' write delay", a->id);
	} else {
	crm_info("Delaying write of '%s'", a->id);
	goto done;
	}
	}

	// Initiate a transaction for all the peer value updates
	CRM_CHECK(the_cib != NULL, goto done);
	the_cib->cmds->set_user(the_cib, a->user);
	rc = the_cib->cmds->init_transaction(the_cib);
	if (rc != pcmk_ok) {
	crm_err("Failed to write %s (set %s): Could not initiate "
	"CIB transaction",
	a->id, pcmk__s(a->set_id, "unspecified"));
	goto done;
	}
	}

	- /* Attribute will be written shortly, so clear changed flag and force
	- * write flag, and initialize UUID missing flag to false.
	+ /* The changed and force-write flags apply only to the next write,
	+ * which this is, so clear them now. Also clear the "node unknown" flag
	+ * because we will check whether it is known below and reset if appropriate.
	*/
	- attrd_clear_attr_flags(a, attrd_attr_changed|attrd_attr_uuid_missing|attrd_attr_force_write);
	+ attrd_clear_attr_flags(a, attrd_attr_changed
	+ |attrd_attr_force_write
	+ |attrd_attr_node_unknown);

	/* Make the table for the attribute trap */
	alert_attribute_value = pcmk__strikey_table(NULL,
	attrd_free_attribute_value);

	/* Iterate over each peer value of this attribute */
	g_hash_table_iter_init(&iter, a->values);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
	- const char *uuid = NULL;
	+ const char *node_xml_id = NULL;
	+ const char *prev_xml_id = NULL;
	+
	+ if (!should_write) {
	+ private_updates++;
	+ continue;
	+ }
	+
	+ /* We need the node's CIB XML ID to write out its attributes, so look
	+ * for it now. Check the node caches first, even if the ID was
	+ * previously known (in case it changed), but use any previous value as
	+ * a fallback.
	+ */
	+
	+ prev_xml_id = attrd_get_node_xml_id(v->nodename);

	if (pcmk_is_set(v->flags, attrd_value_remote)) {
	- /* If this is a Pacemaker Remote node, the node's UUID is the same
	- * as its name, which we already have.
	- */
	- uuid = v->nodename;
	+ // A Pacemaker Remote node's XML ID is the same as its name
	+ node_xml_id = v->nodename;

	} else {
	- // This will create a cluster node cache entry if none exists
	- crm_node_t *peer = pcmk__get_node(v->nodeid, v->nodename, NULL,
	+ // This creates a cluster node cache entry if none exists
	+ crm_node_t *peer = pcmk__get_node(0, v->nodename, prev_xml_id,
	pcmk__node_search_any);

	- uuid = peer->uuid;
	-
	- // Remember peer's node ID if we're just now learning it
	- if ((peer->id != 0) && (v->nodeid == 0)) {
	- crm_trace("Learned ID %u for node %s", peer->id, v->nodename);
	- v->nodeid = peer->id;
	+ node_xml_id = pcmk__cluster_get_xml_id(peer);
	+ if (node_xml_id == NULL) {
	+ node_xml_id = prev_xml_id;
	}
	}

	- /* If this is a private attribute, no update needs to be sent */
	- if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
	- private_updates++;
	- continue;
	- }
	-
	// Defer write if this is a cluster node that's never been seen
	- if (uuid == NULL) {
	- attrd_set_attr_flags(a, attrd_attr_uuid_missing);
	- crm_notice("Cannot update %s[%s]='%s' now because node's UUID is "
	- "unknown (will retry if learned)",
	+ if (node_xml_id == NULL) {
	+ attrd_set_attr_flags(a, attrd_attr_node_unknown);
	+ crm_notice("Cannot write %s[%s]='%s' to CIB because node's XML ID "
	+ "is unknown (will retry if learned)",
	a->id, v->nodename, v->current);
	continue;
	}

	+ if (!pcmk__str_eq(prev_xml_id, node_xml_id, pcmk__str_none)) {
	+ crm_trace("Setting %s[%s] node XML ID to %s (was %s)",
	+ a->id, v->nodename, node_xml_id,
	+ pcmk__s(prev_xml_id, "unknown"));
	+ attrd_set_node_xml_id(v->nodename, node_xml_id);
	+ }
	+
	// Update this value as part of the CIB transaction we're building
	- rc = add_attr_update(a, v->current, uuid);
	+ rc = add_attr_update(a, v->current, node_xml_id);
	if (rc != pcmk_rc_ok) {
	- crm_err("Failed to update %s[%s]='%s': %s "
	- CRM_XS " node uuid=%s id=%" PRIu32,
	+ crm_err("Couldn't add %s[%s]='%s' to CIB transaction: %s "
	+ CRM_XS " node XML ID %s",
	a->id, v->nodename, v->current, pcmk_rc_str(rc),
	- uuid, v->nodeid);
	+ node_xml_id);
	continue;
	}

	- crm_debug("Writing %s[%s]=%s (node-state-id=%s node-id=%" PRIu32 ")",
	+ crm_debug("Added %s[%s]=%s to CIB transaction (node XML ID %s)",
	a->id, v->nodename, pcmk__s(v->current, "(unset)"),
	- uuid, v->nodeid);
	+ node_xml_id);
	cib_updates++;

	/* Preservation of the attribute to transmit alert */
	set_alert_attribute_value(alert_attribute_value, v);

	// Save this value so we can log it when write completes
	pcmk__str_update(&(v->requested), v->current);
	}

	if (private_updates) {
	crm_info("Processed %d private change%s for %s (set %s)",
	private_updates, pcmk__plural_s(private_updates),
	a->id, pcmk__s(a->set_id, "unspecified"));
	}
	if (cib_updates > 0) {
	char *id = pcmk__str_copy(a->id);

	// Commit transaction
	a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none);

	crm_info("Sent CIB request %d with %d change%s for %s (set %s)",
	a->update, cib_updates, pcmk__plural_s(cib_updates),
	a->id, pcmk__s(a->set_id, "unspecified"));

	if (the_cib->cmds->register_callback_full(the_cib, a->update,
	CIB_OP_TIMEOUT_S, FALSE, id,
	"attrd_cib_callback",
	attrd_cib_callback, free)) {
	// Transmit alert of the attribute
	send_alert_attributes_value(a, alert_attribute_value);
	}
	}

	done:
	// Discard transaction (if any)
	if (the_cib != NULL) {
	the_cib->cmds->end_transaction(the_cib, false, cib_none);
	the_cib->cmds->set_user(the_cib, NULL);
	}

	if (alert_attribute_value != NULL) {
	g_hash_table_destroy(alert_attribute_value);
	}
	}

	/*!
	* \internal
	* \brief Write out attributes
	*
	* \param[in] options Group of enum attrd_write_options
	*/
	void
	attrd_write_attributes(uint32_t options)
	{
	GHashTableIter iter;
	attribute_t *a = NULL;

	crm_debug("Writing out %s attributes",
	pcmk_is_set(options, attrd_write_all)? "all" : "changed");
	g_hash_table_iter_init(&iter, attributes);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
	- if (!pcmk_is_set(options, attrd_write_all) &&
	- pcmk_is_set(a->flags, attrd_attr_uuid_missing)) {
	+ if (!pcmk_is_set(options, attrd_write_all)
	+ && pcmk_is_set(a->flags, attrd_attr_node_unknown)) {
	// Try writing this attribute again, in case peer ID was learned
	attrd_set_attr_flags(a, attrd_attr_changed);
	} else if (pcmk_is_set(a->flags, attrd_attr_force_write)) {
	/* If the force_write flag is set, write the attribute. */
	attrd_set_attr_flags(a, attrd_attr_changed);
	}

	if (pcmk_is_set(options, attrd_write_all) ||
	pcmk_is_set(a->flags, attrd_attr_changed)) {
	bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay);

	if (pcmk_is_set(a->flags, attrd_attr_force_write)) {
	// Always ignore delay when forced write flag is set
	ignore_delay = true;
	}
	write_attribute(a, ignore_delay);
	} else {
	crm_trace("Skipping unchanged attribute %s", a->id);
	}
	}
	}

	/* Write the attribute (honoring any write delay) if this node has won the
	 * election; otherwise start an election if one is needed.
	 */
	void
	attrd_write_or_elect_attribute(attribute_t *a)
	{
	    if (!attrd_election_won()) {
	        attrd_start_election_if_needed();
	        return;
	    }

	    write_attribute(a, false);
	}
	diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
	index 2868f1f404..488c5db87d 100644
	--- a/daemons/attrd/attrd_corosync.c
	+++ b/daemons/attrd/attrd_corosync.c
	@@ -1,608 +1,614 @@
	/*
	- * Copyright 2013-2024 the Pacemaker project contributors
	+ * Copyright 2013-2025 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <errno.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdlib.h>

	#include <crm/cluster.h>
	#include <crm/cluster/internal.h>
	#include <crm/common/logging.h>
	#include <crm/common/results.h>
	#include <crm/common/strings_internal.h>
	#include <crm/common/xml.h>

	#include "pacemaker-attrd.h"

	/* Build a new confirmation message carrying the given call ID.
	 * The caller visible in this file frees the result with free_xml().
	 */
	static xmlNode *
	attrd_confirmation(int callid)
	{
	    xmlNode *confirmation = pcmk__xe_create(NULL, __func__);
	    const char *local_name = pcmk__cluster_local_node_name();

	    crm_xml_add(confirmation, PCMK__XA_T, PCMK__VALUE_ATTRD);
	    crm_xml_add(confirmation, PCMK__XA_SRC, local_name);
	    crm_xml_add(confirmation, PCMK_XA_TASK, PCMK__ATTRD_CMD_CONFIRM);
	    crm_xml_add_int(confirmation, PCMK__XA_CALL_ID, callid);

	    return confirmation;
	}

	/* Handle one XML message received from a cluster peer: dispatch election ops
	 * to the election code, otherwise process it as an attribute request and
	 * send back a confirmation if the sender asked for one.
	 */
	static void
	attrd_peer_message(crm_node_t *peer, xmlNode *xml)
	{
	    const char *election_op = crm_element_value(xml, PCMK__XA_CRM_TASK);

	    if (election_op) {
	        attrd_handle_election_op(peer, xml);
	        return;
	    }

	    if (attrd_shutting_down(false)) {
	        /* If we're shutting down, we want to continue responding to election
	         * ops as long as we're a cluster member (because our vote may be
	         * needed). Ignore all other messages.
	         */
	        return;

	    } else {
	        pcmk__request_t request = {
	            .ipc_client     = NULL,
	            .ipc_id         = 0,
	            .ipc_flags      = 0,
	            .peer           = peer->uname,
	            .xml            = xml,
	            .call_options   = 0,
	            .result         = PCMK__UNKNOWN_RESULT,
	        };

	        request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);
	        CRM_CHECK(request.op != NULL, return);

	        attrd_handle_request(&request);

	        /* Having finished handling the request, check to see if the originating
	         * peer requested confirmation. If so, send that confirmation back now.
	         */
	        if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) &&
	            !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) {
	            int callid = 0;
	            xmlNode *reply = NULL;

	            /* Add the confirmation ID for the message we are confirming to the
	             * response so the originating peer knows what they're a confirmation
	             * for.
	             */
	            crm_element_value_int(xml, PCMK__XA_CALL_ID, &callid);
	            reply = attrd_confirmation(callid);

	            /* And then send the confirmation back to the originating peer. This
	             * ends up right back in this same function (attrd_peer_message) on the
	             * peer where it will have to do something with a PCMK__XA_CONFIRM type
	             * message.
	             */
	            crm_debug("Sending %s a confirmation", peer->uname);
	            attrd_send_message(peer, reply, false);
	            free_xml(reply);
	        }

	        pcmk__reset_request(&request);
	    }
	}

	/* CPG delivery callback: extract the message payload, parse it as XML when it
	 * is a cluster-class message, and hand it to attrd_peer_message().
	 */
	static void
	attrd_cpg_dispatch(cpg_handle_t handle,
	                   const struct cpg_name *groupName,
	                   uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
	{
	    uint32_t kind = 0;
	    const char *from = NULL;
	    xmlNode *parsed = NULL;
	    char *payload = pcmk__cpg_message_data(handle, nodeid, pid, msg, &kind,
	                                           &from);

	    if (payload == NULL) {
	        return;
	    }

	    // Only cluster-class messages are parsed; anything else is reported as bad
	    if (kind == crm_class_cluster) {
	        parsed = pcmk__xml_parse(payload);
	    }

	    if (parsed != NULL) {
	        crm_node_t *sender = pcmk__get_node(nodeid, from, NULL,
	                                            pcmk__node_search_cluster_member);

	        attrd_peer_message(sender, parsed);
	    } else {
	        crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, payload);
	    }

	    free_xml(parsed);
	    free(payload);
	}

	/* Called when the Corosync process group connection goes away. If we are not
	 * already shutting down, treat it as a fatal disconnect and begin shutdown.
	 */
	static void
	attrd_cpg_destroy(gpointer unused)
	{
	    if (!attrd_shutting_down(false)) {
	        crm_crit("Lost connection to Corosync process group, shutting down");
	        attrd_exit_status = CRM_EX_DISCONNECT;
	        attrd_shutdown(0);
	        return;
	    }

	    crm_info("Disconnected from Corosync process group");
	}

	/*!
	 * \internal
	 * \brief Broadcast an update for a single attribute value
	 *
	 * \param[in] a  Attribute to broadcast
	 * \param[in] v  Attribute value to broadcast
	 */
	void
	attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v)
	{
	    xmlNode *op = pcmk__xe_create(NULL, PCMK_XE_OP);

	    crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
	    attrd_add_value_xml(op, a, v, false);

	    // A NULL destination is what this file uses to address all peers
	    attrd_send_message(NULL, op, false);
	    free_xml(op);
	}

	#define state_text(state) pcmk__s((state), "in unknown state")

	/* Cluster status callback: log the change, sync attributes to newly joined
	 * members when we are the writer, drop values for lost nodes, and clean up
	 * election/protocol state for cluster nodes that are gone.
	 */
	static void
	attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
	{
	    bool gone = false;
	    bool is_remote = pcmk_is_set(peer->flags, crm_remote_node);

	    switch (kind) {
	        case crm_status_uname:
	            crm_debug("%s node %s is now %s",
	                      (is_remote? "Remote" : "Cluster"),
	                      peer->uname, state_text(peer->state));
	            break;

	        case crm_status_processes:
	            if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
	                gone = true;
	            }
	            crm_debug("Node %s is %s a peer",
	                      peer->uname, (gone? "no longer" : "now"));
	            break;

	        case crm_status_nstate:
	            // For this kind, data is logged as the node's previous state
	            crm_debug("%s node %s is now %s (was %s)",
	                      (is_remote? "Remote" : "Cluster"),
	                      peer->uname, state_text(peer->state), state_text(data));
	            if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
	                /* If we're the writer, send new peers a list of all attributes
	                 * (unless it's a remote node, which doesn't run its own attrd)
	                 */
	                if (attrd_election_won()
	                    && !pcmk_is_set(peer->flags, crm_remote_node)) {
	                    attrd_peer_sync(peer);
	                }
	            } else {
	                // Remove all attribute values associated with lost nodes
	                attrd_peer_remove(peer->uname, false, "loss");
	                gone = true;
	            }
	            break;
	    }

	    // Remove votes from cluster nodes that leave, in case election in progress
	    if (gone && !is_remote) {
	        attrd_remove_voter(peer);
	        attrd_remove_peer_protocol_ver(peer->uname);
	        attrd_do_not_expect_from_peer(peer->uname);
	    }
	}

	-static void
	-record_peer_nodeid(attribute_value_t *v, const char *host)
	-{
	- crm_node_t *known_peer = pcmk__get_node(v->nodeid, host, NULL,
	- pcmk__node_search_cluster_member);
	-
	- crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid);
	- if (attrd_election_won()) {
	- attrd_write_attributes(attrd_write_changed);
	- }
	-}
	-
	#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)")

	#define readable_peer(p) \
	(((p) == NULL)? "all peers" : pcmk__s((p)->uname, "unknown peer"))

	/* Apply one peer-supplied value of attribute a for the given host: create the
	 * value entry if needed, record remote-node status, then either re-broadcast
	 * the local value (when filtering a sync response about ourselves), store the
	 * changed value and schedule a write, or note an unchanged value.
	 */
	static void
	update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
	const char *attr, const char *value, const char *host,
	bool filter)
	{
	int is_remote = 0;
	bool changed = false;
	attribute_value_t *v = NULL;
	+ const char *prev_xml_id = NULL;
	+ const char *node_xml_id = crm_element_value(xml, PCMK__XA_ATTR_HOST_ID);

	// Create entry for value if not already existing
	v = g_hash_table_lookup(a->values, host);
	if (v == NULL) {
	v = pcmk__assert_alloc(1, sizeof(attribute_value_t));

	v->nodename = pcmk__str_copy(host);
	g_hash_table_replace(a->values, v->nodename, v);
	}

	+ /* If update doesn't contain the node XML ID, fall back to any previously
	+ * known value (for logging)
	+ */
	+ prev_xml_id = attrd_get_node_xml_id(v->nodename);
	+ if (node_xml_id == NULL) {
	+ node_xml_id = prev_xml_id;
	+ }
	+
	// If value is for a Pacemaker Remote node, remember that
	crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
	if (is_remote) {
	attrd_set_value_flags(v, attrd_value_remote);
	pcmk__assert(pcmk__cluster_lookup_remote_node(host) != NULL);
	}

	// Check whether the value changed
	changed = !pcmk__str_eq(v->current, value, pcmk__str_casei);

	if (changed && filter && pcmk__str_eq(host, attrd_cluster->uname,
	pcmk__str_casei)) {
	/* Broadcast the local value for an attribute that differs from the
	* value provided in a peer's attribute synchronization response. This
	* ensures a node's values for itself take precedence and all peers are
	* kept in sync.
	*/
	v = g_hash_table_lookup(a->values, attrd_cluster->uname);
	crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
	attr, host, readable_value(v), value, peer->uname);
	attrd_broadcast_value(a, v);

	} else if (changed) {
	crm_notice("Setting %s[%s]%s%s: %s -> %s "
	- CRM_XS " from %s with %s write delay",
	+ CRM_XS " from %s with %s write delay and node XML ID %s",
	attr, host, a->set_type ? " in " : "",
	pcmk__s(a->set_type, ""), readable_value(v),
	pcmk__s(value, "(unset)"), peer->uname,
	- (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
	+ (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms),
	+ pcmk__s(node_xml_id, "unknown"));
	pcmk__str_update(&v->current, value);
	attrd_set_attr_flags(a, attrd_attr_changed);

	if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)
	&& pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) {

	if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) {
	attrd_set_requesting_shutdown();

	} else {
	attrd_clear_requesting_shutdown();
	}
	}

	// Write out new value or start dampening timer
	if (a->timeout_ms && a->timer) {
	crm_trace("Delaying write of %s %s for dampening",
	attr, pcmk__readable_interval(a->timeout_ms));
	mainloop_timer_start(a->timer);
	} else {
	attrd_write_or_elect_attribute(a);
	}

	} else {
	int is_force_write = 0;

	crm_element_value_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE,
	&is_force_write);

	if (is_force_write == 1 && a->timeout_ms && a->timer) {
	/* Save forced writing and set change flag. */
	/* The actual attribute is written by Writer after election. */
	crm_trace("%s[%s] from %s is unchanged (%s), forcing write",
	attr, host, peer->uname, pcmk__s(value, "unset"));
	attrd_set_attr_flags(a, attrd_attr_force_write);
	} else {
	crm_trace("%s[%s] from %s is unchanged (%s)",
	attr, host, peer->uname, pcmk__s(value, "unset"));
	}
	}

	// This allows us to later detect local values that peer doesn't know about
	attrd_set_value_flags(v, attrd_value_from_peer);

	- /* If this is a cluster node whose node ID we are learning, remember it */
	- if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote)
	- && (crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID,
	- (int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
	- record_peer_nodeid(v, host);
	+ // Remember node's XML ID if we're just learning it
	+ if ((node_xml_id != NULL)
	+ && !pcmk__str_eq(node_xml_id, prev_xml_id, pcmk__str_none)) {
	+ crm_trace("Learned %s[%s] node XML ID is %s (was %s)",
	+ a->id, v->nodename, node_xml_id,
	+ pcmk__s(prev_xml_id, "unknown"));
	+ attrd_set_node_xml_id(v->nodename, node_xml_id);
	+ if (attrd_election_won()) {
	+ // In case we couldn't write a value missing the XML ID before
	+ attrd_write_attributes(attrd_write_changed);
	+ }
	}
	}

	/* Apply a single attribute update from a peer, either to the host named in
	 * the XML or, when no host is given, to every host that already has a value
	 * for the attribute.
	 */
	static void
	attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter)
	{
	    attribute_t *a = NULL;
	    const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
	    const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
	    const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);

	    if (attr == NULL) {
	        crm_warn("Could not update attribute: peer did not specify name");
	        return;
	    }

	    a = attrd_populate_attribute(xml, attr);
	    if (a == NULL) {
	        return;
	    }

	    if (host == NULL) {
	        // If no host was specified, update all hosts
	        GHashTableIter vIter;

	        crm_debug("Setting %s for all hosts to %s", attr, value);

	        // Drop the host ID from the XML before fanning it out to every host
	        pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_HOST_ID);
	        g_hash_table_iter_init(&vIter, a->values);

	        while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
	            update_attr_on_host(a, peer, xml, attr, value, host, filter);
	        }

	    } else {
	        // Update attribute value for the given host
	        update_attr_on_host(a, peer, xml, attr, value, host, filter);
	    }

	    /* If this is a message from some attrd instance broadcasting its protocol
	     * version, check to see if it's a new minimum version.
	     */
	    if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) {
	        attrd_update_minimum_protocol_ver(peer->uname, value);
	    }
	}

	static void
	broadcast_unseen_local_values(void)
	{
	GHashTableIter aIter;
	GHashTableIter vIter;
	attribute_t *a = NULL;
	attribute_value_t *v = NULL;
	xmlNode *sync = NULL;

	g_hash_table_iter_init(&aIter, attributes);
	while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {

	g_hash_table_iter_init(&vIter, a->values);
	while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {

	if (!pcmk_is_set(v->flags, attrd_value_from_peer)
	&& pcmk__str_eq(v->nodename, attrd_cluster->uname,
	pcmk__str_casei)) {
	crm_trace("* %s[%s]='%s' is local-only",
	a->id, v->nodename, readable_value(v));
	if (sync == NULL) {
	sync = pcmk__xe_create(NULL, __func__);
	crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
	}
	attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer);
	}
	}
	}

	if (sync != NULL) {
	crm_debug("Broadcasting local-only values");
	attrd_send_message(NULL, sync, false);
	free_xml(sync);
	}
	}

	/* Create the cluster object, register the CPG and peer-status callbacks, and
	 * connect to the cluster.
	 *
	 * Returns pcmk_ok on success, otherwise the legacy return code corresponding
	 * to the connection failure.
	 */
	int
	attrd_cluster_connect(void)
	{
	    int legacy_rc = pcmk_rc_ok;

	    attrd_cluster = pcmk_cluster_new();

	    pcmk_cluster_set_destroy_fn(attrd_cluster, attrd_cpg_destroy);
	    pcmk_cpg_set_deliver_fn(attrd_cluster, attrd_cpg_dispatch);
	    pcmk_cpg_set_confchg_fn(attrd_cluster, pcmk__cpg_confchg_cb);

	    pcmk__cluster_set_status_callback(&attrd_peer_change_cb);

	    // Connection result is a standard code; callers get the legacy form
	    legacy_rc = pcmk_rc2legacy(pcmk_cluster_connect(attrd_cluster));
	    if (legacy_rc == pcmk_ok) {
	        return pcmk_ok;
	    }

	    crm_err("Cluster connection failed");
	    return legacy_rc;
	}

	/* Handle a peer's clear-failure request: turn the request XML into an update
	 * with no value, then apply it to every known attribute whose name matches
	 * the failure regular expression built from the resource, operation and
	 * interval in the request.
	 */
	void
	attrd_peer_clear_failure(pcmk__request_t *request)
	{
	    xmlNode *xml = request->xml;
	    const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
	    const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
	    const char *op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
	    const char *interval_spec = crm_element_value(xml,
	                                                  PCMK__XA_ATTR_CLEAR_INTERVAL);
	    guint interval_ms = 0U;
	    char *attr_name = NULL;
	    GHashTableIter attr_iter;
	    regex_t failure_re;
	    crm_node_t *peer = pcmk__get_node(0, request->peer, NULL,
	                                      pcmk__node_search_cluster_member);

	    pcmk_parse_interval_spec(interval_spec, &interval_ms);

	    if (attrd_failure_regex(&failure_re, rsc, op, interval_ms) != pcmk_ok) {
	        crm_info("Ignoring invalid request to clear failures for %s",
	                 pcmk__s(rsc, "all resources"));
	        return;
	    }

	    crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);

	    /* Make sure value is not set, so we delete */
	    pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);

	    g_hash_table_iter_init(&attr_iter, attributes);
	    while (g_hash_table_iter_next(&attr_iter, (gpointer *) &attr_name, NULL)) {
	        if (regexec(&failure_re, attr_name, 0, NULL, 0) != 0) {
	            continue;   // Attribute name is not a matching failure attribute
	        }

	        crm_trace("Matched %s when clearing %s",
	                  attr_name, pcmk__s(rsc, "all resources"));
	        crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr_name);
	        attrd_peer_update(peer, xml, host, false);
	    }
	    regfree(&failure_re);
	}

	/*!
	 * \internal
	 * \brief Load attributes from a peer sync response
	 *
	 * \param[in]     peer      Peer that sent sync response
	 * \param[in]     peer_won  Whether peer is the attribute writer
	 * \param[in,out] xml       Request XML
	 */
	void
	attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml)
	{
	    crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
	             peer->uname);

	    if (peer_won) {
	        /* Initialize the "seen" flag for all attributes to cleared, so we can
	         * detect attributes that local node has but the writer doesn't.
	         */
	        attrd_clear_value_seen();
	    }

	    // Process each attribute update in the sync response
	    for (xmlNode *child = pcmk__xe_first_child(xml, NULL, NULL, NULL);
	         child != NULL; child = pcmk__xe_next(child)) {

	        attrd_peer_update(peer, child,
	                          crm_element_value(child, PCMK__XA_ATTR_HOST), true);
	    }

	    if (peer_won) {
	        /* If any attributes are still not marked as seen, the writer doesn't
	         * know about them, so send all peers an update with them.
	         */
	        broadcast_unseen_local_values();
	    }
	}

	/*!
	* \internal
	* \brief Remove all attributes and optionally peer cache entries for a node
	*
	* \param[in] host Name of node to purge
	* \param[in] uncache If true, remove node from peer caches
	* \param[in] source Who requested removal (only used for logging)
	*/
	void
	attrd_peer_remove(const char *host, bool uncache, const char *source)
	{
	attribute_t *a = NULL;
	GHashTableIter aIter;

	CRM_CHECK(host != NULL, return);
	crm_notice("Removing all %s attributes for node %s "
	CRM_XS " %s reaping node from cache",
	host, source, (uncache? "and" : "without"));

	g_hash_table_iter_init(&aIter, attributes);
	while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
	if(g_hash_table_remove(a->values, host)) {
	crm_debug("Removed %s[%s] for peer %s", a->id, host, source);
	}
	}

	if (uncache) {
	pcmk__purge_node_from_cache(host, 0);
	+ attrd_forget_node_xml_id(host);
	}
	}

	/*!
	 * \internal
	 * \brief Send all known attributes and values to a peer
	 *
	 * Every value of every attribute is packed into one sync-response message.
	 *
	 * \param[in] peer  Peer to send sync to (if NULL, broadcast to all peers)
	 */
	void
	attrd_peer_sync(crm_node_t *peer)
	{
	    GHashTableIter attr_iter;
	    attribute_t *attr = NULL;
	    xmlNode *msg = pcmk__xe_create(NULL, __func__);

	    crm_xml_add(msg, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);

	    g_hash_table_iter_init(&attr_iter, attributes);
	    while (g_hash_table_iter_next(&attr_iter, NULL, (gpointer *) &attr)) {
	        GHashTableIter value_iter;
	        attribute_value_t *val = NULL;

	        g_hash_table_iter_init(&value_iter, attr->values);
	        while (g_hash_table_iter_next(&value_iter, NULL, (gpointer *) &val)) {
	            crm_debug("Syncing %s[%s]='%s' to %s",
	                      attr->id, val->nodename, readable_value(val),
	                      readable_peer(peer));
	            attrd_add_value_xml(msg, attr, val, false);
	        }
	    }

	    crm_debug("Syncing values to %s", readable_peer(peer));
	    attrd_send_message(peer, msg, false);
	    free_xml(msg);
	}

	/* Process an attribute update message from a peer. The message is either a
	 * single update or a parent element whose PCMK_XE_OP children are individual
	 * updates; afterwards, acknowledge clients waiting on the local sync point if
	 * any processed update requested one.
	 */
	void
	attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host,
	                  bool filter)
	{
	    bool handle_sync_point = false;

	    CRM_CHECK((peer != NULL) && (xml != NULL), return);
	    if (xml->children != NULL) {
	        for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL);
	             child != NULL; child = pcmk__xe_next_same(child)) {

	            // Children inherit the parent's attributes without overwriting their own
	            pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite);
	            attrd_peer_update_one(peer, child, filter);

	            if (attrd_request_has_sync_point(child)) {
	                handle_sync_point = true;
	            }
	        }

	    } else {
	        attrd_peer_update_one(peer, xml, filter);

	        if (attrd_request_has_sync_point(xml)) {
	            handle_sync_point = true;
	        }
	    }

	    /* If the update XML specified that the client wanted to wait for a sync
	     * point, process that now.
	     */
	    if (handle_sync_point) {
	        crm_trace("Hit local sync point for attribute update");
	        attrd_ack_waitlist_clients(attrd_sync_point_local, xml);
	    }
	}
	diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
	index 17df2a3d2f..f2b02b6fb6 100644
	--- a/daemons/attrd/attrd_ipc.c
	+++ b/daemons/attrd/attrd_ipc.c
	@@ -1,624 +1,625 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <errno.h>
	#include <stdint.h>
	#include <stdlib.h>
	+#include <inttypes.h> // PRIu32
	#include <sys/types.h>

	#include <crm/cluster.h>
	#include <crm/cluster/internal.h>
	#include <crm/common/acl_internal.h>
	#include <crm/common/ipc_internal.h>
	#include <crm/common/logging.h>
	#include <crm/common/results.h>
	#include <crm/common/strings_internal.h>
	#include <crm/common/util.h>
	#include <crm/common/xml.h>

	#include "pacemaker-attrd.h"

	static qb_ipcs_service_t *ipcs = NULL;

	/*!
	 * \internal
	 * \brief Build the XML reply to a client query
	 *
	 * \param[in] attr  Name of requested attribute
	 * \param[in] host  Name of requested host (or NULL for all hosts)
	 *
	 * \return New XML reply
	 * \note Caller is responsible for freeing the resulting XML
	 */
	static xmlNode *build_query_reply(const char *attr, const char *host)
	{
	    xmlNode *reply = pcmk__xe_create(NULL, __func__);
	    attribute_t *a;

	    crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_ATTRD);
	    crm_xml_add(reply, PCMK__XA_SUBT, PCMK__ATTRD_CMD_QUERY);
	    crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);

	    /* If desired attribute exists, add its value(s) to the reply */
	    a = g_hash_table_lookup(attributes, attr);
	    if (a) {
	        attribute_value_t *v;
	        xmlNode *host_value;

	        crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr);

	        /* Allow caller to use "localhost" to refer to local node */
	        if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) {
	            host = attrd_cluster->uname;
	            crm_trace("Mapped localhost to %s", host);
	        }

	        /* If a specific node was requested, add its value */
	        if (host) {
	            v = g_hash_table_lookup(a->values, host);
	            host_value = pcmk__xe_create(reply, PCMK_XE_NODE);
	            pcmk__xe_add_node(host_value, host, 0);

	            // A node with no stored value still gets an entry (value is NULL)
	            crm_xml_add(host_value, PCMK__XA_ATTR_VALUE,
	                        (v? v->current : NULL));

	        /* Otherwise, add all nodes' values */
	        } else {
	            GHashTableIter iter;

	            g_hash_table_iter_init(&iter, a->values);
	            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
	                host_value = pcmk__xe_create(reply, PCMK_XE_NODE);
	                pcmk__xe_add_node(host_value, v->nodename, 0);
	                crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current);
	            }
	        }
	    }
	    return reply;
	}

	/*!
	 * \internal
	 * \brief Handle a client request to clear resource failure attributes
	 *
	 * If minimum_protocol_version is at least 2, the request is broadcast
	 * unchanged to all peers. Otherwise it is rewritten in place as a regex-based
	 * update (with no attribute name or value) and handed to
	 * attrd_client_update().
	 *
	 * \param[in,out] request  Client request (its XML may be modified)
	 *
	 * \return NULL if the request was broadcast, otherwise whatever
	 *         attrd_client_update() returns
	 */
	xmlNode *
	attrd_client_clear_failure(pcmk__request_t *request)
	{
	    xmlNode *xml = request->xml;
	    const char *rsc, *op, *interval_spec;

	    if (minimum_protocol_version >= 2) {
	        /* Propagate to all peers (including ourselves).
	         * This ends up at attrd_peer_message().
	         */
	        attrd_send_message(NULL, xml, false);
	        pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	        return NULL;
	    }

	    rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
	    op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
	    interval_spec = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_INTERVAL);

	    /* Map this to an update */
	    crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);

	    /* Add regular expression matching desired attributes */

	    if (rsc) {
	        char *pattern;

	        if (op == NULL) {
	            pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);

	        } else {
	            guint interval_ms = 0U;

	            pcmk_parse_interval_spec(interval_spec, &interval_ms);
	            pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP,
	                                        rsc, op, interval_ms);
	        }

	        crm_xml_add(xml, PCMK__XA_ATTR_REGEX, pattern);
	        free(pattern);

	    } else {
	        // No resource given: use the pattern that clears everything
	        crm_xml_add(xml, PCMK__XA_ATTR_REGEX, ATTRD_RE_CLEAR_ALL);
	    }

	    /* Make sure attribute and value are not set, so we delete via regex */
	    pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_NAME);
	    pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);

	    return attrd_client_update(request);
	}

	/*!
	 * \internal
	 * \brief Handle a client request to remove all attribute values for a peer
	 *
	 * The peer is identified by the host name in the request XML, or failing
	 * that by a positive host ID that is resolved to a name. The client is
	 * ACKed immediately; if a host could be determined, the request is then
	 * broadcast to all peers.
	 *
	 * \param[in,out] request  Client request (a node may be added to its XML)
	 *
	 * \return NULL (always)
	 */
	xmlNode *
	attrd_client_peer_remove(pcmk__request_t *request)
	{
	xmlNode *xml = request->xml;

	// Host and ID are not used in combination, rather host has precedence
	const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);

	// Owns the name obtained from the cluster layer, if any; freed below
	char *host_alloc = NULL;

	attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);

	if (host == NULL) {
	int nodeid = 0;

	crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID, &nodeid);
	if (nodeid > 0) {
	crm_node_t *node = NULL;

	/* Do not redeclare host_alloc here: a shadowing local would leave the
	* function-scope pointer NULL, so the name allocated below would never
	* be freed.
	*/
	- node = pcmk__search_node_caches(nodeid, NULL,
	+ node = pcmk__search_node_caches(nodeid, NULL, NULL,
	pcmk__node_search_cluster_member);
	if (node && node->uname) {
	// Use cached name if available
	host = node->uname;
	} else {
	// Otherwise ask cluster layer
	host_alloc = pcmk__cluster_node_name(nodeid);
	host = host_alloc;
	}
	pcmk__xe_add_node(xml, host, 0);
	}
	}

	if (host) {
	crm_info("Client %s is requesting all values for %s be removed",
	pcmk__client_name(request->ipc_client), host);
	attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
	free(host_alloc);
	} else {
	crm_info("Ignoring request by client %s to remove all peer values without specifying peer",
	pcmk__client_name(request->ipc_client));
	}

	pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	return NULL;
	}

	/*!
	 * \internal
	 * \brief Answer a client's query for an attribute's value(s)
	 *
	 * \param[in,out] request  Client request; its result is set here
	 *
	 * \return Newly built reply XML on success, or NULL if the request named no
	 *         attribute or the reply could not be created
	 */
	xmlNode *
	attrd_client_query(pcmk__request_t *request)
	{
	    const char *name = NULL;
	    const char *node_name = NULL;
	    xmlNode *answer = NULL;

	    crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client));

	    // An attribute name is mandatory
	    name = crm_element_value(request->xml, PCMK__XA_ATTR_NAME);
	    if (name == NULL) {
	        pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
	                            "Ignoring malformed query from %s (no attribute name given)",
	                            pcmk__client_name(request->ipc_client));
	        return NULL;
	    }

	    // The host is optional; build_query_reply() accepts NULL
	    node_name = crm_element_value(request->xml, PCMK__XA_ATTR_HOST);
	    answer = build_query_reply(name, node_name);
	    if (answer == NULL) {
	        pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
	                            "Could not respond to query from %s: could not create XML reply",
	                            pcmk__client_name(request->ipc_client));
	        return NULL;
	    }

	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    request->ipc_client->request_id = 0;
	    return answer;
	}

	/*!
	 * \internal
	 * \brief Handle a client request to write out all attributes
	 *
	 * The client is ACKed first, then a write of all attributes is requested
	 * with the "no delay" option.
	 *
	 * \param[in,out] request  Client request; its result is set to OK
	 *
	 * \return NULL (always)
	 */
	xmlNode *
	attrd_client_refresh(pcmk__request_t *request)
	{
	    crm_info("Updating all attributes");

	    attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
	    attrd_write_attributes(attrd_write_all|attrd_write_no_delay);

	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    return NULL;
	}

	/*!
	* \internal
	* \brief Default a request to the local node when it names no host
	*
	* \param[in,out] xml Request XML to check (and possibly modify)
	*
	* \note Nothing is changed if \p xml already has PCMK__XA_ATTR_HOST. The
	* removed ("-") lines added the local node via pcmk__xe_add_node() using
	* attrd_cluster->nodeid; the added ("+") lines instead set
	* PCMK__XA_ATTR_HOST to attrd_cluster->uname and PCMK__XA_ATTR_HOST_ID to
	* attrd_cluster->uuid.
	*/
	static void
	handle_missing_host(xmlNode *xml)
	{
	- const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
	-
	- if (host == NULL) {
	- crm_trace("Inferring host");
	- pcmk__xe_add_node(xml, attrd_cluster->uname, attrd_cluster->nodeid);
	+ if (crm_element_value(xml, PCMK__XA_ATTR_HOST) == NULL) {
	+ crm_trace("Inferring local node %s with XML ID %s",
	+ attrd_cluster->uname, attrd_cluster->uuid);
	+ crm_xml_add(xml, PCMK__XA_ATTR_HOST, attrd_cluster->uname);
	+ crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, attrd_cluster->uuid);
	}
	}

	/*!
	 * \internal
	 * \brief Expand a regex-based IPC message into one child per matching attribute
	 *
	 * Convert a single IPC message with a regex into one with multiple children,
	 * one for each known attribute whose name matches the regex.
	 *
	 * \param[in,out] xml    Message to add child elements to
	 * \param[in]     attr   Attribute name from the message (NULL if none)
	 * \param[in]     value  Attribute value from the message (used for logging)
	 * \param[in]     regex  Regular expression from the message (NULL if none)
	 *
	 * \return pcmk_rc_ok if \p attr was given or at least one attribute matched,
	 *         EINVAL if \p regex could not be compiled, pcmk_rc_op_unsatisfied
	 *         if \p regex is valid but matched nothing, or pcmk_rc_bad_nvpair if
	 *         neither \p attr nor \p regex was given
	 */
	static int
	expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex)
	{
	    if (attr == NULL && regex) {
	        bool matched = false;
	        GHashTableIter aIter;
	        regex_t r_patt;

	        crm_debug("Setting %s to %s", regex, value);
	        if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) {
	            return EINVAL;
	        }

	        // attr is NULL here, so it is reused to hold each table key in turn
	        g_hash_table_iter_init(&aIter, attributes);
	        while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
	            int status = regexec(&r_patt, attr, 0, NULL, 0);

	            if (status == 0) {
	                xmlNode *child = pcmk__xe_create(xml, PCMK_XE_OP);

	                crm_trace("Matched %s with %s", attr, regex);
	                matched = true;

	                /* Copy all the non-conflicting attributes from the parent over,
	                 * but remove the regex and replace it with the name.
	                 */
	                pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite);
	                pcmk__xe_remove_attr(child, PCMK__XA_ATTR_REGEX);
	                crm_xml_add(child, PCMK__XA_ATTR_NAME, attr);
	            }
	        }

	        regfree(&r_patt);

	        /* Return a code if we never matched anything. This should not be treated
	         * as an error. It indicates there was a regex, and it was a valid regex,
	         * but simply did not match anything and the caller should not continue
	         * doing any regex-related processing.
	         */
	        if (!matched) {
	            return pcmk_rc_op_unsatisfied;
	        }

	    } else if (attr == NULL) {
	        return pcmk_rc_bad_nvpair;
	    }

	    return pcmk_rc_ok;
	}

	/*!
	 * \internal
	 * \brief Run regex expansion for an update request and record any failure
	 *
	 * \param[in,out] request  Update request; its XML may gain children and its
	 *                         result is set when expansion fails outright
	 *
	 * \return Return code from expand_regexes()
	 */
	static int
	handle_regexes(pcmk__request_t *request)
	{
	    xmlNode *msg = request->xml;
	    const char *name = crm_element_value(msg, PCMK__XA_ATTR_NAME);
	    const char *val = crm_element_value(msg, PCMK__XA_ATTR_VALUE);
	    const char *pattern = crm_element_value(msg, PCMK__XA_ATTR_REGEX);
	    const int rc = expand_regexes(msg, name, val, pattern);

	    if (rc == EINVAL) {
	        // The pattern could not be compiled
	        pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
	                            "Bad regex '%s' for update from client %s", pattern,
	                            pcmk__client_name(request->ipc_client));
	        return rc;
	    }

	    if (rc == pcmk_rc_bad_nvpair) {
	        // Neither an attribute name nor a pattern was supplied
	        crm_err("Update request did not specify attribute or regular expression");
	        pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
	                            "Client %s update request did not specify attribute or regular expression",
	                            pcmk__client_name(request->ipc_client));
	    }

	    // Any other code is passed through without touching the result
	    return rc;
	}

	/*!
	 * \internal
	 * \brief Expand an attribute value in a request, if it needs expansion
	 *
	 * \param[in,out] value  Where the request's current value string is stored;
	 *                       updated to point at the replacement if one is set
	 * \param[in,out] xml    Request (or child operation) XML holding the value
	 * \param[in]     op     Requested attrd operation
	 * \param[in]     attr   Name of the attribute being updated
	 *
	 * \return EINVAL if \p attr is unknown and \p op is
	 *         PCMK__ATTRD_CMD_UPDATE_DELAY, otherwise pcmk_rc_ok
	 */
	static int
	handle_value_expansion(const char **value, xmlNode *xml, const char *op,
	                       const char *attr)
	{
	    attribute_t *a = g_hash_table_lookup(attributes, attr);

	    if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
	        return EINVAL;
	    }

	    if (*value && attrd_value_needs_expansion(*value)) {
	        int int_value;
	        attribute_value_t *v = NULL;

	        if (a) {
	            // Expansion is relative to the host's currently recorded value
	            const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
	            v = g_hash_table_lookup(a->values, host);
	        }

	        int_value = attrd_expand_value(*value, (v? v->current : NULL));

	        crm_info("Expanded %s=%s to %d", attr, *value, int_value);
	        crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value);

	        /* Replacing the value frees the previous memory, so re-query it */
	        *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
	    }

	    return pcmk_rc_ok;
	}

	/*!
	 * \internal
	 * \brief Broadcast an update message, requesting confirmations if needed
	 *
	 * \param[in,out] request  Client request the update originated from
	 * \param[in,out] xml      Update message to broadcast
	 */
	static void
	send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml)
	{
	    if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) {
	        /* The client is waiting on the cluster-wide sync point. In this case,
	         * the response ACK is not sent until this attrd broadcasts the update
	         * and receives its own confirmation back from all peers.
	         */
	        attrd_expect_confirmations(request, attrd_cluster_sync_point_update);
	        attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */

	    } else {
	        /* The client is either waiting on the local sync point or was not
	         * waiting on any sync point at all. For the local sync point, the
	         * response ACK is sent in attrd_peer_update. For clients not
	         * waiting on any sync point, the response ACK is sent in
	         * handle_update_request immediately before this function was called.
	         */
	        attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
	    }
	}

	/*!
	 * \internal
	 * \brief Process one child operation of a multi-update request
	 *
	 * Intended as a pcmk__xe_foreach_child() callback: points the request at
	 * \p child and runs attrd_client_update() on it.
	 *
	 * \param[in,out] child  Child operation XML
	 * \param[in,out] data   The pcmk__request_t being processed
	 *
	 * \return pcmk_rc_ok (always)
	 * \note The caller is expected to restore request->xml afterward.
	 */
	static int
	send_child_update(xmlNode *child, void *data)
	{
	    pcmk__request_t *request = (pcmk__request_t *) data;

	    /* Calling pcmk__set_result is handled by one of these calls to
	     * attrd_client_update, so no need to do it again here.
	     */
	    request->xml = child;
	    attrd_client_update(request);
	    return pcmk_rc_ok;
	}

	/*!
	 * \internal
	 * \brief Handle a client request to update one or more attributes
	 *
	 * A message with children is treated as a multi-operation update: it is
	 * either broadcast as a whole (after fixing up each child) or split into
	 * individual updates, depending on minimum_protocol_version. A message with
	 * a regex is first expanded into children and then processed again. A plain
	 * message has its host and value fixed up and is broadcast.
	 *
	 * \param[in,out] request  Client request; its XML may be modified and its
	 *                         result is set
	 *
	 * \return NULL, or (for the regex case) the result of the recursive call
	 */
	xmlNode *
	attrd_client_update(pcmk__request_t *request)
	{
	    xmlNode *xml = NULL;
	    const char *attr, *value, *regex;

	    CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL);

	    xml = request->xml;

	    /* If the message has children, that means it is a message from a newer
	     * client that supports sending multiple operations at a time. There are
	     * two ways we can handle that.
	     */
	    if (xml->children != NULL) {
	        if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) {
	            /* First, if all peers support a certain protocol version, we can
	             * just broadcast the big message and they'll handle it. However,
	             * we also need to apply all the transformations in this function
	             * to the children since they don't happen anywhere else.
	             */
	            for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL,
	                                                       NULL);
	                 child != NULL; child = pcmk__xe_next_same(child)) {

	                attr = crm_element_value(child, PCMK__XA_ATTR_NAME);
	                value = crm_element_value(child, PCMK__XA_ATTR_VALUE);

	                handle_missing_host(child);

	                if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) {
	                    pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
	                                        "Attribute %s does not exist", attr);
	                    return NULL;
	                }
	            }

	            send_update_msg_to_cluster(request, xml);
	            pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

	        } else {
	            /* Save the original xml node pointer so it can be restored after iterating
	             * over all the children.
	             */
	            xmlNode *orig_xml = request->xml;

	            /* Second, if they do not support that protocol version, split it
	             * up into individual messages and call attrd_client_update on
	             * each one.
	             */
	            pcmk__xe_foreach_child(xml, PCMK_XE_OP, send_child_update, request);
	            request->xml = orig_xml;
	        }

	        return NULL;
	    }

	    attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
	    value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
	    regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX);

	    if (handle_regexes(request) != pcmk_rc_ok) {
	        /* Error handling was already dealt with in handle_regexes, so just return. */
	        return NULL;
	    } else if (regex) {
	        /* Recursively call attrd_client_update on the new message with regexes
	         * expanded. If supported by the attribute daemon, this means that all
	         * matches can also be handled atomically.
	         */
	        return attrd_client_update(request);
	    }

	    handle_missing_host(xml);

	    if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) {
	        pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
	                            "Attribute %s does not exist", attr);
	        return NULL;
	    }

	    crm_debug("Broadcasting %s[%s]=%s%s",
	              attr, crm_element_value(xml, PCMK__XA_ATTR_HOST),
	              value, (attrd_election_won()? " (writer)" : ""));

	    send_update_msg_to_cluster(request, xml);
	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Decide whether to accept a newly connecting IPC client
	 *
	 * \param[in,out] c    New connection
	 * \param[in]     uid  Client user id
	 * \param[in]     gid  Client group id
	 *
	 * \return pcmk_ok if the client was registered, -ECONNREFUSED if the daemon
	 *         is shutting down, or -ENOMEM if the client could not be created
	 */
	static int32_t
	attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
	{
	    crm_trace("New client connection %p", c);

	    if (!attrd_shutting_down(false)) {
	        // Normal operation: register the client
	        return (pcmk__new_client(c, uid, gid) != NULL)? pcmk_ok : -ENOMEM;
	    }

	    crm_info("Ignoring new connection from pid %d during shutdown",
	             pcmk__client_pid(c));
	    return -ECONNREFUSED;
	}

	/*!
	 * \internal
	 * \brief Clean up after a client IPC connection has closed
	 *
	 * \param[in] c  Connection that was closed
	 *
	 * \return FALSE (i.e. do not re-run this callback)
	 */
	static int32_t
	attrd_ipc_closed(qb_ipcs_connection_t *c)
	{
	    pcmk__client_t *closed = pcmk__find_client(c);

	    if (closed != NULL) {
	        crm_trace("Cleaning up closed client connection %p", c);

	        // Drop the client from the sync point waitlist, if it is on it
	        attrd_remove_client_from_waitlist(closed);

	        // Stop waiting for peer confirmations on this client's behalf
	        attrd_do_not_wait_for_client(closed);

	        pcmk__free_client(closed);

	    } else {
	        crm_trace("Ignoring request to clean up unknown connection %p", c);
	    }

	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Handle destruction of a client IPC connection
	 *
	 * \param[in,out] c  Connection being destroyed
	 *
	 * \note A destroyed connection gets the same clean-up as a closed one; this
	 *       wrapper exists only because the callback's return type differs.
	 */
	static void
	attrd_ipc_destroy(qb_ipcs_connection_t *c)
	{
	    crm_trace("Destroying client connection %p", c);

	    // The close handler's return value has no meaning here
	    (void) attrd_ipc_closed(c);
	}

	/*!
	 * \internal
	 * \brief Parse and process one IPC message from a client
	 *
	 * \param[in,out] c     Connection the message arrived on
	 * \param[in]     data  Raw IPC message data
	 * \param[in]     size  Unused
	 *
	 * \return 0 (always)
	 */
	static int32_t
	attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
	{
	    uint32_t id = 0;
	    uint32_t flags = 0;
	    pcmk__client_t *client = pcmk__find_client(c);
	    xmlNode *xml = NULL;

	    // Sanity-check, and parse XML from IPC data
	    CRM_CHECK((c != NULL) && (client != NULL), return 0);
	    if (data == NULL) {
	        crm_debug("No IPC data from PID %d", pcmk__client_pid(c));
	        return 0;
	    }

	    xml = pcmk__client_data2xml(client, data, &id, &flags);

	    if (xml == NULL) {
	        crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c));
	        pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
	                           CRM_EX_PROTOCOL);
	        return 0;

	    } else {
	        pcmk__request_t request = {
	            .ipc_client     = client,
	            .ipc_id         = id,
	            .ipc_flags      = flags,
	            .peer           = NULL,
	            .xml            = xml,
	            .call_options   = 0,
	            .result         = PCMK__UNKNOWN_RESULT,
	        };

	        pcmk__assert(client->user != NULL);
	        pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user);

	        request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);

	        // Release the parsed XML on this early exit too, so it is not leaked
	        CRM_CHECK(request.op != NULL, free_xml(xml); return 0);

	        attrd_handle_request(&request);
	        pcmk__reset_request(&request);
	    }

	    free_xml(xml);
	    return 0;
	}

	/* IPC server callback table; attrd_init_ipc() passes this to
	* pcmk__serve_attrd_ipc(). No connection_created callback is installed.
	*/
	static struct qb_ipcs_service_handlers ipc_callbacks = {
	.connection_accept = attrd_ipc_accept,
	.connection_created = NULL,
	.msg_process = attrd_ipc_dispatch,
	.connection_closed = attrd_ipc_closed,
	.connection_destroyed = attrd_ipc_destroy
	};

	/*!
	 * \internal
	 * \brief Tear down the attrd IPC server, if one was set up
	 *
	 * \note Safe to call when no server exists; the handle is reset to NULL so
	 *       a repeated call does nothing.
	 */
	void
	attrd_ipc_fini(void)
	{
	    if (ipcs == NULL) {
	        return;
	    }

	    pcmk__drop_all_clients(ipcs);
	    qb_ipcs_destroy(ipcs);
	    ipcs = NULL;
	}

	/*!
	* \internal
	* \brief Set up attrd IPC communication
	*
	* \note Passes the file-scope \c ipcs handle and the \c ipc_callbacks table to
	* pcmk__serve_attrd_ipc(); attrd_ipc_fini() is the matching teardown.
	*/
	void
	attrd_init_ipc(void)
	{
	pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks);
	}
	diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c
	index edb33a5cd1..845ec1cfb4 100644
	--- a/daemons/attrd/attrd_messages.c
	+++ b/daemons/attrd/attrd_messages.c
	@@ -1,357 +1,359 @@
	/*
	* Copyright 2022-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	+#include <inttypes.h> // PRIu32
	#include <glib.h>

	#include <crm/common/messages_internal.h>
	#include <crm/cluster/internal.h> // pcmk__get_node()
	#include <crm/common/xml.h>

	#include "pacemaker-attrd.h"

	int minimum_protocol_version = -1;

	static GHashTable *attrd_handlers = NULL;

	/*!
	 * \internal
	 * \brief Check whether an XML attribute is the sync point attribute
	 *
	 * \param[in] attr  XML attribute to check
	 * \param[in] data  Unused
	 *
	 * \return true if the attribute's name is PCMK__XA_ATTR_SYNC_POINT
	 */
	static bool
	is_sync_point_attr(xmlAttrPtr attr, void *data)
	{
	    const char *attr_name = (const char *) attr->name;

	    return pcmk__str_eq(attr_name, PCMK__XA_ATTR_SYNC_POINT, pcmk__str_none);
	}

	/*!
	 * \internal
	 * \brief Strip the sync point attribute from an element and its operations
	 *
	 * Removes the attribute from \p xml itself, then recurses into each
	 * PCMK_XE_OP child.
	 *
	 * \param[in,out] xml   Element to strip
	 * \param[in]     data  Unused (present to match the foreach-child callback)
	 *
	 * \return pcmk_rc_ok (always)
	 */
	static int
	remove_sync_point_attribute(xmlNode *xml, void *data)
	{
	    pcmk__xe_remove_matching_attrs(xml, is_sync_point_attr, NULL);
	    pcmk__xe_foreach_child(xml, PCMK_XE_OP, remove_sync_point_attribute, NULL);
	    return pcmk_rc_ok;
	}

	/* A sync point on a multi-update IPC message cannot be honored when some
	 * attrd in the cluster is too old to support multi-update messages. Rather
	 * than pretend to support it, drop the sync point attribute here so the
	 * client is ACKed immediately.
	 */
	static void
	remove_unsupported_sync_points(pcmk__request_t *request)
	{
	    // Only multi-update messages (those with children) are affected
	    if (request->xml->children == NULL) {
	        return;
	    }

	    // Nothing to strip when every node supports multi-update messages
	    if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) {
	        return;
	    }

	    if (!attrd_request_has_sync_point(request->xml)) {
	        return;
	    }

	    crm_warn("Ignoring sync point in request from %s because not all nodes support it",
	             pcmk__request_origin(request));
	    remove_sync_point_attribute(request->xml, NULL);
	}

	/*!
	 * \internal
	 * \brief Reject a request whose operation is not recognized
	 *
	 * \param[in,out] request  Request to reject; its result is set to a
	 *                         protocol error
	 *
	 * \return NULL (always)
	 */
	static xmlNode *
	handle_unknown_request(pcmk__request_t *request)
	{
	    const char *op = request->op;

	    crm_err("Unknown IPC request %s from %s %s",
	            op, pcmk__request_origin_type(request),
	            pcmk__request_origin(request));

	    pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
	                        "Unknown request type '%s' (bug?)", op);
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Handle a clear-failure request from a peer or a client
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return NULL for a peer request, otherwise whatever
	 *         attrd_client_clear_failure() returns
	 */
	static xmlNode *
	handle_clear_failure_request(pcmk__request_t *request)
	{
	    if (request->peer == NULL) {
	        remove_unsupported_sync_points(request);

	        if (!attrd_request_has_sync_point(request->xml)) {
	            /* No sync point was requested, so ACK right away. (With a sync
	             * point, the ACK is deferred until that point is reached.)
	             */
	            attrd_send_ack(request->ipc_client, request->ipc_id,
	                           request->ipc_flags);

	        } else {
	            /* The client asked to wait for a sync point, so put it on the
	             * wait list. Clients on that list get no ACK until their sync
	             * point is hit, and so stay blocked until then.
	             */
	            attrd_add_client_to_waitlist(request);
	        }

	        return attrd_client_clear_failure(request);
	    }

	    /* It is not currently possible to receive this as a peer command,
	     * but will be, if we one day enable propagating this operation.
	     */
	    attrd_peer_clear_failure(request);
	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Handle a confirmation message from a peer
	 *
	 * \param[in,out] request  Request to handle; its result is set to OK when
	 *                         the confirmation was processed, or to a protocol
	 *                         error when the message has no usable call ID
	 *
	 * \return NULL for a peer request, otherwise the result of
	 *         handle_unknown_request() (confirmations are not accepted from
	 *         clients)
	 */
	static xmlNode *
	handle_confirm_request(pcmk__request_t *request)
	{
	    int callid = 0;

	    if (request->peer == NULL) {
	        return handle_unknown_request(request);
	    }

	    crm_debug("Received confirmation from %s", request->peer);

	    if (crm_element_value_int(request->xml, PCMK__XA_CALL_ID,
	                              &callid) == -1) {
	        /* Return here so the error result is kept rather than being
	         * replaced by the success result below.
	         */
	        pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
	                         "Could not get callid from XML");
	        return NULL;
	    }

	    attrd_handle_confirmation(callid, request->peer);
	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Handle a flush request
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return NULL for a peer request (which is accepted and ignored), otherwise
	 *         the result of handle_unknown_request()
	 */
	static xmlNode *
	handle_flush_request(pcmk__request_t *request)
	{
	    if (request->peer == NULL) {
	        return handle_unknown_request(request);
	    }

	    /* Ignore. The flush command was removed in 2.0.0 but may be
	     * received from peers running older versions.
	     */
	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Handle a query request
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return Result of attrd_client_query() when no peer is set, otherwise the
	 *         result of handle_unknown_request()
	 */
	static xmlNode *
	handle_query_request(pcmk__request_t *request)
	{
	    return (request->peer == NULL)? attrd_client_query(request)
	                                   : handle_unknown_request(request);
	}

	/*!
	 * \internal
	 * \brief Handle a peer-remove request from a peer or a client
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return NULL for a peer request, otherwise whatever
	 *         attrd_client_peer_remove() returns
	 */
	static xmlNode *
	handle_remove_request(pcmk__request_t *request)
	{
	    const char *target = NULL;
	    bool reap = false;

	    if (request->peer == NULL) {
	        return attrd_client_peer_remove(request);
	    }

	    target = crm_element_value(request->xml, PCMK__XA_ATTR_HOST);

	    if (pcmk__xe_get_bool_attr(request->xml, PCMK__XA_REAP,
	                               &reap) != pcmk_rc_ok) {
	        reap = true; // Default to true for backward compatibility
	    }

	    attrd_peer_remove(target, reap, request->peer);
	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Handle a refresh request
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return Result of attrd_client_refresh() when no peer is set, otherwise
	 *         the result of handle_unknown_request()
	 */
	static xmlNode *
	handle_refresh_request(pcmk__request_t *request)
	{
	    return (request->peer == NULL)? attrd_client_refresh(request)
	                                   : handle_unknown_request(request);
	}

	/*!
	 * \internal
	 * \brief Handle a sync response
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return NULL unless the request came from an IPC client, in which case
	 *         the result of handle_unknown_request()
	 */
	static xmlNode *
	handle_sync_response_request(pcmk__request_t *request)
	{
	    if (request->ipc_client != NULL) {
	        return handle_unknown_request(request);
	    }

	    if (request->peer != NULL) {
	        crm_node_t *sender = pcmk__get_node(0, request->peer, NULL,
	                                            pcmk__node_search_cluster_member);
	        bool sender_won = attrd_check_for_new_writer(sender, request->xml);

	        // A response that names the local node as sender is not applied
	        if (!pcmk__str_eq(sender->uname, attrd_cluster->uname, pcmk__str_casei)) {
	            attrd_peer_sync_response(sender, sender_won, request->xml);
	        }
	    }

	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Handle an update request from a peer or a client
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return NULL for a peer request, otherwise whatever attrd_client_update()
	 *         returns
	 */
	static xmlNode *
	handle_update_request(pcmk__request_t *request)
	{
	    if (request->peer != NULL) {
	        const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_HOST);
	        crm_node_t *peer = pcmk__get_node(0, request->peer, NULL,
	                                          pcmk__node_search_cluster_member);

	        attrd_peer_update(peer, request->xml, host, false);
	        pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	        return NULL;

	    } else {
	        remove_unsupported_sync_points(request);

	        if (attrd_request_has_sync_point(request->xml)) {
	            /* If this client supplied a sync point it wants to wait for, add it to
	             * the wait list. Clients on this list will not receive an ACK until
	             * their sync point is hit which will result in the client stalled there
	             * until it receives a response.
	             *
	             * All other clients will receive the expected response as normal.
	             */
	            attrd_add_client_to_waitlist(request);

	        } else {
	            /* If the client doesn't want to wait for a sync point, go ahead and send
	             * the ACK immediately. Otherwise, we'll send the ACK when the appropriate
	             * sync point is reached.
	             *
	             * In the normal case, attrd_client_update can be called recursively which
	             * makes where to send the ACK tricky. Doing it here ensures the client
	             * only ever receives one.
	             *
	             * NOTE(review): this passes request->flags, whereas the other
	             * handlers in this file pass request->ipc_flags to
	             * attrd_send_ack() -- confirm which is intended.
	             */
	            attrd_send_ack(request->ipc_client, request->ipc_id,
	                           request->flags|crm_ipc_client_response);
	        }

	        return attrd_client_update(request);
	    }
	}

	/*!
	* \internal
	* \brief Build the table mapping attrd operation names to handler functions
	*
	* \note The result is stored in the file-scope \c attrd_handlers. All three
	* update variants share handle_update_request(); the final entry, with a
	* NULL name, names handle_unknown_request().
	*/
	static void
	attrd_register_handlers(void)
	{
	pcmk__server_command_t handlers[] = {
	{ PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request },
	{ PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request },
	{ PCMK__ATTRD_CMD_FLUSH, handle_flush_request },
	{ PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request },
	{ PCMK__ATTRD_CMD_QUERY, handle_query_request },
	{ PCMK__ATTRD_CMD_REFRESH, handle_refresh_request },
	{ PCMK__ATTRD_CMD_SYNC_RESPONSE, handle_sync_response_request },
	{ PCMK__ATTRD_CMD_UPDATE, handle_update_request },
	{ PCMK__ATTRD_CMD_UPDATE_DELAY, handle_update_request },
	{ PCMK__ATTRD_CMD_UPDATE_BOTH, handle_update_request },
	{ NULL, handle_unknown_request },
	};

	attrd_handlers = pcmk__register_handlers(handlers);
	}

	/*!
	 * \internal
	 * \brief Free the request handler table, if one was registered
	 *
	 * \note The table pointer is reset to NULL, so attrd_handle_request() will
	 *       register the handlers again if it is called afterward.
	 */
	void
	attrd_unregister_handlers(void)
	{
	    if (attrd_handlers == NULL) {
	        return;
	    }

	    g_hash_table_destroy(attrd_handlers);
	    attrd_handlers = NULL;
	}

	/*!
	 * \internal
	 * \brief Dispatch a request to its handler, send any reply, and log the outcome
	 *
	 * \param[in,out] request  Request to process; it is reset before returning
	 *
	 * \note The handler table is registered on first use.
	 */
	void
	attrd_handle_request(pcmk__request_t *request)
	{
	    xmlNode *response = NULL;
	    char *summary = NULL;
	    const char *why = NULL;

	    if (attrd_handlers == NULL) {
	        attrd_register_handlers();
	    }

	    response = pcmk__process_request(request, attrd_handlers);

	    if (response != NULL) {
	        crm_log_xml_trace(response, "Reply");

	        // Replies are only ever delivered over IPC
	        if (request->ipc_client == NULL) {
	            crm_err("Not sending CPG reply to client");
	        } else {
	            pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, response,
	                               request->ipc_flags);
	        }

	        free_xml(response);
	    }

	    // The exit reason, when there is one, is appended in parentheses
	    why = request->result.exit_reason;
	    summary = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s",
	                                request->op, pcmk__request_origin_type(request),
	                                pcmk__request_origin(request),
	                                pcmk_exec_status_str(request->result.execution_status),
	                                (why == NULL)? "" : " (",
	                                pcmk__s(why, ""),
	                                (why == NULL)? "" : ")");

	    if (pcmk__result_ok(&request->result)) {
	        crm_debug("%s", summary);
	    } else {
	        crm_warn("%s", summary);
	    }

	    free(summary);
	    pcmk__reset_request(request);
	}

	/*!
	* \internal
	* \brief Broadcast private attribute for local node with protocol version
	*
	* \note Builds an update message that sets CRM_ATTR_PROTOCOL to
	* ATTRD_PROTOCOL_VERSION as a private attribute of the local node, sends it
	* with no destination node (NULL) and no confirmation request, then frees it.
	* The removed ("-") line identified the local node via pcmk__xe_add_node()
	* with attrd_cluster->nodeid; the added ("+") lines set PCMK__XA_ATTR_HOST and
	* PCMK__XA_ATTR_HOST_ID from attrd_cluster->uname and attrd_cluster->uuid.
	*/
	void
	attrd_broadcast_protocol(void)
	{
	xmlNode *attrd_op = pcmk__xe_create(NULL, __func__);

	crm_xml_add(attrd_op, PCMK__XA_T, PCMK__VALUE_ATTRD);
	crm_xml_add(attrd_op, PCMK__XA_SRC, crm_system_name);
	crm_xml_add(attrd_op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
	crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL);
	crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION);
	crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1);
	- pcmk__xe_add_node(attrd_op, attrd_cluster->uname, attrd_cluster->nodeid);
	+ crm_xml_add(attrd_op, PCMK__XA_ATTR_HOST, attrd_cluster->uname);
	+ crm_xml_add(attrd_op, PCMK__XA_ATTR_HOST_ID, attrd_cluster->uuid);

	crm_debug("Broadcasting attrd protocol version %s for node %s",
	ATTRD_PROTOCOL_VERSION, attrd_cluster->uname);

	attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */

	free_xml(attrd_op);
	}

	/*!
	 * \internal
	 * \brief Stamp an attrd message and send it over the cluster layer
	 *
	 * The message type and protocol version are set on \p data, along with the
	 * writer information, before it is sent.
	 *
	 * \param[in]     node     Destination node (NULL for all nodes)
	 * \param[in,out] data     Message to send (modified in place)
	 * \param[in]     confirm  Whether to ask the recipient(s) for a confirmation
	 *
	 * \return Whatever pcmk__cluster_send_message() returns
	 */
	gboolean
	attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm)
	{
	    const char *op = crm_element_value(data, PCMK_XA_TASK);

	    crm_xml_add(data, PCMK__XA_T, PCMK__VALUE_ATTRD);
	    crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);

	    /* Request a confirmation from the destination peer node (which could
	     * be all if node is NULL) that the message has been received and
	     * acted upon.
	     */
	    if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) {
	        pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm);
	    }

	    attrd_xml_add_writer(data);
	    return pcmk__cluster_send_message(node, crm_msg_attrd, data);
	}
	diff --git a/daemons/attrd/attrd_nodes.c b/daemons/attrd/attrd_nodes.c
	new file mode 100644
	index 0000000000..8fb7797f2d
	--- /dev/null
	+++ b/daemons/attrd/attrd_nodes.c
	@@ -0,0 +1,82 @@
	+/*
	+ * Copyright 2024-2025 the Pacemaker project contributors
	+ *
	+ * The version control history for this file may have further details.
	+ *
	+ * This source code is licensed under the GNU General Public License version 2
	+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
	+ */
	+
	+#include <crm_internal.h>
	+
	+#include <stdio.h> // NULL
	+#include <glib.h> // GHashTable, etc.
	+
	+#include "pacemaker-attrd.h"
	+
	+// Track the last known node XML ID for each node name
	+static GHashTable *node_xml_ids = NULL;
	+
	+/*!
	+ * \internal
	+ * \brief Look up the most recently recorded XML ID for a node
	+ *
	+ * \param[in] node_name  Name of node to look up
	+ *
	+ * \return Recorded XML ID for \p node_name, or NULL if the cache does not
	+ *         exist yet or has no entry for that name
	+ *
	+ * \note The return value may become invalid if attrd_set_node_xml_id() or
	+ *       attrd_forget_node_xml_id() is later called for \p node_name.
	+ */
	+const char *
	+attrd_get_node_xml_id(const char *node_name)
	+{
	+    if (node_xml_ids != NULL) {
	+        return g_hash_table_lookup(node_xml_ids, node_name);
	+    }
	+    return NULL;
	+}
	+
	+/*!
	+ * \internal
	+ * \brief Set last known XML ID for a given node
	+ *
	+ * \param[in] node_name    Name of node to set
	+ * \param[in] node_xml_id  New XML ID to set for node
	+ *
	+ * \note The cache table is created on first use.
	+ */
	+void
	+attrd_set_node_xml_id(const char *node_name, const char *node_xml_id)
	+{
	+    if (node_xml_ids == NULL) {
	+        node_xml_ids = pcmk__strikey_table(free, free);
	+    }
	+    pcmk__insert_dup(node_xml_ids, node_name, node_xml_id);
	+}
	+
	+/*!
	+ * \internal
	+ * \brief Drop the recorded XML ID for a node, if any
	+ *
	+ * \param[in] node_name  Name of node to forget
	+ *
	+ * \note Does nothing if the cache has not been created.
	+ */
	+void
	+attrd_forget_node_xml_id(const char *node_name)
	+{
	+    if (node_xml_ids != NULL) {
	+        g_hash_table_remove(node_xml_ids, node_name);
	+    }
	+}
	+
	+/*!
	+ * \internal
	+ * \brief Destroy the node XML ID cache, if it exists
	+ *
	+ * \note The cache pointer is reset to NULL afterward, so a later
	+ *       attrd_set_node_xml_id() call creates a fresh table.
	+ */
	+void
	+attrd_cleanup_xml_ids(void)
	+{
	+    if (node_xml_ids == NULL) {
	+        return;
	+    }
	+
	+    g_hash_table_destroy(node_xml_ids);
	+    node_xml_ids = NULL;
	+}
	diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c
	index 4ae5c8a555..1d86c1b5cc 100644
	--- a/daemons/attrd/pacemaker-attrd.c
	+++ b/daemons/attrd/pacemaker-attrd.c
	@@ -1,224 +1,226 @@
	/*
	* Copyright 2013-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sys/param.h>
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/stat.h>
	#include <unistd.h>

	#include <stdlib.h>
	#include <errno.h>
	#include <fcntl.h>

	#include <crm/crm.h>
	#include <crm/pengine/rules.h>
	#include <crm/common/cmdline_internal.h>
	#include <crm/common/iso8601.h>
	#include <crm/common/ipc.h>
	#include <crm/common/ipc_internal.h>
	#include <crm/common/output_internal.h>
	#include <crm/common/xml.h>
	#include <crm/cluster/internal.h>

	#include <crm/common/attrs_internal.h>
	#include "pacemaker-attrd.h"

	#define SUMMARY "daemon for managing Pacemaker node attributes"

	gboolean stand_alone = FALSE;
	gchar **log_files = NULL;

	/* Daemon-specific command-line options, added to the option context by
	* build_arg_context(). Parsed values are stored in the stand_alone and
	* log_files globals. The empty entry ends the list.
	*/
	static GOptionEntry entries[] = {
	{ "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
	"(Advanced use only) Run in stand-alone mode", NULL },

	{ "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
	&log_files, "Send logs to the additional named logfile", NULL },

	{ NULL }
	};

	static pcmk__output_t *out = NULL;

	/* Output formats this daemon registers in main() via
	* pcmk__register_formats(). The all-NULL entry ends the list.
	*/
	static pcmk__supported_format_t formats[] = {
	PCMK__SUPPORTED_FORMAT_NONE,
	PCMK__SUPPORTED_FORMAT_TEXT,
	PCMK__SUPPORTED_FORMAT_XML,
	{ NULL, NULL, NULL }
	};

	lrmd_t *the_lrmd = NULL;
	pcmk_cluster_t *attrd_cluster = NULL;
	crm_trigger_t *attrd_config_read = NULL;
	crm_exit_t attrd_exit_status = CRM_EX_OK;

	/*!
	 * \internal
	 * \brief Check whether an attribute manager is already accepting IPC
	 *
	 * Tries to connect to the attrd IPC endpoint as a client; a successful
	 * connection means another instance is active.
	 *
	 * \return true if the connection attempt succeeded, false if it failed or
	 *         the IPC API object could not be created
	 */
	static bool
	ipc_already_running(void)
	{
	    pcmk_ipc_api_t *probe = NULL;
	    int rc = pcmk_new_ipc_api(&probe, pcmk_ipc_attrd);

	    if (rc != pcmk_rc_ok) {
	        return false;
	    }

	    rc = pcmk__connect_ipc(probe, pcmk_ipc_dispatch_sync, 2);
	    if (rc == pcmk_rc_ok) {
	        // Something answered: disconnect again
	        pcmk_disconnect_ipc(probe);
	    } else {
	        crm_debug("No existing %s manager instance found: %s",
	                  pcmk_ipc_name(probe, true), pcmk_rc_str(rc));
	    }

	    pcmk_free_ipc_api(probe);
	    return (rc == pcmk_rc_ok);
	}

	/*!
	 * \internal
	 * \brief Create the command-line option context for the daemon
	 *
	 * \param[in,out] args   Common argument object passed to
	 *                       pcmk__build_arg_context()
	 * \param[in,out] group  Address of the option group pointer, passed to
	 *                       pcmk__build_arg_context()
	 *
	 * \return New option context with the daemon's own options (\c entries) added
	 */
	static GOptionContext *
	build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
	    GOptionContext *context = NULL;

	    context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
	    pcmk__add_main_args(context, entries);
	    return context;
	}

	/*!
	 * \brief Entry point of the Pacemaker node attribute manager
	 *
	 * Parses the command line, creates the output object, and refuses to start
	 * if another instance already answers on IPC. Otherwise connects to the CIB
	 * (unless stand-alone) and to the cluster, starts the IPC server and runs
	 * the main loop. All paths end at the "done" label, which cleans up and
	 * exits through crm_exit() with attrd_exit_status.
	 *
	 * \param[in] argc  Number of command-line arguments
	 * \param[in] argv  Command-line arguments
	 *
	 * \return Does not return normally; the process exits via crm_exit()
	 */
	int
	main(int argc, char **argv)
	{
	    int rc = pcmk_rc_ok;

	    GError *error = NULL;
	    bool initialized = false;   // Whether the "done" path must tear down state

	    GOptionGroup *output_group = NULL;
	    pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
	    gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
	    GOptionContext *context = build_arg_context(args, &output_group);

	    attrd_init_mainloop();
	    crm_log_preinit(NULL, argc, argv);
	    mainloop_add_signal(SIGTERM, attrd_shutdown);

	    pcmk__register_formats(output_group, formats);
	    if (!g_option_context_parse_strv(context, &processed_args, &error)) {
	        attrd_exit_status = CRM_EX_USAGE;
	        goto done;
	    }

	    rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
	    if ((rc != pcmk_rc_ok) || (out == NULL)) {
	        attrd_exit_status = CRM_EX_ERROR;
	        g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status,
	                    "Error creating output format %s: %s",
	                    args->output_ty, pcmk_rc_str(rc));
	        goto done;
	    }

	    if (args->version) {
	        out->version(out, false);
	        goto done;
	    }

	    // Open additional log files
	    pcmk__add_logfiles(log_files, out);

	    crm_log_init(PCMK__VALUE_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
	    crm_notice("Starting Pacemaker node attribute manager%s",
	               stand_alone ? " in standalone mode" : "");

	    if (ipc_already_running()) {
	        const char *msg = "pacemaker-attrd is already active, aborting startup";

	        /* NOTE(review): the exit status stays CRM_EX_OK although an error is
	         * logged -- presumably deliberate; confirm before changing.
	         */
	        attrd_exit_status = CRM_EX_OK;
	        g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "%s", msg);
	        crm_err("%s", msg);
	        goto done;
	    }

	    // From here on, the "done" path must release daemon state
	    initialized = true;

	    attributes = pcmk__strkey_table(NULL, attrd_free_attribute);

	    /* Connect to the CIB before connecting to the cluster or listening for IPC.
	     * This allows us to assume the CIB is connected whenever we process a
	     * cluster or IPC message (which also avoids start-up race conditions).
	     */
	    if (!stand_alone) {
	        if (attrd_cib_connect(30) != pcmk_ok) {
	            attrd_exit_status = CRM_EX_FATAL;
	            g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status,
	                        "Could not connect to the CIB");
	            goto done;
	        }
	        crm_info("CIB connection active");
	    }

	    if (attrd_cluster_connect() != pcmk_ok) {
	        attrd_exit_status = CRM_EX_FATAL;
	        g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status,
	                    "Could not connect to the cluster");
	        goto done;
	    }
	    crm_info("Cluster connection active");

	    // Initialization that requires the cluster to be connected
	    attrd_election_init();

	    if (!stand_alone) {
	        attrd_cib_init();
	    }

	    /* Set a private attribute for ourselves with the protocol version we
	     * support. This lets all nodes determine the minimum supported version
	     * across all nodes. It also ensures that the writer learns our node name,
	     * so it can send our attributes to the CIB.
	     */
	    attrd_broadcast_protocol();

	    attrd_init_ipc();
	    crm_notice("Pacemaker node attribute manager successfully started and accepting connections");
	    attrd_run_mainloop();

	done:
	    if (initialized) {
	        crm_info("Shutting down attribute manager");

	        attrd_election_fini();
	        attrd_ipc_fini();
	        attrd_lrmd_disconnect();

	        if (!stand_alone) {
	            attrd_cib_disconnect();
	        }

	        attrd_free_waitlist();
	        pcmk_cluster_free(attrd_cluster);
	        g_hash_table_destroy(attributes);
	    }

	+    attrd_cleanup_xml_ids();
	+
	    g_strfreev(processed_args);
	    pcmk__free_arg_context(context);

	    g_strfreev(log_files);

	    pcmk__output_and_clear_error(&error, out);

	    if (out != NULL) {
	        out->finish(out, attrd_exit_status, true, NULL);
	        pcmk__output_free(out);
	    }
	    pcmk__unregister_formats();
	    crm_exit(attrd_exit_status);
	}
	diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
	index 76faf04f11..ba4fccb6e7 100644
	--- a/daemons/attrd/pacemaker-attrd.h
	+++ b/daemons/attrd/pacemaker-attrd.h
	@@ -1,251 +1,264 @@
	/*
	- * Copyright 2013-2024 the Pacemaker project contributors
	+ * Copyright 2013-2025 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#ifndef PACEMAKER_ATTRD__H
	# define PACEMAKER_ATTRD__H

	#include <regex.h>
	#include <glib.h>
	#include <crm/crm.h>
	#include <crm/cluster.h>
	#include <crm/cluster/election_internal.h>
	#include <crm/common/messages_internal.h>
	#include <crm/cib/cib_types.h>

	/*
	* Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when used
	* with the no-longer-supported CMAN or corosync-plugin stacks) is unversioned.
	*
	* With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every
	* peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will
	* also set a private attribute for itself with its version, so any attrd can
	* determine the minimum version supported by all peers.
	*
	* Protocol Pacemaker Significant changes
	* -------- --------- -------------------
	* 1 1.1.11 PCMK__ATTRD_CMD_UPDATE (PCMK__XA_ATTR_NAME only),
	* PCMK__ATTRD_CMD_PEER_REMOVE, PCMK__ATTRD_CMD_REFRESH,
	* PCMK__ATTRD_CMD_FLUSH, PCMK__ATTRD_CMD_SYNC_RESPONSE
	* 1 1.1.13 PCMK__ATTRD_CMD_UPDATE (with PCMK__XA_ATTR_REGEX),
	* PCMK__ATTRD_CMD_QUERY
	* 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH,
	* PCMK__ATTRD_CMD_UPDATE_DELAY
	* 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE
	* 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes
	* 4 2.1.5 Multiple attributes can be updated in a single IPC
	* message
	* 5 2.1.5 Peers can request confirmation of a sent message
	* 6 2.1.7 PCMK__ATTRD_CMD_PEER_REMOVE supports PCMK__XA_REAP
	*/
	#define ATTRD_PROTOCOL_VERSION "6"

	#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4)
	#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5)

	#define attrd_send_ack(client, id, flags) \
	pcmk__ipc_send_ack((client), (id), (flags), PCMK__XE_ACK, \
	ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE)

	// Main loop
	void attrd_init_mainloop(void);
	void attrd_run_mainloop(void);

	// Shutdown handling and IPC server (attrd_shutdown() takes a signal number)
	void attrd_set_requesting_shutdown(void);
	void attrd_clear_requesting_shutdown(void);
	void attrd_free_waitlist(void);
	bool attrd_shutting_down(bool if_requested);
	void attrd_shutdown(int nsig);
	void attrd_init_ipc(void);
	void attrd_ipc_fini(void);

	// CIB connection
	int attrd_cib_connect(int max_retry);
	void attrd_cib_disconnect(void);
	void attrd_cib_init(void);
	void attrd_cib_erase_transient_attrs(const char *node);

	// Attribute value expansion (values are strings)
	bool attrd_value_needs_expansion(const char *value);
	int attrd_expand_value(const char *value, const char *old_value);

	/* regular expression to clear failures of all resources */
	#define ATTRD_RE_CLEAR_ALL \
	"^(" PCMK__FAIL_COUNT_PREFIX "|" PCMK__LAST_FAILURE_PREFIX ")-"

	/* regular expression to clear failure of all operations for one resource
	* (format takes resource name)
	*
	* @COMPAT attributes set < 1.1.17:
	* also match older attributes that do not have the operation part
	*/
	#define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s(#.+_[0-9]+)?$"

	/* regular expression to clear failure of one operation for one resource
	* (format takes resource name, operation name, and interval)
	*
	* @COMPAT attributes set < 1.1.17:
	* also match older attributes that do not have the operation part
	*/
	#define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s(#%s_%u)?$"

	/* Takes a resource name, an operation name and an interval, matching the
	* format arguments of the expressions above
	* (NOTE(review): regex is presumably an output argument -- confirm)
	*/
	int attrd_failure_regex(regex_t **regex, const char *rsc, const char *op,
	                        guint interval_ms);

	extern cib_t *the_cib;
	extern crm_exit_t attrd_exit_status;

	/* Alerts */

	extern lrmd_t *the_lrmd;
	extern crm_trigger_t *attrd_config_read;

	void attrd_lrmd_disconnect(void);
	gboolean attrd_read_options(gpointer user_data);
	// All arguments of the alert function are strings
	-int attrd_send_attribute_alert(const char *node, int nodeid,
	+int attrd_send_attribute_alert(const char *node, const char *node_xml_id,
	                               const char *attr, const char *value);

	// Elections
	void attrd_election_init(void);
	void attrd_election_fini(void);
	void attrd_start_election_if_needed(void);
	bool attrd_election_won(void);
	void attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml);
	bool attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml);
	void attrd_declare_winner(void);
	void attrd_remove_voter(const crm_node_t *peer);
	void attrd_xml_add_writer(xmlNode *xml);

	// Bit flags stored in attribute_t:flags
	enum attrd_attr_flags {
	attrd_attr_none = 0U,
	- attrd_attr_changed = (1U << 0), // Attribute value has changed since last write
	- attrd_attr_uuid_missing = (1U << 1), // Whether we know we're missing a peer UUID
	- attrd_attr_is_private = (1U << 2), // Whether to keep this attribute out of the CIB
	- attrd_attr_force_write = (1U << 3), // Update attribute by ignoring delay
	+
	+ // At least one of attribute's values has changed since last write
	+ attrd_attr_changed = (1U << 0),
	+
	+ // At least one of attribute's values has an unknown node XML ID
	+ attrd_attr_node_unknown = (1U << 1),
	+
	+ // This attribute should never be written to the CIB
	+ attrd_attr_is_private = (1U << 2),
	+
	+ // Ignore any configured delay for next write of this attribute
	+ attrd_attr_force_write = (1U << 3),
	};

	// A named attribute together with its per-node values and write state
	typedef struct attribute_s {
	char *id; // Attribute name
	char *set_type; // PCMK_XE_INSTANCE_ATTRIBUTES or PCMK_XE_UTILIZATION
	char *set_id; // Set's XML ID to use when writing
	char *user; // ACL user to use for CIB writes
	int update; // Call ID of pending write
	int timeout_ms; // How long to wait for more changes before writing
	uint32_t flags; // Group of enum attrd_attr_flags
	GHashTable *values; // Key: node name, value: attribute_value_t
	mainloop_timer_t *timer; // Timer to use for timeout_ms
	} attribute_t;

	// Set flags_to_set in attr->flags via pcmk__set_flags_as() (LOG_TRACE level)
	#define attrd_set_attr_flags(attr, flags_to_set) do { \
	(attr)->flags = pcmk__set_flags_as(__func__, __LINE__, \
	LOG_TRACE, "Value for attribute", (attr)->id, \
	(attr)->flags, (flags_to_set), #flags_to_set); \
	} while (0)

	// Clear flags_to_clear in attr->flags via pcmk__clear_flags_as() (LOG_TRACE level)
	#define attrd_clear_attr_flags(attr, flags_to_clear) do { \
	(attr)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
	LOG_TRACE, "Value for attribute", (attr)->id, \
	(attr)->flags, (flags_to_clear), #flags_to_clear); \
	} while (0)

	// Bit flags stored in attribute_value_t:flags
	enum attrd_value_flags {
	attrd_value_none = 0U,
	attrd_value_remote = (1U << 0), // Value is for Pacemaker Remote node
	attrd_value_from_peer = (1U << 1), // Value is from peer sync response
	};

	// One node's value for an attribute (stored in attribute_t:values)
	typedef struct attribute_value_s {
	char *nodename; // Node that this value is for
	char *current; // Attribute value
	char *requested; // Value specified in pending CIB write, if any
	- uint32_t nodeid; // Cluster node ID of node that this value is for
	uint32_t flags; // Group of attrd_value_flags
	} attribute_value_t;

	// Set flags_to_set in attr_value->flags via pcmk__set_flags_as() (LOG_TRACE level)
	#define attrd_set_value_flags(attr_value, flags_to_set) do { \
	(attr_value)->flags = pcmk__set_flags_as(__func__, __LINE__, \
	LOG_TRACE, "Value for node", (attr_value)->nodename, \
	(attr_value)->flags, (flags_to_set), #flags_to_set); \
	} while (0)

	// Clear flags_to_clear in attr_value->flags via pcmk__clear_flags_as() (LOG_TRACE level)
	#define attrd_clear_value_flags(attr_value, flags_to_clear) do { \
	(attr_value)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
	LOG_TRACE, "Value for node", (attr_value)->nodename, \
	(attr_value)->flags, (flags_to_clear), #flags_to_clear); \
	} while (0)

	extern pcmk_cluster_t *attrd_cluster;
	extern GHashTable *attributes;
	extern GHashTable *peer_protocol_vers;

	#define CIB_OP_TIMEOUT_S 120

	// Cluster connection and peer messaging
	int attrd_cluster_connect(void);
	void attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v);
	void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host,
	                       bool filter);
	void attrd_peer_sync(crm_node_t *peer);
	void attrd_peer_remove(const char *host, bool uncache, const char *source);
	void attrd_peer_clear_failure(pcmk__request_t *request);
	void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won,
	                              xmlNode *xml);

	// Handlers for client requests (each takes the request being processed)
	void attrd_broadcast_protocol(void);
	xmlNode *attrd_client_peer_remove(pcmk__request_t *request);
	xmlNode *attrd_client_clear_failure(pcmk__request_t *request);
	xmlNode *attrd_client_update(pcmk__request_t *request);
	xmlNode *attrd_client_refresh(pcmk__request_t *request);
	xmlNode *attrd_client_query(pcmk__request_t *request);
	gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm);

	// Attribute and value helpers
	xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
	                             const attribute_value_t *v, bool force_write);
	void attrd_clear_value_seen(void);
	void attrd_free_attribute(gpointer data);
	void attrd_free_attribute_value(gpointer data);
	attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr);
	char *attrd_set_id(const attribute_t *attr, const char *node_state_id);
	char *attrd_nvpair_id(const attribute_t *attr, const char *node_state_id);

	// Bit flags for the options argument of attrd_write_attributes()
	enum attrd_write_options {
	attrd_write_changed = 0,
	attrd_write_all = (1 << 0),
	attrd_write_no_delay = (1 << 1),
	};

	void attrd_write_attributes(uint32_t options);
	void attrd_write_or_elect_attribute(attribute_t *a);

	// Protocol version tracking
	extern int minimum_protocol_version;
	void attrd_remove_peer_protocol_ver(const char *host);
	void attrd_update_minimum_protocol_ver(const char *host, const char *value);

	mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr);

	void attrd_unregister_handlers(void);
	void attrd_handle_request(pcmk__request_t *request);

	enum attrd_sync_point {
	attrd_sync_point_local,
	attrd_sync_point_cluster,
	};

	typedef int (attrd_confirmation_action_fn)(xmlNode );

	void attrd_add_client_to_waitlist(pcmk__request_t *request);
	void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml);
	int attrd_cluster_sync_point_update(xmlNode *xml);
	void attrd_do_not_expect_from_peer(const char *host);
	void attrd_do_not_wait_for_client(pcmk__client_t *client);
	void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn);
	void attrd_free_confirmations(void);
	void attrd_handle_confirmation(int callid, const char *host);
	void attrd_remove_client_from_waitlist(pcmk__client_t *client);
	const char attrd_request_sync_point(xmlNode xml);
	bool attrd_request_has_sync_point(xmlNode *xml);

	extern gboolean stand_alone;

	+// Node utilities (from attrd_nodes.c)
	+const char *attrd_get_node_xml_id(const char *node_name);
	+void attrd_set_node_xml_id(const char *node_name, const char *node_xml_id);
	+void attrd_forget_node_xml_id(const char *node_name);
	+void attrd_cleanup_xml_ids(void);
	+
	#endif /* PACEMAKER_ATTRD__H */
	diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c
	index 87b7eb162b..ed944672bc 100644
	--- a/daemons/based/based_messages.c
	+++ b/daemons/based/based_messages.c
	@@ -1,538 +1,538 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <stdio.h>
	#include <unistd.h>
	#include <stdlib.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <time.h>

	#include <sys/param.h>
	#include <sys/types.h>

	#include <glib.h>
	#include <libxml/tree.h>

	#include <crm/crm.h>
	#include <crm/cib/internal.h>

	#include <crm/common/xml.h>
	#include <crm/common/ipc_internal.h>
	#include <crm/common/xml_internal.h>
	#include <crm/cluster/internal.h>

	#include <pacemaker-based.h>

	/* Maximum number of diffs to ignore while waiting for a resync */
	#define MAX_DIFF_RETRY 5

	bool based_is_primary = false;

	xmlNode *the_cib = NULL;

	/*!
	 * \brief Process a shutdown request or shutdown acknowledgement from a peer
	 *
	 * Only \p req and \p answer are used. \p *answer is always set to NULL.
	 *
	 * \param[in]  req     Request XML; the peer name is read from PCMK__XA_SRC
	 * \param[out] answer  Set to NULL
	 *
	 * \return pcmk_ok, or -EINVAL if a peer acknowledged a shutdown that this
	 *         instance did not request (cib_shutdown_flag is FALSE)
	 */
	int
	cib_process_shutdown_req(const char *op, int options, const char *section,
	                         xmlNode *req, xmlNode *input, xmlNode *existing_cib,
	                         xmlNode **result_cib, xmlNode **answer)
	{
	    const char *host = crm_element_value(req, PCMK__XA_SRC);

	    *answer = NULL;

	    // Without PCMK__XA_CIB_ISREPLYTO, this is a request rather than a reply
	    if (crm_element_value(req, PCMK__XA_CIB_ISREPLYTO) == NULL) {
	        crm_info("Peer %s is requesting to shut down", host);
	        return pcmk_ok;
	    }

	    if (cib_shutdown_flag == FALSE) {
	        crm_err("Peer %s mistakenly thinks we wanted to shut down", host);
	        return -EINVAL;
	    }

	    crm_info("Peer %s has acknowledged our shutdown request", host);
	    terminate_cib(__func__, 0);
	    return pcmk_ok;
	}

	// @COMPAT: Remove when PCMK__CIB_REQUEST_NOOP is removed
	/*!
	 * \brief Process a no-op request: trace it and produce no answer
	 *
	 * \param[in]  op      Operation name (only logged)
	 * \param[out] answer  Set to NULL
	 *
	 * \return pcmk_ok
	 */
	int
	cib_process_noop(const char *op, int options, const char *section, xmlNode *req,
	                 xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib,
	                 xmlNode **answer)
	{
	    crm_trace("Processing \"%s\" event", op);
	    *answer = NULL;
	    return pcmk_ok;
	}

	/*!
	 * \brief Query or switch this instance's primary (read/write) state
	 *
	 * PCMK__CIB_REQUEST_IS_PRIMARY only reports the current state.
	 * PCMK__CIB_REQUEST_PRIMARY sets based_is_primary; any other operation
	 * clears it.
	 *
	 * \param[in] op  Operation name
	 *
	 * \return pcmk_ok, or -EPERM when asked whether this instance is primary
	 *         and it is not
	 */
	int
	cib_process_readwrite(const char *op, int options, const char *section,
	                      xmlNode *req, xmlNode *input, xmlNode *existing_cib,
	                      xmlNode **result_cib, xmlNode **answer)
	{
	    int result = pcmk_ok;

	    crm_trace("Processing \"%s\" event", op);

	    if (pcmk__str_eq(op, PCMK__CIB_REQUEST_IS_PRIMARY, pcmk__str_none)) {
	        if (based_is_primary) {
	            result = pcmk_ok;
	        } else {
	            result = -EPERM;
	        }
	        return result;
	    }

	    if (pcmk__str_eq(op, PCMK__CIB_REQUEST_PRIMARY, pcmk__str_none)) {
	        if (!based_is_primary) {
	            crm_info("We are now in R/W mode");
	            based_is_primary = true;
	        } else {
	            crm_debug("We are still in R/W mode");
	        }

	    } else if (based_is_primary) {
	        crm_info("We are now in R/O mode");
	        based_is_primary = false;
	    }

	    return result;
	}

	/* Set to 1 when a sync is requested, incremented when a diff is ignored,
	* reset to 0 when a sync is received
	*/
	static int sync_in_progress = 0;

	/*!
	 * \brief Ask one peer, or all peers, to send us their CIB
	 *
	 * Sets sync_in_progress to 1 before sending the request.
	 *
	 * \param[in] host  Name of peer to ask, or NULL to ask all peers
	 */
	void
	send_sync_request(const char *host)
	{
	    crm_node_t *target = NULL;
	    xmlNode *request = pcmk__xe_create(NULL, "sync-me");

	    crm_info("Requesting re-sync from %s",
	             ((host != NULL)? host : "all peers"));
	    sync_in_progress = 1;

	    crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_CIB);
	    crm_xml_add(request, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_SYNC_TO_ONE);
	    if (stand_alone) {
	        crm_xml_add(request, PCMK__XA_CIB_DELEGATED_FROM, "localhost");
	    } else {
	        crm_xml_add(request, PCMK__XA_CIB_DELEGATED_FROM, crm_cluster->uname);
	    }

	    // A NULL target is what gets passed when no host was given
	    if (host != NULL) {
	        target = pcmk__get_node(0, host, NULL,
	                                pcmk__node_search_cluster_member);
	    }
	    pcmk__cluster_send_message(target, crm_msg_cib, request);
	    free_xml(request);
	}

	/*!
	 * \brief Answer a ping with a digest of the current CIB
	 *
	 * \param[in]  op            Operation name (only logged)
	 * \param[in]  req           Request XML; PCMK__XA_SRC and
	 *                           PCMK__XA_CIB_PING_ID are read from it
	 * \param[in]  existing_cib  CIB whose version attributes are logged
	 * \param[out] answer        Set to a new PCMK__XE_PING_RESPONSE element
	 *
	 * \return pcmk_ok
	 */
	int
	cib_process_ping(const char *op, int options, const char *section,
	                 xmlNode *req, xmlNode *input, xmlNode *existing_cib,
	                 xmlNode **result_cib, xmlNode **answer)
	{
	    const char *host = crm_element_value(req, PCMK__XA_SRC);
	    const char *seq = crm_element_value(req, PCMK__XA_CIB_PING_ID);
	    char *digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET);

	    xmlNode *wrapper = NULL;

	    crm_trace("Processing \"%s\" event %s from %s", op, seq, host);
	    *answer = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE);

	    crm_xml_add(*answer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
	    crm_xml_add(*answer, PCMK__XA_DIGEST, digest);
	    crm_xml_add(*answer, PCMK__XA_CIB_PING_ID, seq);

	    wrapper = pcmk__xe_create(*answer, PCMK__XE_CIB_CALLDATA);

	    if (the_cib != NULL) {
	        pcmk__if_tracing(
	            {
	                /* Append additional detail so the receiver can log the
	                 * differences
	                 */
	                pcmk__xml_copy(wrapper, the_cib);
	            },
	            {
	                // Always include at least the version details
	                const char *name = (const char *) the_cib->name;
	                xmlNode *shallow = pcmk__xe_create(wrapper, name);

	                pcmk__xe_copy_attrs(shallow, the_cib, pcmk__xaf_none);
	            }
	        );
	    }

	    crm_info("Reporting our current digest to %s: %s for %s.%s.%s",
	             host, digest,
	             crm_element_value(existing_cib, PCMK_XA_ADMIN_EPOCH),
	             crm_element_value(existing_cib, PCMK_XA_EPOCH),
	             crm_element_value(existing_cib, PCMK_XA_NUM_UPDATES));

	    free(digest);

	    return pcmk_ok;
	}

	/*!
	 * \brief Process a sync request by sending our CIB to all peers
	 *
	 * \param[in] req  Request XML, handed to sync_our_cib()
	 *
	 * \return Result of sync_our_cib()
	 */
	int
	cib_process_sync(const char *op, int options, const char *section,
	                 xmlNode *req, xmlNode *input, xmlNode *existing_cib,
	                 xmlNode **result_cib, xmlNode **answer)
	{
	    return sync_our_cib(req, TRUE);
	}

	/*!
	 * \brief Process a schema upgrade request
	 *
	 * A request carrying PCMK__XA_CIB_SCHEMA_MAX is applied locally through
	 * cib_process_upgrade(). Otherwise the upgrade is tried on a scratch copy
	 * of \p existing_cib; if that yields a later schema, the request is sent on
	 * with PCMK__XA_CIB_SCHEMA_MAX set. On any failure, the originating peer is
	 * notified if it can be found.
	 *
	 * \param[in]     op            Operation name
	 * \param[in]     options       Call options, passed to cib_process_upgrade()
	 * \param[in]     section       Passed to cib_process_upgrade()
	 * \param[in]     req           Request XML
	 * \param[in]     input         Passed to cib_process_upgrade()
	 * \param[in]     existing_cib  Current CIB
	 * \param[in,out] result_cib    Passed to cib_process_upgrade()
	 * \param[out]    answer        Set to NULL here; also passed to
	 *                              cib_process_upgrade()
	 *
	 * \return Legacy return code (pcmk_ok on success, negative on error)
	 */
	int
	cib_process_upgrade_server(const char *op, int options, const char *section,
	                           xmlNode *req, xmlNode *input,
	                           xmlNode *existing_cib, xmlNode **result_cib,
	                           xmlNode **answer)
	{
	    int rc = pcmk_ok;

	    *answer = NULL;

	    if (crm_element_value(req, PCMK__XA_CIB_SCHEMA_MAX) != NULL) {
	        /* The originator of an upgrade request sends it to the DC, without
	         * PCMK__XA_CIB_SCHEMA_MAX. If an upgrade is needed, the DC
	         * re-broadcasts the request with PCMK__XA_CIB_SCHEMA_MAX, and each node
	         * performs the upgrade (and notifies its local clients) here.
	         */
	        return cib_process_upgrade(
	            op, options, section, req, input, existing_cib, result_cib, answer);

	    } else {
	        xmlNode *scratch = pcmk__xml_copy(NULL, existing_cib);
	        const char *host = crm_element_value(req, PCMK__XA_SRC);
	        const char *original_schema = NULL;
	        const char *new_schema = NULL;
	        const char *client_id = crm_element_value(req, PCMK__XA_CIB_CLIENTID);
	        const char *call_opts = crm_element_value(req, PCMK__XA_CIB_CALLOPT);
	        const char *call_id = crm_element_value(req, PCMK__XA_CIB_CALLID);

	        crm_trace("Processing \"%s\" event", op);
	        original_schema = crm_element_value(existing_cib,
	                                            PCMK_XA_VALIDATE_WITH);
	        rc = pcmk__update_schema(&scratch, NULL, true, true);
	        rc = pcmk_rc2legacy(rc);
	        new_schema = crm_element_value(scratch, PCMK_XA_VALIDATE_WITH);

	        if (pcmk__cmp_schemas_by_name(new_schema, original_schema) > 0) {
	            xmlNode *up = pcmk__xe_create(NULL, __func__);

	            rc = pcmk_ok;
	            crm_notice("Upgrade request from %s verified", host);

	            crm_xml_add(up, PCMK__XA_T, PCMK__VALUE_CIB);
	            crm_xml_add(up, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_UPGRADE);
	            crm_xml_add(up, PCMK__XA_CIB_SCHEMA_MAX, new_schema);
	            crm_xml_add(up, PCMK__XA_CIB_DELEGATED_FROM, host);
	            crm_xml_add(up, PCMK__XA_CIB_CLIENTID, client_id);
	            crm_xml_add(up, PCMK__XA_CIB_CALLOPT, call_opts);
	            crm_xml_add(up, PCMK__XA_CIB_CALLID, call_id);

	            if (cib_legacy_mode() && based_is_primary) {
	                rc = cib_process_upgrade(
	                    op, options, section, up, input, existing_cib, result_cib, answer);

	            } else {
	                pcmk__cluster_send_message(NULL, crm_msg_cib, up);
	            }

	            free_xml(up);

	        } else if(rc == pcmk_ok) {
	            // The update succeeded but did not reach a later schema
	            rc = -pcmk_err_schema_unchanged;
	        }

	        if (rc != pcmk_ok) {
	            // Notify originating peer so it can notify its local clients
	            crm_node_t *origin = NULL;

	-            origin = pcmk__search_node_caches(0, host,
	+            origin = pcmk__search_node_caches(0, host, NULL,
	                                              pcmk__node_search_cluster_member);

	            crm_info("Rejecting upgrade request from %s: %s "
	                     CRM_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc,
	                     (origin? origin->uname : "lost"));

	            if (origin) {
	                xmlNode *up = pcmk__xe_create(NULL, __func__);

	                crm_xml_add(up, PCMK__XA_T, PCMK__VALUE_CIB);
	                crm_xml_add(up, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_UPGRADE);
	                crm_xml_add(up, PCMK__XA_CIB_DELEGATED_FROM, host);
	                crm_xml_add(up, PCMK__XA_CIB_ISREPLYTO, host);
	                crm_xml_add(up, PCMK__XA_CIB_CLIENTID, client_id);
	                crm_xml_add(up, PCMK__XA_CIB_CALLOPT, call_opts);
	                crm_xml_add(up, PCMK__XA_CIB_CALLID, call_id);
	                crm_xml_add_int(up, PCMK__XA_CIB_UPGRADE_RC, rc);
	                if (!pcmk__cluster_send_message(origin, crm_msg_cib, up)) {
	                    crm_warn("Could not send CIB upgrade result to %s", host);
	                }
	                free_xml(up);
	            }
	        }
	        free_xml(scratch);
	    }
	    return rc;
	}

	/*!
	 * \brief Process a sync request by sending our CIB to the requesting host
	 *
	 * \param[in] req  Request XML, handed to sync_our_cib()
	 *
	 * \return Result of sync_our_cib()
	 */
	int
	cib_process_sync_one(const char *op, int options, const char *section,
	                     xmlNode *req, xmlNode *input, xmlNode *existing_cib,
	                     xmlNode **result_cib, xmlNode **answer)
	{
	    return sync_our_cib(req, FALSE);
	}

	/*!
	 * \brief Apply a CIB diff, requesting a full resync when necessary
	 *
	 * While a sync is in progress, a non-primary instance ignores diffs (up to
	 * MAX_DIFF_RETRY of them) instead of applying them.
	 *
	 * \param[in]     op            Operation name
	 * \param[in]     options       Call options
	 * \param[in]     section       Passed to cib_process_diff()
	 * \param[in]     req           Request XML
	 * \param[in]     input         Diff to apply
	 * \param[in]     existing_cib  Current CIB
	 * \param[in,out] result_cib    Result CIB; freed and set to NULL when a
	 *                              resync is requested
	 * \param[out]    answer        Passed to cib_process_diff()
	 *
	 * \return Legacy return code (pcmk_ok on success, negative on error)
	 */
	int
	cib_server_process_diff(const char *op, int options, const char *section,
	                        xmlNode *req, xmlNode *input, xmlNode *existing_cib,
	                        xmlNode **result_cib, xmlNode **answer)
	{
	    int rc = pcmk_ok;

	    if (sync_in_progress > MAX_DIFF_RETRY) {
	        /* Don't ignore diffs forever; the last request may have been lost.
	         * If the diff fails, we'll ask for another full resync.
	         */
	        sync_in_progress = 0;
	    }

	    // The primary instance should never ignore a diff
	    if (sync_in_progress && !based_is_primary) {
	        int diff_add_updates = 0;
	        int diff_add_epoch = 0;
	        int diff_add_admin_epoch = 0;

	        int diff_del_updates = 0;
	        int diff_del_epoch = 0;
	        int diff_del_admin_epoch = 0;

	        cib_diff_version_details(input,
	                                 &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
	                                 &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);

	        sync_in_progress++;
	        crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)",
	                   diff_del_admin_epoch, diff_del_epoch, diff_del_updates,
	                   diff_add_admin_epoch, diff_add_epoch, diff_add_updates);
	        return -pcmk_err_diff_resync;
	    }

	    rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer);
	    crm_trace("result: %s (%d), %s", pcmk_strerror(rc), rc,
	              (based_is_primary? "primary": "secondary"));

	    if ((rc == -pcmk_err_diff_resync) && !based_is_primary) {
	        free_xml(*result_cib);
	        *result_cib = NULL;
	        send_sync_request(NULL);

	    } else if (rc == -pcmk_err_diff_resync) {
	        // Here we are primary, so the resync request becomes a plain failure
	        rc = -pcmk_err_diff_failed;
	        if (options & cib_force_diff) {
	            crm_warn("Not requesting full refresh in R/W mode");
	        }

	    } else if ((rc != pcmk_ok) && !based_is_primary && cib_legacy_mode()) {
	        crm_warn("Requesting full CIB refresh because update failed: %s"
	                 CRM_XS " rc=%d", pcmk_strerror(rc), rc);

	        pcmk__log_xml_patchset(LOG_INFO, input);
	        free_xml(*result_cib);
	        *result_cib = NULL;
	        send_sync_request(NULL);
	    }

	    return rc;
	}

	/*!
	 * \brief Process a replace request, ending any sync that is in progress
	 *
	 * All arguments are passed through to cib_process_replace(). If that
	 * succeeds and \p input is a PCMK_XE_CIB element, sync_in_progress is
	 * reset to 0.
	 *
	 * \return Result of cib_process_replace()
	 */
	int
	cib_process_replace_svr(const char *op, int options, const char *section,
	                        xmlNode *req, xmlNode *input, xmlNode *existing_cib,
	                        xmlNode **result_cib, xmlNode **answer)
	{
	    int rc =
	        cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer);

	    if ((rc == pcmk_ok) && pcmk__xe_is(input, PCMK_XE_CIB)) {
	        sync_in_progress = 0;
	    }
	    return rc;
	}

	// @COMPAT: Remove when PCMK__CIB_REQUEST_ABS_DELETE is removed
	/*!
	 * \brief Reject an absolute-delete request
	 *
	 * No argument is used.
	 *
	 * \return -EINVAL
	 */
	int
	cib_process_delete_absolute(const char *op, int options, const char *section,
	                            xmlNode *req, xmlNode *input,
	                            xmlNode *existing_cib, xmlNode **result_cib,
	                            xmlNode **answer)
	{
	    return -EINVAL;
	}

	/*!
	 * \internal
	 * \brief Create a new message holding selected attributes of another
	 *
	 * \param[in] msg  Message to take attribute values from
	 *
	 * \return New PCMK__XE_COPY element carrying every listed attribute that
	 *         has a value in \p msg
	 */
	static xmlNode *
	cib_msg_copy(xmlNode *msg)
	{
	    // Attributes that get carried over to the copy
	    static const char *field_list[] = {
	        PCMK__XA_T,
	        PCMK__XA_CIB_CLIENTID,
	        PCMK__XA_CIB_CALLOPT,
	        PCMK__XA_CIB_CALLID,
	        PCMK__XA_CIB_OP,
	        PCMK__XA_CIB_ISREPLYTO,
	        PCMK__XA_CIB_SECTION,
	        PCMK__XA_CIB_HOST,
	        PCMK__XA_CIB_RC,
	        PCMK__XA_CIB_DELEGATED_FROM,
	        PCMK__XA_CIB_OBJECT,
	        PCMK__XA_CIB_OBJECT_TYPE,
	        PCMK__XA_CIB_UPDATE,
	        PCMK__XA_CIB_CLIENTNAME,
	        PCMK__XA_CIB_USER,
	        PCMK__XA_CIB_NOTIFY_TYPE,
	        PCMK__XA_CIB_NOTIFY_ACTIVATE,
	    };

	    xmlNode *result = pcmk__xe_create(NULL, PCMK__XE_COPY);

	    for (int i = 0; i < PCMK__NELEM(field_list); i++) {
	        const char *attr_value = crm_element_value(msg, field_list[i]);

	        if (attr_value == NULL) {
	            continue;
	        }
	        crm_xml_add(result, field_list[i], attr_value);
	    }

	    return result;
	}

	/*!
	 * \brief Send our CIB to one peer or to all peers as a replace request
	 *
	 * \param[in] request  Request that triggered the sync; the target host is
	 *                     read from PCMK__XA_SRC
	 * \param[in] all      If true, send to all peers rather than only the
	 *                     requesting host
	 *
	 * \return pcmk_ok on success, -EINVAL if there is no CIB or no target, or
	 *         -ENOTCONN if the message could not be sent
	 */
	int
	sync_our_cib(xmlNode * request, gboolean all)
	{
	    int result = pcmk_ok;
	    char *digest = NULL;
	    const char *host = crm_element_value(request, PCMK__XA_SRC);
	    const char *op = crm_element_value(request, PCMK__XA_CIB_OP);
	    crm_node_t *peer = NULL;
	    xmlNode *replace_request = NULL;
	    xmlNode *wrapper = NULL;

	    CRM_CHECK(the_cib != NULL, return -EINVAL);
	    CRM_CHECK(all || (host != NULL), return -EINVAL);

	    crm_debug("Syncing CIB to %s", all ? "all peers" : host);

	    replace_request = cib_msg_copy(request);

	    if (host != NULL) {
	        crm_xml_add(replace_request, PCMK__XA_CIB_ISREPLYTO, host);
	    }
	    if (all) {
	        pcmk__xe_remove_attr(replace_request, PCMK__XA_CIB_HOST);
	    }

	    crm_xml_add(replace_request, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_REPLACE);

	    // @TODO Keep for tracing, or drop?
	    crm_xml_add(replace_request, PCMK__XA_ORIGINAL_CIB_OP, op);

	    pcmk__xe_set_bool_attr(replace_request, PCMK__XA_CIB_UPDATE, true);

	    crm_xml_add(replace_request, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
	    digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET);
	    crm_xml_add(replace_request, PCMK__XA_DIGEST, digest);

	    wrapper = pcmk__xe_create(replace_request, PCMK__XE_CIB_CALLDATA);
	    pcmk__xml_copy(wrapper, the_cib);

	    // peer stays NULL when syncing to all peers
	    if (!all) {
	        peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster_member);
	    }
	    if (!pcmk__cluster_send_message(peer, crm_msg_cib, replace_request)) {
	        result = -ENOTCONN;
	    }
	    free_xml(replace_request);
	    free(digest);
	    return result;
	}

	/*!
	 * \brief Process a request to commit a transaction
	 *
	 * \param[in]     req         Request XML; client ID and origin are read
	 *                            from it
	 * \param[in]     input       Transaction, passed to
	 *                            based_commit_transaction()
	 * \param[in,out] result_cib  Passed to based_commit_transaction()
	 *
	 * \return Legacy return code (pcmk_ok on success, negative on error)
	 */
	int
	cib_process_commit_transaction(const char *op, int options, const char *section,
	                               xmlNode *req, xmlNode *input,
	                               xmlNode *existing_cib, xmlNode **result_cib,
	                               xmlNode **answer)
	{
	    /* On success, our caller will activate *result_cib locally, trigger a
	     * replace notification if appropriate, and sync *result_cib to all nodes.
	     * On failure, our caller will free *result_cib.
	     */
	    int rc = pcmk_rc_ok;
	    const char *client_id = crm_element_value(req, PCMK__XA_CIB_CLIENTID);
	    const char *origin = crm_element_value(req, PCMK__XA_SRC);
	    pcmk__client_t *client = pcmk__find_client_by_id(client_id);

	    rc = based_commit_transaction(input, client, origin, result_cib);

	    if (rc != pcmk_rc_ok) {
	        char *source = based_transaction_source_str(client, origin);

	        crm_err("Could not commit transaction for %s: %s",
	                source, pcmk_rc_str(rc));
	        free(source);
	    }
	    return pcmk_rc2legacy(rc);
	}

	/*!
	 * \brief Reply with the schema files later than the version a client has
	 *
	 * \param[in]  req     Request XML; the first child of its
	 *                     PCMK__XE_CIB_CALLDATA element must carry
	 *                     PCMK_XA_VERSION
	 * \param[out] answer  Set to a new PCMK__XA_SCHEMAS element, also on error
	 *
	 * \return pcmk_ok on success, or -EPROTO if the request has no data or no
	 *         version
	 */
	int
	cib_process_schemas(const char *op, int options, const char *section,
	                    xmlNode *req, xmlNode *input, xmlNode *existing_cib,
	                    xmlNode **result_cib, xmlNode **answer)
	{
	    xmlNode *wrapper = NULL;
	    xmlNode *data = NULL;

	    const char *after_ver = NULL;
	    GList *schemas = NULL;
	    GList *already_included = NULL;

	    *answer = pcmk__xe_create(NULL, PCMK__XA_SCHEMAS);

	    wrapper = pcmk__xe_first_child(req, PCMK__XE_CIB_CALLDATA, NULL, NULL);
	    data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
	    if (data == NULL) {
	        crm_warn("No data specified in request");
	        return -EPROTO;
	    }

	    after_ver = crm_element_value(data, PCMK_XA_VERSION);
	    if (after_ver == NULL) {
	        crm_warn("No version specified in request");
	        return -EPROTO;
	    }

	    /* The client requested all schemas after the latest one we know about, which
	     * means the client is fully up-to-date. Return a properly formatted reply
	     * with no schemas.
	     */
	    if (pcmk__str_eq(after_ver, pcmk__highest_schema_name(), pcmk__str_none)) {
	        return pcmk_ok;
	    }

	    schemas = pcmk__schema_files_later_than(after_ver);

	    for (GList *iter = schemas; iter != NULL; iter = iter->next) {
	        pcmk__build_schema_xml_node(*answer, iter->data, &already_included);
	    }

	    g_list_free_full(schemas, free);
	    g_list_free_full(already_included, free);
	    return pcmk_ok;
	}
	diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
	index d0652e4e5d..f671e3b756 100644
	--- a/daemons/controld/controld_corosync.c
	+++ b/daemons/controld/controld_corosync.c
	@@ -1,162 +1,162 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sys/param.h>
	#include <sys/types.h>
	#include <sys/stat.h>

	#include <crm/crm.h>
	#include <crm/cluster/internal.h>
	#include <crm/common/xml.h>

	#include <pacemaker-controld.h>

	#if SUPPORT_COROSYNC

	extern void post_cache_update(int seq);

	/* A_HA_CONNECT */

	/*!
	 * \internal
	 * \brief Handle a message delivered over the CPG connection
	 *
	 * Messages of class crm_class_cluster are parsed as XML, tagged with their
	 * sender and passed to crmd_ha_msg_filter(). Anything else is logged as an
	 * error.
	 *
	 * \param[in] handle     CPG connection
	 * \param[in] groupName  CPG group name (unused)
	 * \param[in] nodeid     Passed to pcmk__cpg_message_data()
	 * \param[in] pid        Passed to pcmk__cpg_message_data()
	 * \param[in] msg        Raw message
	 * \param[in] msg_len    Length of \p msg (unused)
	 */
	static void
	crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName,
	                 uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
	{
	    uint32_t kind = 0;
	    const char *from = NULL;
	    crm_node_t *sender = NULL;
	    xmlNode *parsed = NULL;
	    char *payload = pcmk__cpg_message_data(handle, nodeid, pid, msg, &kind,
	                                           &from);

	    if (payload == NULL) {
	        return;
	    }

	    if (kind != crm_class_cluster) {
	        crm_err("Invalid message class (%d): %.100s", kind, payload);
	        goto done;
	    }

	    parsed = pcmk__xml_parse(payload);
	    if (parsed == NULL) {
	        crm_err("Could not parse message content (%d): %.100s", kind, payload);
	        goto done;
	    }

	    crm_xml_add(parsed, PCMK__XA_SRC, from);

	    sender = pcmk__get_node(0, from, NULL, pcmk__node_search_cluster_member);
	    if (!pcmk_is_set(sender->processes, crm_proc_cpg)) {
	        /* If we can still talk to our peer process on that node,
	         * then it must be part of the corosync membership
	         */
	        crm_warn("Receiving messages from a node we think is dead: %s[%d]",
	                 sender->uname, sender->id);
	        crm_update_peer_proc(__func__, sender, crm_proc_cpg,
	                             PCMK_VALUE_ONLINE);
	    }
	    crmd_ha_msg_filter(parsed);
	    free_xml(parsed);

	done:
	    free(payload);
	}

	/* Quorum callback handed to pcmk__corosync_quorum_connect(): records the
	* new quorum state via crm_update_quorum(), then calls post_cache_update()
	* with the sequence number. Always returns TRUE.
	*/
	static gboolean
	crmd_quorum_callback(unsigned long long seq, gboolean quorate)
	{
	crm_update_quorum(quorate, FALSE);
	post_cache_update(seq);
	return TRUE;
	}

	/*!
	 * \internal
	 * \brief Handle destruction of the cluster-layer connection
	 *
	 * Exits with CRM_EX_DISCONNECT unless R_HA_DISCONNECTED is already set in
	 * the FSA input register.
	 *
	 * \param[in] user_data  Unused
	 */
	static void
	crmd_cs_destroy(gpointer user_data)
	{
	    if (pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
	        return;
	    }

	    crm_crit("Lost connection to cluster layer, shutting down");
	    crmd_exit(CRM_EX_DISCONNECT);
	}

	/*!
	 * \brief Handle a Corosync notification of a CPG configuration change
	 *
	 * \param[in] handle               CPG connection
	 * \param[in] cpg_name             CPG group name
	 * \param[in] member_list          List of current CPG members
	 * \param[in] member_list_entries  Number of entries in \p member_list
	 * \param[in] left_list            List of CPG members that left
	 * \param[in] left_list_entries    Number of entries in \p left_list
	 * \param[in] joined_list          List of CPG members that joined
	 * \param[in] joined_list_entries  Number of entries in \p joined_list
	 */
	static void
	cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
	                        const struct cpg_address *member_list,
	                        size_t member_list_entries,
	                        const struct cpg_address *left_list,
	                        size_t left_list_entries,
	                        const struct cpg_address *joined_list,
	                        size_t joined_list_entries)
	{
	    /* When nodes leave CPG, the DC clears their transient node attributes.
	     *
	     * However if there is no DC, or the DC is among the nodes that left, each
	     * remaining node needs to do the clearing, to ensure it gets done.
	     * Otherwise, the attributes would persist when the nodes rejoin, which
	     * could have serious consequences for unfencing, agents that use attributes
	     * for internal logic, etc.
	     *
	     * Here, we set a global boolean if the DC is among the nodes that left, for
	     * use by the peer callback.
	     */
	    if (controld_globals.dc_name != NULL) {
	        crm_node_t *peer = NULL;

	-        peer = pcmk__search_node_caches(0, controld_globals.dc_name,
	+        peer = pcmk__search_node_caches(0, controld_globals.dc_name, NULL,
	                                        pcmk__node_search_cluster_member);
	        if (peer != NULL) {
	            // Index type matches the size_t entry count
	            for (size_t i = 0; i < left_list_entries; ++i) {
	                if (left_list[i].nodeid == peer->id) {
	                    controld_set_global_flags(controld_dc_left);
	                    break;
	                }
	            }
	        }
	    }

	    // Process the change normally, which will call the peer callback as needed
	    pcmk__cpg_confchg_cb(handle, cpg_name, member_list, member_list_entries,
	                         left_list, left_list_entries,
	                         joined_list, joined_list_entries);

	    // The flag is only meaningful while the change is being processed
	    controld_clear_global_flags(controld_dc_left);
	}

	extern gboolean crm_connect_corosync(pcmk_cluster_t *cluster);

	/*!
	 * \brief Register the controller's cluster callbacks and connect to Corosync
	 *
	 * Nothing is done unless the detected cluster layer is Corosync. The quorum
	 * connection is only attempted after the cluster connection succeeds.
	 *
	 * \param[in,out] cluster  Cluster object to configure and connect
	 *
	 * \return TRUE if the cluster layer is Corosync and pcmk_cluster_connect()
	 *         returned pcmk_rc_ok, otherwise FALSE
	 */
	gboolean
	crm_connect_corosync(pcmk_cluster_t *cluster)
	{
	    if (pcmk_get_cluster_layer() != pcmk_cluster_layer_corosync) {
	        return FALSE;
	    }

	    pcmk__cluster_set_status_callback(&peer_update_callback);

	    pcmk_cluster_set_destroy_fn(cluster, crmd_cs_destroy);
	    pcmk_cpg_set_deliver_fn(cluster, crmd_cs_dispatch);
	    pcmk_cpg_set_confchg_fn(cluster, cpg_membership_callback);

	    if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) {
	        return FALSE;
	    }

	    pcmk__corosync_quorum_connect(crmd_quorum_callback, crmd_cs_destroy);
	    return TRUE;
	}

	#endif
	diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
	index 4875b1aa8c..098a46c553 100644
	--- a/daemons/controld/controld_fencing.c
	+++ b/daemons/controld/controld_fencing.c
	@@ -1,1129 +1,1118 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>
	#include <crm/crm.h>
	#include <crm/common/xml.h>
	#include <crm/stonith-ng.h>
	#include <crm/fencing/internal.h>

	#include <pacemaker-controld.h>

	// Forward declaration: this handler is registered before its definition
	static void
	tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);

	/*
	* stonith failure counting
	*
	* We don't want to get stuck in a permanent fencing loop. Keep track of the
	* number of fencing failures for each target node, and the most we'll restart a
	* transition for.
	*/

	// Per-target fencing failure record, stored as a value in stonith_failures
	struct st_fail_rec {
	int count; // Failures counted so far; zeroed by st_fail_count_reset()
	};

	// Value used for the max-attempts setting until configuration overrides it
	#define DEFAULT_STONITH_MAX_ATTEMPTS 10

	// Whether to panic (rather than exit) when notified of our own fencing
	static bool fence_reaction_panic = false;
	// Failure count at which too_many_st_failures() reports a target as hopeless
	static unsigned long int stonith_max_attempts = DEFAULT_STONITH_MAX_ATTEMPTS;
	// Target name -> struct st_fail_rec; created on the first recorded failure
	static GHashTable *stonith_failures = NULL;

	/*!
	 * \internal
	 * \brief Set the number of fencing failures tolerated before giving up
	 *
	 * \param[in] value  New limit, parsed with pcmk_parse_score() using
	 *                   DEFAULT_STONITH_MAX_ATTEMPTS as the default
	 *
	 * \note The current limit is left untouched if parsing fails or the parsed
	 *       value is not positive.
	 */
	static void
	update_stonith_max_attempts(const char *value)
	{
	    int score = 0;
	    int rc = pcmk_parse_score(value, &score, DEFAULT_STONITH_MAX_ATTEMPTS);

	    // The option validator ensures invalid values shouldn't be possible
	    CRM_CHECK((rc == pcmk_rc_ok) && (score > 0), return);

	    if (stonith_max_attempts == score) {
	        return; // Nothing changed, so there is nothing to log or store
	    }

	    crm_debug("Maximum fencing attempts per transition is now %d (was %lu)",
	              score, stonith_max_attempts);
	    stonith_max_attempts = score;
	}

	/*!
	 * \internal
	 * \brief Choose how to react when told the local node was fenced
	 *
	 * "panic" (case-insensitive) selects panicking; anything else selects
	 * stopping, with a warning logged if the value is not PCMK_VALUE_STOP.
	 *
	 * \param[in] reaction_s  Configured reaction name
	 */
	static void
	set_fence_reaction(const char *reaction_s)
	{
	    const bool want_panic = pcmk__str_eq(reaction_s, "panic", pcmk__str_casei);

	    if (!want_panic
	        && !pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) {

	        crm_warn("Invalid value '%s' for %s, using 'stop'",
	                 reaction_s, PCMK_OPT_FENCE_REACTION);
	    }
	    fence_reaction_panic = want_panic;
	}

	/*!
	 * \internal
	 * \brief Apply fencing-related cluster options
	 *
	 * Looks up PCMK_OPT_FENCE_REACTION and PCMK_OPT_STONITH_MAX_ATTEMPTS, in
	 * that order, and passes each value to its setter.
	 *
	 * \param[in,out] options  Name/value pairs for configured options
	 */
	void
	controld_configure_fencing(GHashTable *options)
	{
	    set_fence_reaction(g_hash_table_lookup(options,
	                                           PCMK_OPT_FENCE_REACTION));

	    update_stonith_max_attempts(g_hash_table_lookup(options,
	                                                    PCMK_OPT_STONITH_MAX_ATTEMPTS));
	}

	static gboolean
	too_many_st_failures(const char *target)
	{
	GHashTableIter iter;
	const char *key = NULL;
	struct st_fail_rec *value = NULL;

	if (stonith_failures == NULL) {
	return FALSE;
	}

	if (target == NULL) {
	g_hash_table_iter_init(&iter, stonith_failures);
	while (g_hash_table_iter_next(&iter, (gpointer *) &key,
	(gpointer *) &value)) {

	if (value->count >= stonith_max_attempts) {
	target = (const char*)key;
	goto too_many;
	}
	}
	} else {
	value = g_hash_table_lookup(stonith_failures, target);
	if ((value != NULL) && (value->count >= stonith_max_attempts)) {
	goto too_many;
	}
	}
	return FALSE;

	too_many:
	crm_warn("Too many failures (%d) to fence %s, giving up",
	value->count, target);
	return TRUE;
	}

	/*!
	* \internal
	* \brief Reset a stonith fail count
	*
	* \param[in] target Name of node to reset, or NULL for all
	*/
	void
	st_fail_count_reset(const char *target)
	{
	if (stonith_failures == NULL) {
	return;
	}

	if (target) {
	struct st_fail_rec *rec = NULL;

	rec = g_hash_table_lookup(stonith_failures, target);
	if (rec) {
	rec->count = 0;
	}
	} else {
	GHashTableIter iter;
	const char *key = NULL;
	struct st_fail_rec *rec = NULL;

	g_hash_table_iter_init(&iter, stonith_failures);
	while (g_hash_table_iter_next(&iter, (gpointer *) &key,
	(gpointer *) &rec)) {
	rec->count = 0;
	}
	}
	}

	/*!
	 * \internal
	 * \brief Record one more fencing failure for a node
	 *
	 * Creates the failure table on first use, and a record with a count of 1
	 * the first time a given node fails.
	 *
	 * \param[in] target  Name of node whose fencing failed
	 *
	 * \note If memory for a new record cannot be allocated, the failure is
	 *       silently not recorded.
	 */
	static void
	st_fail_count_increment(const char *target)
	{
	    struct st_fail_rec *entry = NULL;

	    if (stonith_failures == NULL) {
	        stonith_failures = pcmk__strkey_table(free, free);
	    }

	    entry = g_hash_table_lookup(stonith_failures, target);
	    if (entry != NULL) {
	        entry->count++;
	        return;
	    }

	    entry = malloc(sizeof *entry);
	    if (entry == NULL) {
	        return;
	    }
	    entry->count = 1;

	    // The table receives its own copy of the name as the key
	    g_hash_table_insert(stonith_failures, pcmk__str_copy(target), entry);
	}

	/* end stonith fail count functions */


	/*!
	 * \internal
	 * \brief Handle the result of a CIB update made after fencing
	 *
	 * On failure, log the error and the offending message, then abort the
	 * transition with pcmk__graph_shutdown as the next action. On success, just
	 * log completion.
	 *
	 * \param[in] msg        Message logged as "Failed update" on failure
	 * \param[in] call_id    CIB call ID of the update
	 * \param[in] rc         Result of the update (negative means failure)
	 * \param[in] output     Ignored
	 * \param[in] user_data  String logged as the name of the fenced node
	 */
	static void
	cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
	                    void *user_data)
	{
	    if (rc < pcmk_ok) {
	        crm_err("Fencing update %d for %s: failed - %s (%d)",
	                call_id, (char *)user_data, pcmk_strerror(rc), rc);
	        crm_log_xml_warn(msg, "Failed update");
	        abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown,
	                         "CIB update failed", NULL);

	    } else {
	        crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
	    }
	}

	+/*!
	+ * \internal
	+ * \brief Update a fencing target's node state
	+ *
	+ * \param[in] target Node that was successfully fenced
	+ * \param[in] target_xml_id CIB XML ID of target
	+ */
	static void
	-send_stonith_update(pcmk__graph_action_t *action, const char *target,
	- const char *uuid)
	+update_node_state_after_fencing(const char *target, const char *target_xml_id)
	{
	int rc = pcmk_ok;
	crm_node_t *peer = NULL;
	+ xmlNode *node_state = NULL;

	/* We (usually) rely on the membership layer to do node_update_cluster,
	* and the peer status callback to do node_update_peer, because the node
	* might have already rejoined before we get the stonith result here.
	*/
	int flags = node_update_join | node_update_expected;

	- /* zero out the node-status & remove all LRM status info */
	- xmlNode *node_state = NULL;
	-
	- CRM_CHECK(target != NULL, return);
	- CRM_CHECK(uuid != NULL, return);
	-
	- /* Make sure the membership and join caches are accurate.
	- * Try getting any existing node cache entry also by node uuid in case it
	- * doesn't have an uname yet.
	- */
	- peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any);
	+ CRM_CHECK((target != NULL) && (target_xml_id != NULL), return);

	+ // Ensure target is cached
	+ peer = pcmk__get_node(0, target, target_xml_id, pcmk__node_search_any);
	CRM_CHECK(peer != NULL, return);

	if (peer->state == NULL) {
	/* Usually, we rely on the membership layer to update the cluster state
	* in the CIB. However, if the node has never been seen, do it here, so
	* the node is not considered unclean.
	*/
	flags |= node_update_cluster;
	}

	if (peer->uuid == NULL) {
	- crm_info("Recording uuid '%s' for node '%s'", uuid, target);
	- peer->uuid = pcmk__str_copy(uuid);
	+ crm_info("Recording XML ID '%s' for node '%s'", target_xml_id, target);
	+ peer->uuid = pcmk__str_copy(target_xml_id);
	}

	crmd_peer_down(peer, TRUE);

	- /* Generate a node state update for the CIB */
	node_state = create_node_state_update(peer, flags, NULL, __func__);
	+ crm_xml_add(node_state, PCMK_XA_ID, target_xml_id);

	- /* we have to mark whether or not remote nodes have already been fenced */
	if (peer->flags & crm_remote_node) {
	char *now_s = pcmk__ttoa(time(NULL));

	crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s);
	free(now_s);
	}

	- /* Force our known ID */
	- crm_xml_add(node_state, PCMK_XA_ID, uuid);
	-
	rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
	PCMK_XE_STATUS, node_state,
	cib_scope_local
	|cib_can_create);
	+ free_xml(node_state);

	- /* Delay processing the trigger until the update completes */
	- crm_debug("Sending fencing update %d for %s", rc, target);
	+ crm_debug("Updating node state for %s after fencing (call %d)", target, rc);
	fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);

	- // Make sure it sticks
	- /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn,
	- * cib_scope_local);
	- */
	-
	controld_delete_node_state(peer->uname, controld_section_all,
	cib_scope_local);
	- free_xml(node_state);
	- return;
	}

	/*!
	 * \internal
	 * \brief Abort transition due to stonith failure
	 *
	 * \param[in] abort_action  Whether to restart or stop transition
	 * \param[in] target        Don't restart if this (NULL for any) has too many
	 *                          failures
	 * \param[in] reason        Log this stonith action XML as abort reason
	 *                          (or NULL)
	 */
	static void
	abort_for_stonith_failure(enum pcmk__graph_next abort_action,
	                          const char *target, const xmlNode *reason)
	{
	    /* If stonith repeatedly fails, we eventually give up on starting a new
	     * transition for that reason.
	     */
	    if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
	        abort_action = pcmk__graph_wait;
	    }
	    abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed",
	                     reason);
	}


	/*
	* stonith cleanup list
	*
	* If the DC is shot, proper notifications might not go out.
	* The stonith cleanup list allows the cluster to (re-)send
	* notifications once a new DC is elected.
	*/

	static GList *stonith_cleanup_list = NULL;

	/*!
	 * \internal
	 * \brief Append a node name to the stonith cleanup list
	 *
	 * The list stores its own copy of the name.
	 *
	 * \param[in] target  Name of node to add
	 */
	void
	add_stonith_cleanup(const char *target)
	{
	    char *name_copy = pcmk__str_copy(target);

	    stonith_cleanup_list = g_list_append(stonith_cleanup_list, name_copy);
	}

	/*!
	 * \internal
	 * \brief Remove a node from the stonith cleanup list
	 *
	 * Every entry whose name matches (case-insensitively) is unlinked and freed.
	 *
	 * \param[in] target  Name of node to remove
	 */
	void
	remove_stonith_cleanup(const char *target)
	{
	    GList *next = NULL;

	    for (GList *item = stonith_cleanup_list; item != NULL; item = next) {
	        char *name = item->data;

	        // Fetch the successor first, because the link may be deleted below
	        next = item->next;

	        if (!pcmk__str_eq(target, name, pcmk__str_casei)) {
	            continue;
	        }
	        crm_trace("Removing %s from the cleanup list", name);
	        stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, item);
	        free(name);
	    }
	}

	/*!
	 * \internal
	 * \brief Discard every entry in the stonith cleanup list
	 *
	 * Each name is logged and freed, then the list itself is freed and reset
	 * to empty.
	 */
	void
	purge_stonith_cleanup(void)
	{
	    // An empty (NULL) list simply skips the loop; freeing it is a no-op
	    for (GList *item = stonith_cleanup_list; item != NULL; item = item->next) {
	        char *name = item->data;

	        crm_info("Purging %s from stonith cleanup list", name);
	        free(name);
	    }

	    g_list_free(stonith_cleanup_list);
	    stonith_cleanup_list = NULL;
	}

	/*!
	* \internal
	* \brief Send stonith updates for all entries in cleanup list, then purge it
	*/
	void
	execute_stonith_cleanup(void)
	{
	GList *iter;

	for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
	char *target = iter->data;
	crm_node_t *target_node =
	pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member);
	- const char *uuid = pcmk__cluster_node_uuid(target_node);
	+ const char *uuid = pcmk__cluster_get_xml_id(target_node);

	crm_notice("Marking %s, target of a previous stonith action, as clean", target);
	- send_stonith_update(NULL, target, uuid);
	+ update_node_state_after_fencing(target, uuid);
	free(target);
	}
	g_list_free(stonith_cleanup_list);
	stonith_cleanup_list = NULL;
	}

	/* end stonith cleanup list functions */


	/* stonith API client
	*
	* Functions that need to interact directly with the fencer via its API
	*/

	// Fencer API connection; allocated in controld_timer_fencer_connect() and
	// freed by controld_disconnect_fencer(true)
	static stonith_t *stonith_api = NULL;
	// Timer that re-runs controld_timer_fencer_connect() to retry a connection
	static mainloop_timer_t *controld_fencer_connect_timer = NULL;
	// "<crm_system_name>.<pid>", built on first use in handle_fence_notification()
	static char *te_client_id = NULL;

	/*!
	 * \internal
	 * \brief Fail pending fencing actions that can no longer complete
	 *
	 * Walks every unconfirmed synapse of the graph and marks each unconfirmed
	 * cluster action whose operation is PCMK_ACTION_STONITH as failed. If any
	 * action was failed, the transition is aborted via
	 * abort_for_stonith_failure().
	 *
	 * \param[in,out] graph  Transition graph to check (may be NULL)
	 *
	 * \return TRUE if at least one fencing action was failed, otherwise FALSE
	 */
	static gboolean
	fail_incompletable_stonith(pcmk__graph_t *graph)
	{
	    GList *lpc = NULL;
	    const char *task = NULL;
	    xmlNode *last_action = NULL;

	    if (graph == NULL) {
	        return FALSE;
	    }

	    for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
	        GList *lpc2 = NULL;
	        pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;

	        if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
	            continue;
	        }

	        for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
	            pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;

	            if ((action->type != pcmk__cluster_graph_action)
	                || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
	                continue;
	            }

	            task = crm_element_value(action->xml, PCMK_XA_OPERATION);
	            if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
	                pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
	                last_action = action->xml;
	                pcmk__update_graph(graph, action);
	                crm_notice("Failing action %d (%s): fencer terminated",
	                           action->id, pcmk__xe_id(action->xml));
	            }
	        }
	    }

	    // The XML of the last action failed is passed on as the abort reason
	    if (last_action != NULL) {
	        crm_warn("Fencer failure resulted in unrunnable actions");
	        abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
	        return TRUE;
	    }

	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Handle a fencer disconnect notification
	 *
	 * Stops the history-sync timers, starts the reconnect timer if the fencer is
	 * still required, drops all registered notifications, and, if this node is
	 * DC, fails fencing actions that can no longer complete.
	 *
	 * \param[in,out] st  Fencer API connection that was lost
	 * \param[in]     e   Event notification (ignored)
	 */
	static void
	tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
	{
	    te_cleanup_stonith_history_sync(st, FALSE);

	    if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
	        crm_err("Lost fencer connection (will attempt to reconnect)");
	        if (!mainloop_timer_running(controld_fencer_connect_timer)) {
	            mainloop_timer_start(controld_fencer_connect_timer);
	        }
	    } else {
	        crm_info("Disconnected from fencer");
	    }

	    if (stonith_api) {
	        /* the client API won't properly reconnect notifications
	         * if they are still in the table - so remove them
	         */
	        if (stonith_api->state != stonith_disconnected) {
	            stonith_api->cmds->disconnect(st);
	        }
	        stonith_api->cmds->remove_notification(stonith_api, NULL);
	    }

	    if (AM_I_DC) {
	        fail_incompletable_stonith(controld_globals.transition_graph);
	        trigger_graph();
	    }
	}

	/*!
	* \internal
	* \brief Handle an event notification from the fencing API
	*
	* \param[in] st Fencing API connection (ignored)
	* \param[in] event Fencing API event notification
	*/
	static void
	handle_fence_notification(stonith_t *st, stonith_event_t *event)
	{
	bool succeeded = true;
	const char *executioner = "the cluster";
	const char *client = "a client";
	const char *reason = NULL;
	int exec_status;

	if (te_client_id == NULL) {
	te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
	(unsigned long) getpid());
	}

	if (event == NULL) {
	crm_err("Notify data not found");
	return;
	}

	if (event->executioner != NULL) {
	executioner = event->executioner;
	}
	if (event->client_origin != NULL) {
	client = event->client_origin;
	}

	exec_status = stonith__event_execution_status(event);
	if ((stonith__event_exit_status(event) != CRM_EX_OK)
	|| (exec_status != PCMK_EXEC_DONE)) {
	succeeded = false;
	if (exec_status == PCMK_EXEC_DONE) {
	exec_status = PCMK_EXEC_ERROR;
	}
	}
	reason = stonith__event_exit_reason(event);

	crmd_alert_fencing_op(event);

	if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
	// Unfencing doesn't need special handling, just a log message
	if (succeeded) {
	crm_notice("%s was unfenced by %s at the request of %s@%s",
	event->target, executioner, client, event->origin);
	} else {
	crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
	event->target, executioner,
	pcmk_exec_status_str(exec_status),
	((reason == NULL)? "" : ": "),
	((reason == NULL)? "" : reason),
	stonith__event_exit_status(event));
	}
	return;
	}

	if (succeeded
	&& pcmk__str_eq(event->target, controld_globals.our_nodename,
	pcmk__str_casei)) {
	/* We were notified of our own fencing. Most likely, either fencing was
	* misconfigured, or fabric fencing that doesn't cut cluster
	* communication is in use.
	*
	* Either way, shutting down the local host is a good idea, to require
	* administrator intervention. Also, other nodes would otherwise likely
	* set our status to lost because of the fencing callback and discard
	* our subsequent election votes as "not part of our cluster".
	*/
	crm_crit("We were allegedly just fenced by %s for %s!",
	executioner, event->origin); // Dumps blackbox if enabled
	if (fence_reaction_panic) {
	pcmk__panic(__func__);
	} else {
	crm_exit(CRM_EX_FATAL);
	}
	return; // Should never get here
	}

	/* Update the count of fencing failures for this target, in case we become
	* DC later. The current DC has already updated its fail count in
	* tengine_stonith_callback().
	*/
	if (!AM_I_DC) {
	if (succeeded) {
	st_fail_count_reset(event->target);
	} else {
	st_fail_count_increment(event->target);
	}
	}

	crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
	"%s%s%s%s " CRM_XS " event=%s",
	event->target, (succeeded? "" : " not"),
	event->action, executioner, client, event->origin,
	(succeeded? "OK" : pcmk_exec_status_str(exec_status)),
	((reason == NULL)? "" : " ("),
	((reason == NULL)? "" : reason),
	((reason == NULL)? "" : ")"),
	event->id);

	if (succeeded) {
	const uint32_t flags = pcmk__node_search_any
	|pcmk__node_search_cluster_cib;

	- crm_node_t *peer = pcmk__search_node_caches(0, event->target, flags);
	+ crm_node_t *peer = pcmk__search_node_caches(0, event->target, NULL,
	+ flags);
	const char *uuid = NULL;

	if (peer == NULL) {
	return;
	}

	- uuid = pcmk__cluster_node_uuid(peer);
	+ uuid = pcmk__cluster_get_xml_id(peer);

	if (AM_I_DC) {
	/* The DC always sends updates */
	- send_stonith_update(NULL, event->target, uuid);
	+ update_node_state_after_fencing(event->target, uuid);

	/* @TODO Ideally, at this point, we'd check whether the fenced node
	* hosted any guest nodes, and call remote_node_down() for them.
	* Unfortunately, the controller doesn't have a simple, reliable way
	* to map hosts to guests. It might be possible to track this in the
	* peer cache via refresh_remote_nodes(). For now, we rely on the
	* scheduler creating fence pseudo-events for the guests.
	*/

	if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
	/* Abort the current transition if it wasn't the cluster that
	* initiated fencing.
	*/
	crm_info("External fencing operation from %s fenced %s",
	client, event->target);
	abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
	"External Fencing Operation", NULL);
	}

	} else if (pcmk__str_eq(controld_globals.dc_name, event->target,
	pcmk__str_null_matches|pcmk__str_casei)
	&& !pcmk_is_set(peer->flags, crm_remote_node)) {
	// Assume the target was our DC if we don't currently have one

	if (controld_globals.dc_name != NULL) {
	crm_notice("Fencing target %s was our DC", event->target);
	} else {
	crm_notice("Fencing target %s may have been our DC",
	event->target);
	}

	/* Given the CIB resyncing that occurs around elections,
	* have one node update the CIB now and, if the new DC is different,
	* have them do so too after the election
	*/
	if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
	pcmk__str_casei)) {
	- send_stonith_update(NULL, event->target, uuid);
	+ update_node_state_after_fencing(event->target, uuid);
	}
	add_stonith_cleanup(event->target);
	}

	/* If the target is a remote node, and we host its connection,
	* immediately fail all monitors so it can be recovered quickly.
	* The connection won't necessarily drop when a remote node is fenced,
	* so the failure might not otherwise be detected until the next poke.
	*/
	if (pcmk_is_set(peer->flags, crm_remote_node)) {
	remote_ra_fail(event->target);
	}

	crmd_peer_down(peer, TRUE);
	}
	}

	/*!
	 * \brief Connect to fencer
	 *
	 * On a successful connection, the disconnect, fence and history-synced
	 * notification handlers are registered and a history sync is scheduled.
	 *
	 * \param[in] user_data  If NULL, retry failures now, otherwise retry in
	 *                       mainloop timer
	 *
	 * \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry
	 * \note If user_data is NULL, this will wait 2s between attempts, for up to
	 *       30 attempts, meaning the controller could be blocked as long as 58s.
	 * \note G_SOURCE_REMOVE is also returned when the API object cannot be
	 *       allocated, when blocking attempts are exhausted, and when a
	 *       non-blocking attempt fails but the fencer is no longer required.
	 */
	gboolean
	controld_timer_fencer_connect(gpointer user_data)
	{
	    stonith_api_operations_t *cmds = NULL;
	    int rc = pcmk_ok;

	    if (stonith_api == NULL) {
	        stonith_api = stonith_api_new();
	        if (stonith_api == NULL) {
	            crm_err("Could not connect to fencer: API memory allocation failed");
	            return G_SOURCE_REMOVE;
	        }
	    }

	    if (stonith_api->state != stonith_disconnected) {
	        crm_trace("Already connected to fencer, no need to retry");
	        return G_SOURCE_REMOVE;
	    }

	    if (user_data == NULL) {
	        // Blocking (retry failures now until successful)
	        rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
	        if (rc != pcmk_ok) {
	            crm_err("Could not connect to fencer in 30 attempts: %s "
	                    CRM_XS " rc=%d", pcmk_strerror(rc), rc);
	            return G_SOURCE_REMOVE;
	        }

	    } else {
	        // Non-blocking (retry failures later in main loop)
	        rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);

	        if (controld_fencer_connect_timer == NULL) {
	            controld_fencer_connect_timer =
	                mainloop_timer_add("controld_fencer_connect", 1000,
	                                   TRUE, controld_timer_fencer_connect,
	                                   GINT_TO_POINTER(TRUE));
	        }

	        if (rc != pcmk_ok) {
	            if (!pcmk_is_set(controld_globals.fsa_input_register,
	                             R_ST_REQUIRED)) {
	                crm_info("Fencer connection failed (ignoring because no longer required): %s "
	                         CRM_XS " rc=%d", pcmk_strerror(rc), rc);
	                return G_SOURCE_REMOVE;
	            }

	            crm_notice("Fencer connection failed (will retry): %s "
	                       CRM_XS " rc=%d", pcmk_strerror(rc), rc);

	            if (!mainloop_timer_running(controld_fencer_connect_timer)) {
	                mainloop_timer_start(controld_fencer_connect_timer);
	            }
	            return G_SOURCE_CONTINUE;
	        }
	    }

	    // Only a successful connection reaches this point
	    cmds = stonith_api->cmds;

	    cmds->register_notification(stonith_api,
	                                PCMK__VALUE_ST_NOTIFY_DISCONNECT,
	                                tengine_stonith_connection_destroy);
	    cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE,
	                                handle_fence_notification);
	    cmds->register_notification(stonith_api,
	                                PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
	                                tengine_stonith_history_synced);
	    te_trigger_stonith_history_sync(TRUE);
	    crm_notice("Fencer successfully connected");

	    return G_SOURCE_REMOVE;
	}

	/*!
	 * \internal
	 * \brief Disconnect from the fencer and optionally release client state
	 *
	 * If an API object exists, R_ST_REQUIRED is cleared so the connection is not
	 * re-established, the connection is closed if open, and all notifications
	 * are removed.
	 *
	 * \param[in] destroy  If true, also free the API object, the reconnect timer
	 *                     and the cached client ID
	 */
	void
	controld_disconnect_fencer(bool destroy)
	{
	    if (stonith_api != NULL) {
	        // Prevent fencer connection from coming up again
	        controld_clear_fsa_input_flags(R_ST_REQUIRED);

	        if (stonith_api->state != stonith_disconnected) {
	            stonith_api->cmds->disconnect(stonith_api);
	        }
	        stonith_api->cmds->remove_notification(stonith_api, NULL);
	    }

	    if (!destroy) {
	        return;
	    }

	    if (stonith_api != NULL) {
	        stonith_api->cmds->free(stonith_api);
	        stonith_api = NULL;
	    }

	    if (controld_fencer_connect_timer != NULL) {
	        mainloop_timer_del(controld_fencer_connect_timer);
	        controld_fencer_connect_timer = NULL;
	    }

	    free(te_client_id);
	    te_client_id = NULL;
	}

	/*!
	 * \internal
	 * \brief Ask the fencer to broadcast its history (trigger callback)
	 *
	 * Stops the history-sync timers and removes the history-synced notification,
	 * then issues a synchronous, broadcast history request. The returned
	 * history is discarded immediately.
	 *
	 * \param[in] user_data  Ignored
	 *
	 * \return TRUE if the request was issued, FALSE if the fencer is not
	 *         connected
	 */
	static gboolean
	do_stonith_history_sync(gpointer user_data)
	{
	    if (stonith_api && (stonith_api->state != stonith_disconnected)) {
	        stonith_history_t *history = NULL;

	        te_cleanup_stonith_history_sync(stonith_api, FALSE);
	        stonith_api->cmds->history(stonith_api,
	                                   st_opt_sync_call | st_opt_broadcast,
	                                   NULL, &history, 5);
	        stonith_history_free(history);
	        return TRUE;
	    } else {
	        crm_info("Skip triggering stonith history-sync as stonith is disconnected");
	        return FALSE;
	    }
	}

	static void
	tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
	{
	char *uuid = NULL;
	int stonith_id = -1;
	int transition_id = -1;
	pcmk__graph_action_t *action = NULL;
	const char *target = NULL;

	if ((data == NULL) || (data->userdata == NULL)) {
	crm_err("Ignoring fence operation %d result: "
	"No transition key given (bug?)",
	((data == NULL)? -1 : data->call_id));
	return;
	}

	if (!AM_I_DC) {
	const char *reason = stonith__exit_reason(data);

	if (reason == NULL) {
	reason = pcmk_exec_status_str(stonith__execution_status(data));
	}
	crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
	data->call_id, stonith__exit_status(data), reason,
	(const char *) data->userdata);
	return;
	}

	CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
	&stonith_id, NULL),
	goto bail);

	if (controld_globals.transition_graph->complete || (stonith_id < 0)
	|| !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
	|| (controld_globals.transition_graph->id != transition_id)) {
	crm_info("Ignoring fence operation %d result: "
	"Not from current transition " CRM_XS
	" complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
	data->call_id,
	pcmk__btoa(controld_globals.transition_graph->complete),
	stonith_id, uuid, controld_globals.te_uuid, transition_id,
	controld_globals.transition_graph->id);
	goto bail;
	}

	action = controld_get_action(stonith_id);
	if (action == NULL) {
	crm_err("Ignoring fence operation %d result: "
	"Action %d not found in transition graph (bug?) "
	CRM_XS " uuid=%s transition=%d",
	data->call_id, stonith_id, uuid, transition_id);
	goto bail;
	}

	target = crm_element_value(action->xml, PCMK__META_ON_NODE);
	if (target == NULL) {
	crm_err("Ignoring fence operation %d result: No target given (bug?)",
	data->call_id);
	goto bail;
	}

	stop_te_timer(action);
	if (stonith__exit_status(data) == CRM_EX_OK) {
	const char *uuid = crm_element_value(action->xml,
	PCMK__META_ON_NODE_UUID);
	const char *op = crm_meta_value(action->params,
	PCMK__META_STONITH_ACTION);

	crm_info("Fence operation %d for %s succeeded", data->call_id, target);
	if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
	te_action_confirmed(action, NULL);
	if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
	const char *value = NULL;
	char *now = pcmk__ttoa(time(NULL));
	gboolean is_remote_node = FALSE;

	/* This check is not 100% reliable, since this node is not
	* guaranteed to have the remote node cached. However, it
	* doesn't have to be reliable, since the attribute manager can
	* learn a node's "remoteness" by other means sooner or later.
	* This allows it to learn more quickly if this node does have
	* the information.
	*/
	if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
	is_remote_node = TRUE;
	}

	update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
	is_remote_node);
	free(now);

	value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL);
	update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
	is_remote_node);

	value = crm_meta_value(action->params,
	PCMK__META_DIGESTS_SECURE);
	update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
	is_remote_node);

	} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
	- send_stonith_update(action, target, uuid);
	+ update_node_state_after_fencing(target, uuid);
	pcmk__set_graph_action_flags(action,
	pcmk__graph_action_sent_update);
	}
	}
	st_fail_count_reset(target);

	} else {
	enum pcmk__graph_next abort_action = pcmk__graph_restart;
	int status = stonith__execution_status(data);
	const char *reason = stonith__exit_reason(data);

	if (reason == NULL) {
	if (status == PCMK_EXEC_DONE) {
	reason = "Agent returned error";
	} else {
	reason = pcmk_exec_status_str(status);
	}
	}
	pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);

	/* If no fence devices were available, there's no use in immediately
	* checking again, so don't start a new transition in that case.
	*/
	if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
	crm_warn("Fence operation %d for %s failed: %s "
	"(aborting transition and giving up for now)",
	data->call_id, target, reason);
	abort_action = pcmk__graph_wait;
	} else {
	crm_notice("Fence operation %d for %s failed: %s "
	"(aborting transition)", data->call_id, target, reason);
	}

	/* Increment the fail count now, so abort_for_stonith_failure() can
	* check it. Non-DC nodes will increment it in
	* handle_fence_notification().
	*/
	st_fail_count_increment(target);
	abort_for_stonith_failure(abort_action, target, NULL);
	}

	pcmk__update_graph(controld_globals.transition_graph, action);
	trigger_graph();

	bail:
	free(data->userdata);
	free(uuid);
	return;
	}

	/*!
	 * \internal
	 * \brief Request a fencing action from the fencer, with a delay
	 *
	 * The timeout is the transition graph's stonith_timeout divided by 1000.
	 * Self-fencing is allowed only when exactly one node is counted in the
	 * crm_join_confirmed phase.
	 *
	 * \param[in] target  Name of node to fence
	 * \param[in] type    Fencing action to perform
	 * \param[in] delay   Delay value passed through to the fencer API
	 *
	 * \return Whatever the fencer API's fence_with_delay() returns
	 */
	static int
	fence_with_delay(const char *target, const char *type, int delay)
	{
	    uint32_t options = st_opt_none; // Group of enum stonith_call_options
	    int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout
	                             / 1000);

	    if (crmd_join_phase_count(crm_join_confirmed) == 1) {
	        stonith__set_call_options(options, target, st_opt_allow_self_fencing);
	    }
	    return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
	                                               type, timeout_sec, 0, delay);
	}

	/*!
	 * \internal
	 * \brief Execute a fencing action from a transition graph
	 *
	 * Validates the action, ensures a fencer connection (blocking if needed),
	 * requests fencing, and registers tengine_stonith_callback() for the result
	 * with the transition key as its user data.
	 *
	 * \param[in] graph   Transition graph being executed (ignored)
	 * \param[in] action  Fencing action to execute
	 *
	 * \return Standard Pacemaker return code (EPROTO if the action lacks an ID,
	 *         target, target UUID or fencing action type, otherwise pcmk_rc_ok)
	 */
	int
	controld_execute_fence_action(pcmk__graph_t *graph,
	                              pcmk__graph_action_t *action)
	{
	    int rc = 0;
	    const char *id = pcmk__xe_id(action->xml);
	    const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
	    const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
	    const char *type = crm_meta_value(action->params,
	                                      PCMK__META_STONITH_ACTION);
	    char *transition_key = NULL;
	    const char *priority_delay = NULL;
	    int delay_i = 0;
	    gboolean invalid_action = FALSE;
	    int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
	                                 / 1000);

	    CRM_CHECK(id != NULL, invalid_action = TRUE);
	    CRM_CHECK(uuid != NULL, invalid_action = TRUE);
	    CRM_CHECK(type != NULL, invalid_action = TRUE);
	    CRM_CHECK(target != NULL, invalid_action = TRUE);

	    if (invalid_action) {
	        crm_log_xml_warn(action->xml, "BadAction");
	        return EPROTO;
	    }

	    priority_delay = crm_meta_value(action->params,
	                                    PCMK_OPT_PRIORITY_FENCING_DELAY);

	    crm_notice("Requesting fencing (%s) targeting node %s "
	               CRM_XS " action=%s timeout=%i%s%s",
	               type, target, id, stonith_timeout,
	               priority_delay ? " priority_delay=" : "",
	               priority_delay ? priority_delay : "");

	    /* Passing NULL means block until we can connect... */
	    controld_timer_fencer_connect(NULL);

	    pcmk__scan_min_int(priority_delay, &delay_i, 0);
	    rc = fence_with_delay(target, type, delay_i);

	    // End this statement with ';' (it previously ran on via a comma operator)
	    transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
	                                          action->id, 0,
	                                          controld_globals.te_uuid);

	    // Extend the callback timeout by any positive priority delay
	    stonith_api->cmds->register_callback(stonith_api, rc,
	                                         (stonith_timeout
	                                          + (delay_i > 0 ? delay_i : 0)),
	                                         st_opt_timeout_updates, transition_key,
	                                         "tengine_stonith_callback",
	                                         tengine_stonith_callback);
	    return pcmk_rc_ok;
	}

	bool
	controld_verify_stonith_watchdog_timeout(const char *value)
	{
	long long st_timeout = (value != NULL)? crm_get_msec(value) : 0;
	const char *our_nodename = controld_globals.our_nodename;

	if (st_timeout == 0
	\|\| (stonith_api && (stonith_api->state != stonith_disconnected) &&
	stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
	our_nodename))) {
	return pcmk__valid_stonith_watchdog_timeout(value);
	}
	return true;
	}

	/* end stonith API client functions */


	/*
	* stonith history synchronization
	*
	* Each node's fencer keeps track of a cluster-wide fencing history. When a node
	* joins or leaves, we need to synchronize the history across all nodes.
	*/

	// Main-loop trigger that runs do_stonith_history_sync() when set
	static crm_trigger_t *stonith_history_sync_trigger = NULL;
	// Timers that set the trigger when they fire; both are created lazily in
	// te_trigger_stonith_history_sync()
	static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
	static mainloop_timer_t *stonith_history_sync_timer_long = NULL;

	/*!
	 * \internal
	 * \brief Stop or destroy the history-sync timers
	 *
	 * \param[in,out] st           If not NULL, fencer connection from which to
	 *                             remove the history-synced notification
	 * \param[in]     free_timers  If true, delete both timers and clear their
	 *                             pointers; otherwise only stop them
	 */
	void
	te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
	{
	    if (!free_timers) {
	        mainloop_timer_stop(stonith_history_sync_timer_short);
	        mainloop_timer_stop(stonith_history_sync_timer_long);

	    } else {
	        mainloop_timer_del(stonith_history_sync_timer_short);
	        stonith_history_sync_timer_short = NULL;
	        mainloop_timer_del(stonith_history_sync_timer_long);
	        stonith_history_sync_timer_long = NULL;
	    }

	    if (st != NULL) {
	        st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED);
	    }
	}

	/*!
	 * \internal
	 * \brief Handle notification that the fence history has been synchronized
	 *
	 * Stops (without freeing) the history-sync timers and removes the
	 * history-synced notification from the fencer connection.
	 *
	 * \param[in,out] st        Fencer connection the notification arrived on
	 * \param[in]     st_event  Notification event (unused)
	 */
	static void
	tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
	{
	    te_cleanup_stonith_history_sync(st, FALSE);
	    crm_debug("Fence-history synced - cancel all timers");
	}

	/*!
	 * \internal
	 * \brief Timer callback that requests a fence-history synchronization
	 *
	 * \param[in] user_data  Ignored
	 *
	 * \return Always FALSE
	 */
	static gboolean
	stonith_history_sync_set_trigger(gpointer user_data)
	{
	    // The actual work happens in the trigger's own callback
	    mainloop_set_trigger(stonith_history_sync_trigger);

	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Schedule a cluster-wide fence-history synchronization
	 *
	 * \param[in] long_timeout  If true, (re)start the 30-second fallback timer;
	 *                          otherwise (re)start the 5-second timer
	 */
	void
	te_trigger_stonith_history_sync(bool long_timeout)
	{
	    /* The short (5s) delay gives more nodes the chance to show up, so that
	     * we don't create unnecessary stonith-history-sync traffic.
	     *
	     * The long (30s) delay is a fallback: after a successful connection to
	     * fenced, we wait 30s for the DC to trigger a history sync, and if that
	     * doesn't happen we trigger one locally (e.g. fenced segfaults and is
	     * restarted by pacemakerd).
	     *
	     * Because do_stonith_history_sync() checks the fencer connection itself,
	     * it is fine to leave the timers and the trigger around once created.
	     */
	    if (stonith_history_sync_trigger == NULL) {
	        stonith_history_sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW,
	                                                            do_stonith_history_sync,
	                                                            NULL);
	    }

	    if (!long_timeout) {
	        if (stonith_history_sync_timer_short == NULL) {
	            stonith_history_sync_timer_short =
	                mainloop_timer_add("history_sync_short", 5000, FALSE,
	                                   stonith_history_sync_set_trigger, NULL);
	        }
	        crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
	        mainloop_timer_start(stonith_history_sync_timer_short);
	        return;
	    }

	    if (stonith_history_sync_timer_long == NULL) {
	        stonith_history_sync_timer_long =
	            mainloop_timer_add("history_sync_long", 30000, FALSE,
	                               stonith_history_sync_set_trigger, NULL);
	    }
	    crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
	    mainloop_timer_start(stonith_history_sync_timer_long);
	}

	/* end stonith history synchronization functions */
	diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
	index ba083f54b2..cb2bf37e3a 100644
	--- a/daemons/controld/controld_join_dc.c
	+++ b/daemons/controld/controld_join_dc.c
	@@ -1,1058 +1,1058 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <crm/crm.h>

	#include <crm/common/xml.h>
	#include <crm/cluster.h>

	#include <pacemaker-controld.h>

	// Name of the node whose join request supplied max_generation_xml
	static char *max_generation_from = NULL;
	// Copy of the best CIB generation accepted so far in the current join round
	static xmlNodePtr max_generation_xml = NULL;

	/*!
	* \internal
	* \brief Nodes from which a CIB sync has failed since the peer joined
	*
	* This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
	* the name of a client node from which a CIB \p sync_from() call has failed in
	* \p do_dc_join_finalize() since the client joined the cluster as a peer.
	* \p join_id is the ID of the join round in which the \p sync_from() failed,
	* and is intended for use in nack log messages.
	*/
	static GHashTable *failed_sync_nodes = NULL;

	// Prototypes for functions that are used in this file before their definitions
	void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
	void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
	gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);

	/* Numeric counter used to identify join rounds (an unsigned int would be
	* appropriate, except we get and set it in XML as int)
	*/
	static int current_join_id = 0;

	/*!
	 * \internal
	 * \brief Free the table of nodes whose CIB sync failed, if it exists
	 */
	void
	controld_destroy_failed_sync_table(void)
	{
	    if (failed_sync_nodes == NULL) {
	        return; // Nothing was ever recorded
	    }
	    g_hash_table_destroy(failed_sync_nodes);
	    failed_sync_nodes = NULL;
	}

	/*!
	 * \internal
	 * \brief Forget a previous CIB sync failure for a node
	 *
	 * This is a no-op if the table does not exist or does not contain the node.
	 *
	 * \param[in] node_name  Name of node to drop from the failed sync table
	 */
	void
	controld_remove_failed_sync_node(const char *node_name)
	{
	    if (failed_sync_nodes == NULL) {
	        return;
	    }
	    g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
	}

	/*!
	 * \internal
	 * \brief Remember that a CIB sync from a node failed
	 *
	 * The table is created on first use. Keys are copies of the node name, and
	 * values are the join ID packed into the value pointer.
	 *
	 * \param[in] node_name  Name of node whose CIB failed to sync
	 * \param[in] join_id    Join round in which the failure occurred
	 */
	static void
	record_failed_sync_node(const char *node_name, gint join_id)
	{
	    if (failed_sync_nodes == NULL) {
	        failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
	    }

	    /* A node that is already present should have been nacked during the
	     * filter offer step, so finding it here again is worth an assertion log
	     */
	    CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
	                                       GINT_TO_POINTER(join_id)));
	}

	/*!
	 * \internal
	 * \brief Look up a node name in the failed sync table
	 *
	 * \param[in]  node_name  Name of node to look up
	 * \param[out] join_id    Where to store the join ID of when the sync failed
	 *
	 * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
	 *         node name was found, or \p pcmk_rc_node_unknown otherwise.
	 * \note \p *join_id is set to -1 if the node is not found.
	 */
	static int
	lookup_failed_sync_node(const char *node_name, gint *join_id)
	{
	    gpointer stored_id = NULL;

	    *join_id = -1;

	    if (failed_sync_nodes == NULL) {
	        return pcmk_rc_node_unknown;
	    }

	    /* The join ID is packed into the value pointer itself, so a NULL value
	     * does not prove that the key is absent (a join ID of 0 would look the
	     * same). Ask the table explicitly whether the key exists.
	     */
	    if (!g_hash_table_lookup_extended(failed_sync_nodes, node_name, NULL,
	                                      &stored_id)) {
	        return pcmk_rc_node_unknown;
	    }

	    *join_id = GPOINTER_TO_INT(stored_id);
	    return pcmk_rc_ok;
	}

	/*!
	 * \brief Update a cluster node's join phase, if the transition is allowed
	 *
	 * A change is accepted only if the new phase is at most \c crm_join_none
	 * (a reset or a nack) or is exactly one step past the node's current phase.
	 * Any other change is rejected with a warning. Nodes flagged as remote nodes
	 * are ignored.
	 *
	 * \param[in]     source  Caller's name (for logging only)
	 * \param[in,out] node    Node to update (a NULL node fails a CRM_CHECK and
	 *                        the function returns)
	 * \param[in]     phase   New join phase
	 */
	void
	crm_update_peer_join(const char *source, crm_node_t *node,
	                     enum crm_join_phase phase)
	{
	    enum crm_join_phase last = 0;

	    CRM_CHECK(node != NULL, return);

	    /* Remote nodes do not participate in joins */
	    if (pcmk_is_set(node->flags, crm_remote_node)) {
	        return;
	    }

	    last = node->join;

	    if (phase == last) {
	        crm_trace("Node %s join-%d phase is still %s "
	                  CRM_XS " nodeid=%u source=%s",
	                  node->uname, current_join_id, crm_join_phase_str(last),
	                  node->id, source);

	    } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
	        node->join = phase;
	        crm_trace("Node %s join-%d phase is now %s (was %s) "
	                  CRM_XS " nodeid=%u source=%s",
	                  node->uname, current_join_id, crm_join_phase_str(phase),
	                  crm_join_phase_str(last), node->id, source);

	    } else {
	        crm_warn("Rejecting join-%d phase update for node %s because "
	                 "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
	                 current_join_id, node->uname, crm_join_phase_str(last),
	                 crm_join_phase_str(phase), node->id, source);
	    }
	}

	/*!
	 * \internal
	 * \brief Reset per-round join state at the start of a new join round
	 *
	 * Every node in the peer cache is put back to \c crm_join_none, the
	 * remembered best CIB generation is dropped, and \c R_HAVE_CIB is cleared.
	 */
	static void
	start_join_round(void)
	{
	    GHashTableIter peer_iter;
	    crm_node_t *node = NULL;

	    crm_debug("Starting new join round join-%d", current_join_id);

	    g_hash_table_iter_init(&peer_iter, crm_peer_cache);
	    while (g_hash_table_iter_next(&peer_iter, NULL, (gpointer *) &node)) {
	        crm_update_peer_join(__func__, node, crm_join_none);
	    }

	    // Neither call needs a NULL guard
	    free(max_generation_from);
	    max_generation_from = NULL;

	    free_xml(max_generation_xml);
	    max_generation_xml = NULL;

	    controld_clear_fsa_input_flags(R_HAVE_CIB);
	}

	/*!
	 * \internal
	 * \brief Create a join message from the DC
	 *
	 * The message carries the current join ID and a flag saying whether the DC
	 * is shutting down.
	 *
	 * \param[in] join_op  Join operation name
	 * \param[in] host_to  Recipient of message
	 *
	 * \return Newly created request (the callers in this file free it with
	 *         free_xml() after sending)
	 */
	static xmlNode *
	create_dc_message(const char *join_op, const char *host_to)
	{
	    xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
	                                  CRM_SYSTEM_DC, NULL);

	    /* Identify which join round this is a part of */
	    crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);

	    /* Add a field specifying whether the DC is shutting down. This keeps the
	     * joining node from fencing the old DC if it becomes the new DC.
	     */
	    pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
	                           pcmk_is_set(controld_globals.fsa_input_register,
	                                       R_SHUTDOWN));
	    return msg;
	}

	/*!
	 * \internal
	 * \brief Send a join offer to one node (usable as a hash table iterator)
	 *
	 * No offer is made to a node that is inactive, has no known name, or (when
	 * \p user_data is non-NULL) is already past \c crm_join_none in this round.
	 *
	 * \param[in]     key        Ignored
	 * \param[in,out] value      Node (\c crm_node_t *) to make the offer to
	 * \param[in]     user_data  If non-NULL, skip nodes whose join phase is
	 *                           already beyond \c crm_join_none
	 */
	static void
	join_make_offer(gpointer key, gpointer value, gpointer user_data)
	{
	    xmlNode *offer = NULL;
	    crm_node_t *member = (crm_node_t *) value;

	    pcmk__assert(member != NULL);
	    if (!pcmk__cluster_is_node_active(member)) {
	        crm_info("Not making join-%d offer to inactive node %s",
	                 current_join_id,
	                 (member->uname? member->uname : "with unknown name"));
	        if ((member->expected == NULL)
	            && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
	            /* You would think this unsafe, but in fact this plus an
	             * active resource is what causes it to be fenced.
	             *
	             * Yes, this does mean that any node that dies at the same
	             * time as the old DC and is not running resource (still)
	             * won't be fenced.
	             *
	             * I'm not happy about this either.
	             */
	            pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
	        }
	        return;
	    }

	    if (member->uname == NULL) {
	        crm_info("Not making join-%d offer to node uuid %s with unknown name",
	                 current_join_id, member->uuid);
	        return;
	    }

	    if (controld_globals.membership_id != crm_peer_seq) {
	        controld_globals.membership_id = crm_peer_seq;
	        crm_info("Making join-%d offers based on membership event %llu",
	                 current_join_id, crm_peer_seq);
	    }

	    if ((user_data != NULL) && (member->join > crm_join_none)) {
	        crm_info("Not making join-%d offer to already known node %s (%s)",
	                 current_join_id, member->uname,
	                 crm_join_phase_str(member->join));
	        return;
	    }

	    // Reset first so that the step to crm_join_welcomed below is accepted
	    crm_update_peer_join(__func__, member, crm_join_none);

	    offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);

	    // Advertise our feature set so the joining node can bail if not compatible
	    crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);

	    crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
	    pcmk__cluster_send_message(member, crm_msg_crmd, offer);
	    free_xml(offer);

	    crm_update_peer_join(__func__, member, crm_join_welcomed);
	}

	/* A_DC_JOIN_OFFER_ALL */
	/*!
	 * \internal
	 * \brief Start a new join round and make a join offer to every cached peer
	 *
	 * \param[in] action         Ignored
	 * \param[in] cause          FSA cause (used only to log a node join)
	 * \param[in] cur_state      Ignored
	 * \param[in] current_input  FSA input (used only to log a node join)
	 * \param[in] msg_data       Ignored
	 */
	void
	do_dc_join_offer_all(long long action,
	                     enum crmd_fsa_cause cause,
	                     enum crmd_fsa_state cur_state,
	                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
	{
	    int outstanding = 0;

	    /* Reset everyone's status back to down or in_ccm in the CIB.
	     * Any nodes that are active in the CIB but not in the cluster membership
	     * will be seen as offline by the scheduler anyway.
	     */
	    current_join_id++;
	    start_join_round();

	    update_dc(NULL);
	    if ((cause == C_HA_MESSAGE) && (current_input == I_NODE_JOIN)) {
	        crm_info("A new node joined the cluster");
	    }

	    // NULL user data: offer to every node, whatever its current phase
	    g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);

	    outstanding = crmd_join_phase_count(crm_join_welcomed);
	    crm_info("Waiting on join-%d requests from %d outstanding node%s",
	             current_join_id, outstanding, pcmk__plural_s(outstanding));

	    // Don't waste time by invoking the scheduler yet
	}

	/* A_DC_JOIN_OFFER_ONE */
	/*!
	 * \internal
	 * \brief Make a join offer to the node that sent the current FSA message
	 *
	 * If the FSA data is empty, offers are made to every peer that is not
	 * already past \c crm_join_none instead. Otherwise the sender (and the
	 * local node, if it is not the sender) gets a fresh offer and the current
	 * transition is aborted.
	 *
	 * \param[in] action         Ignored
	 * \param[in] cause          Ignored
	 * \param[in] cur_state      Current FSA state
	 * \param[in] current_input  Ignored
	 * \param[in] msg_data       FSA data, possibly holding the message that
	 *                           prompted the offer
	 */
	void
	do_dc_join_offer_one(long long action,
	                     enum crmd_fsa_cause cause,
	                     enum crmd_fsa_state cur_state,
	                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
	{
	    crm_node_t *member;
	    ha_msg_input_t *welcome = NULL;
	    const char *join_to = NULL;
	    int outstanding = 0;

	    if (msg_data->data == NULL) {
	        crm_info("Making join-%d offers to any unconfirmed nodes "
	                 "because an unknown node joined", current_join_id);

	        // Only the non-NULL-ness of the user data matters to join_make_offer()
	        g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
	        check_join_state(cur_state, __func__);
	        return;
	    }

	    welcome = fsa_typed_data(fsa_dt_ha_msg);
	    if (welcome == NULL) {
	        // fsa_typed_data() already logged an error
	        return;
	    }

	    join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
	    if (join_to == NULL) {
	        crm_err("Can't make join-%d offer to unknown node", current_join_id);
	        return;
	    }

	    /* It is possible that a node will have been sick or starting up when the
	     * original offer was made. However, it will either re-announce itself in
	     * due course, or we can re-store the original offer on the client.
	     */
	    member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
	    crm_update_peer_join(__func__, member, crm_join_none);
	    join_make_offer(NULL, member, NULL);

	    /* If the offer isn't to the local node, make an offer to the local node as
	     * well, to ensure the correct value for max_generation_from.
	     */
	    if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
	        member = pcmk__get_node(0, controld_globals.our_nodename, NULL,
	                                pcmk__node_search_cluster_member);
	        join_make_offer(NULL, member, NULL);
	    }

	    /* This was a genuine join request; cancel any existing transition and
	     * invoke the scheduler.
	     */
	    abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
	                     NULL);

	    outstanding = crmd_join_phase_count(crm_join_welcomed);
	    crm_info("Waiting on join-%d requests from %d outstanding node%s",
	             current_join_id, outstanding, pcmk__plural_s(outstanding));

	    // Don't waste time by invoking the scheduler yet
	}

	/*!
	 * \internal
	 * \brief Compare one integer attribute of two XML elements
	 *
	 * An attribute that cannot be parsed as an integer is compared as -1, after
	 * logging a warning.
	 *
	 * \param[in] left   Element holding the current best CIB generation
	 * \param[in] right  Element holding the joining node's CIB generation
	 * \param[in] field  Name of the attribute to compare
	 *
	 * \return -1 if \p left is lower, 1 if \p left is higher, otherwise 0
	 */
	static int
	compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
	{
	    const char *text_l = crm_element_value(left, field);
	    const char *text_r = crm_element_value(right, field);
	    long long num_l;
	    long long num_r;

	    if (pcmk__scan_ll(text_l, &num_l, -1LL) != pcmk_rc_ok) {
	        // Shouldn't be possible
	        crm_warn("Comparing current CIB %s as -1 "
	                 "because '%s' is not an integer", field, text_l);
	    }

	    if (pcmk__scan_ll(text_r, &num_r, -1LL) != pcmk_rc_ok) {
	        // Shouldn't be possible
	        crm_warn("Comparing joining node's CIB %s as -1 "
	                 "because '%s' is not an integer", field, text_r);
	    }

	    if (num_l == num_r) {
	        return 0;
	    }
	    return (num_l < num_r)? -1 : 1;
	}

	/* A_DC_JOIN_PROCESS_REQ */
	/*!
	 * \internal
	 * \brief Decide whether to accept a node's join request
	 *
	 * A request for the current join round is rejected if a CIB sync from the
	 * node failed earlier, the node is inactive, the request has no CIB
	 * generation, the node's feature set is incompatible, or its CIB would
	 * become the best generation but uses an unknown schema. Otherwise it is
	 * accepted, and its CIB generation is remembered if it is the first or the
	 * best one seen so far in this round.
	 *
	 * \param[in] action         Ignored
	 * \param[in] cause          Ignored
	 * \param[in] cur_state      Current FSA state
	 * \param[in] current_input  Ignored
	 * \param[in] msg_data       FSA data holding the join request
	 */
	void
	do_dc_join_filter_offer(long long action,
	                        enum crmd_fsa_cause cause,
	                        enum crmd_fsa_state cur_state,
	                        enum crmd_fsa_input current_input, fsa_data_t * msg_data)
	{
	    xmlNode *generation = NULL;

	    int cmp = 0;
	    int join_id = -1;
	    int count = 0;
	    gint value = 0;
	    gboolean ack_nack_bool = TRUE;
	    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);

	    const char *join_from = NULL;
	    const char *ref = NULL;
	    const char *join_version = NULL;
	    crm_node_t *join_node = NULL;

	    if (join_ack == NULL) {
	        // fsa_typed_data() already logged an error
	        return;
	    }

	    join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
	    ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
	    join_version = crm_element_value(join_ack->msg, PCMK_XA_CRM_FEATURE_SET);

	    if (join_from == NULL) {
	        crm_err("Ignoring invalid join request without node name");
	        return;
	    }
	    join_node = pcmk__get_node(0, join_from, NULL,
	                               pcmk__node_search_cluster_member);

	    crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
	    if (join_id != current_join_id) {
	        crm_debug("Ignoring join-%d request from %s because we are on join-%d",
	                  join_id, join_from, current_join_id);
	        check_join_state(cur_state, __func__);
	        return;
	    }

	    generation = join_ack->xml;
	    if (max_generation_xml != NULL && generation != NULL) {
	        int lpc = 0;

	        // Compared in this order; the first difference decides
	        const char *attributes[] = {
	            PCMK_XA_ADMIN_EPOCH,
	            PCMK_XA_EPOCH,
	            PCMK_XA_NUM_UPDATES,
	        };

	        /* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
	         * element from the join client. The "if" guard is for clarity.
	         */
	        if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
	            for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
	                cmp = compare_int_fields(max_generation_xml, generation,
	                                         attributes[lpc]);
	            }

	        } else { // Should always be PCMK__XE_GENERATION_TUPLE
	            CRM_LOG_ASSERT(false);
	        }
	    }

	    if (ref == NULL) {
	        ref = "none"; // for logging only
	    }

	    if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
	        crm_err("Rejecting join-%d request from node %s because we failed to "
	                "sync its CIB in join-%d " CRM_XS " ref=%s",
	                join_id, join_from, value, ref);
	        ack_nack_bool = FALSE;

	    } else if (!pcmk__cluster_is_node_active(join_node)) {
	        if (match_down_event(join_from) != NULL) {
	            /* The join request was received after the node was fenced or
	             * otherwise shutdown in a way that we're aware of. No need to log
	             * an error in this rare occurrence; we know the client was recently
	             * shut down, and receiving a lingering in-flight request is not
	             * cause for alarm.
	             */
	            crm_debug("Rejecting join-%d request from inactive node %s "
	                      CRM_XS " ref=%s", join_id, join_from, ref);
	        } else {
	            crm_err("Rejecting join-%d request from inactive node %s "
	                    CRM_XS " ref=%s", join_id, join_from, ref);
	        }
	        ack_nack_bool = FALSE;

	    } else if (generation == NULL) {
	        crm_err("Rejecting invalid join-%d request from node %s "
	                "missing CIB generation " CRM_XS " ref=%s",
	                join_id, join_from, ref);
	        ack_nack_bool = FALSE;

	    } else if ((join_version == NULL)
	               || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
	        crm_err("Rejecting join-%d request from node %s because feature set %s"
	                " is incompatible with ours (%s) " CRM_XS " ref=%s",
	                join_id, join_from, (join_version? join_version : "pre-3.1.0"),
	                CRM_FEATURE_SET, ref);
	        ack_nack_bool = FALSE;

	    } else if (max_generation_xml == NULL) {
	        const char *validation = crm_element_value(generation,
	                                                   PCMK_XA_VALIDATE_WITH);

	        if (pcmk__get_schema(validation) == NULL) {
	            crm_err("Rejecting join-%d request from %s (with first CIB "
	                    "generation) due to unknown schema version %s "
	                    CRM_XS " ref=%s",
	                    join_id, join_from, pcmk__s(validation, "(missing)"), ref);
	            ack_nack_bool = FALSE;

	        } else {
	            crm_debug("Accepting join-%d request from %s (with first CIB "
	                      "generation) " CRM_XS " ref=%s",
	                      join_id, join_from, ref);
	            max_generation_xml = pcmk__xml_copy(NULL, generation);
	            pcmk__str_update(&max_generation_from, join_from);
	        }

	    } else if ((cmp < 0)
	               || ((cmp == 0)
	                   && pcmk__str_eq(join_from, controld_globals.our_nodename,
	                                   pcmk__str_casei))) {
	        // Better than the current best, or a tie that the local node wins
	        const char *validation = crm_element_value(generation,
	                                                   PCMK_XA_VALIDATE_WITH);

	        if (pcmk__get_schema(validation) == NULL) {
	            crm_err("Rejecting join-%d request from %s (with better CIB "
	                    "generation than current best from %s) due to unknown "
	                    "schema version %s " CRM_XS " ref=%s",
	                    join_id, join_from, max_generation_from,
	                    pcmk__s(validation, "(missing)"), ref);
	            ack_nack_bool = FALSE;

	        } else {
	            crm_debug("Accepting join-%d request from %s (with better CIB "
	                      "generation than current best from %s) " CRM_XS " ref=%s",
	                      join_id, join_from, max_generation_from, ref);
	            crm_log_xml_debug(max_generation_xml, "Old max generation");
	            crm_log_xml_debug(generation, "New max generation");

	            free_xml(max_generation_xml);
	            max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
	            pcmk__str_update(&max_generation_from, join_from);
	        }

	    } else {
	        crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
	                  join_id, join_from, ref);
	    }

	    if (!ack_nack_bool) {
	        if (compare_version(join_version, "3.17.0") < 0) {
	            /* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely
	             * after a nack message, don't send one
	             */
	            crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
	        } else {
	            crm_update_peer_join(__func__, join_node, crm_join_nack);
	        }
	        pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);

	    } else {
	        crm_update_peer_join(__func__, join_node, crm_join_integrated);
	        pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
	    }

	    count = crmd_join_phase_count(crm_join_integrated);
	    crm_debug("%d node%s currently integrated in join-%d",
	              count, pcmk__plural_s(count), join_id);

	    if (check_join_state(cur_state, __func__) == FALSE) {
	        // Don't waste time by invoking the scheduler yet
	        count = crmd_join_phase_count(crm_join_welcomed);
	        crm_debug("Waiting on join-%d requests from %d outstanding node%s",
	                  join_id, count, pcmk__plural_s(count));
	    }
	}

	/* A_DC_JOIN_FINALIZE */
	/*!
	 * \internal
	 * \brief Begin finalizing a join round by syncing the best CIB
	 *
	 * Nothing is done while welcomed nodes are still outstanding, when no node
	 * is integrated or nacked, or while a transition is in progress (in which
	 * case the FSA is stalled). Otherwise a CIB sync is requested from the node
	 * with the best generation, with finalize_sync_callback() registered for the
	 * result.
	 *
	 * \param[in] action         Ignored
	 * \param[in] cause          Ignored
	 * \param[in] cur_state      Ignored
	 * \param[in] current_input  Ignored
	 * \param[in] msg_data       Ignored
	 */
	void
	do_dc_join_finalize(long long action,
	                    enum crmd_fsa_cause cause,
	                    enum crmd_fsa_state cur_state,
	                    enum crmd_fsa_input current_input, fsa_data_t * msg_data)
	{
	    char *sync_from = NULL;
	    int rc = pcmk_ok;
	    int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
	    int count_finalizable = crmd_join_phase_count(crm_join_integrated)
	                            + crmd_join_phase_count(crm_join_nack)
	                            + crmd_join_phase_count(crm_join_nack_quiet);

	    /* This we can do straight away and avoid clients timing us out
	     * while we compute the latest CIB
	     */
	    if (count_welcomed != 0) {
	        crm_debug("Waiting on join-%d requests from %d outstanding node%s "
	                  "before finalizing join", current_join_id, count_welcomed,
	                  pcmk__plural_s(count_welcomed));
	        crmd_join_phase_log(LOG_DEBUG);
	        /* crmd_fsa_stall(FALSE); Needed? */
	        return;

	    } else if (count_finalizable == 0) {
	        crm_debug("Finalization not needed for join-%d at the current time",
	                  current_join_id);
	        crmd_join_phase_log(LOG_DEBUG);
	        check_join_state(controld_globals.fsa_state, __func__);
	        return;
	    }

	    // We already have the best CIB if it is ours, or if no best was recorded
	    controld_clear_fsa_input_flags(R_HAVE_CIB);
	    if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
	                     pcmk__str_null_matches|pcmk__str_casei)) {
	        controld_set_fsa_input_flags(R_HAVE_CIB);
	    }

	    if (!controld_globals.transition_graph->complete) {
	        crm_warn("Delaying join-%d finalization while transition in progress",
	                 current_join_id);
	        crmd_join_phase_log(LOG_DEBUG);
	        crmd_fsa_stall(FALSE);
	        return;
	    }

	    if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
	        // Send our CIB out to everyone
	        sync_from = pcmk__str_copy(controld_globals.our_nodename);
	        crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
	                  current_join_id, count_finalizable,
	                  pcmk__plural_s(count_finalizable));
	        crm_log_xml_debug(max_generation_xml, "Requested CIB version");

	    } else {
	        // Ask for the agreed best CIB
	        sync_from = pcmk__str_copy(max_generation_from);
	        crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
	                   current_join_id, count_finalizable,
	                   pcmk__plural_s(count_finalizable), sync_from);
	        crm_log_xml_notice(max_generation_xml, "Requested CIB version");
	    }
	    crmd_join_phase_log(LOG_DEBUG);

	    // sync_from is handed to the callback registration as its user data
	    rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
	                                                    sync_from, NULL, cib_none);
	    fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
	}

	/*!
	 * \internal
	 * \brief Free the remembered best CIB generation and the name of its source
	 */
	void
	free_max_generation(void)
	{
	    free_xml(max_generation_xml);
	    max_generation_xml = NULL;

	    free(max_generation_from);
	    max_generation_from = NULL;
	}

	/*!
	 * \internal
	 * \brief Handle the result of the CIB sync requested by do_dc_join_finalize()
	 *
	 * On failure, the join process is restarted, and the source node is
	 * recorded as a failed sync node unless the failure was old data. On
	 * success, if we are still DC and still in \c S_FINALIZE_JOIN, \c R_HAVE_CIB
	 * is set and the join results are sent to every finalizable node.
	 *
	 * \param[in] msg        Ignored
	 * \param[in] call_id    Ignored
	 * \param[in] rc         Legacy return code of the sync
	 * \param[in] output     Ignored
	 * \param[in] user_data  Name of the node the CIB was synced from
	 */
	void
	finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
	{
	    CRM_LOG_ASSERT(-EPERM != rc);

	    if (rc != pcmk_ok) {
	        const char *sync_from = (const char *) user_data;

	        do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
	                   "Could not sync CIB from %s in join-%d: %s",
	                   sync_from, current_join_id, pcmk_strerror(rc));

	        if (rc != -pcmk_err_old_data) {
	            record_failed_sync_node(sync_from, current_join_id);
	        }

	        /* restart the whole join process */
	        register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
	                               __func__);

	    } else if (!AM_I_DC) {
	        crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);

	    } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
	        crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
	                  "(%s)", current_join_id,
	                  fsa_state2string(controld_globals.fsa_state));

	    } else {
	        controld_set_fsa_input_flags(R_HAVE_CIB);

	        /* make sure dc_uuid is re-set to us */
	        if (!check_join_state(controld_globals.fsa_state, __func__)) {
	            int count_finalizable = 0;

	            count_finalizable = crmd_join_phase_count(crm_join_integrated)
	                                + crmd_join_phase_count(crm_join_nack)
	                                + crmd_join_phase_count(crm_join_nack_quiet);

	            crm_debug("Notifying %d node%s of join-%d results",
	                      count_finalizable, pcmk__plural_s(count_finalizable),
	                      current_join_id);
	            g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
	        }
	    }
	}

	/*!
	 * \internal
	 * \brief Handle the result of committing a node's history update to the CIB
	 *
	 * A failure is logged and registered as an FSA error. The join state is
	 * re-checked in either case.
	 *
	 * \param[in] msg        CIB reply (logged at debug level on failure)
	 * \param[in] call_id    ID of the CIB call (for logging)
	 * \param[in] rc         Legacy return code of the commit
	 * \param[in] output     Ignored
	 * \param[in] user_data  Name of the node whose history was updated
	 */
	static void
	join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
	                                xmlNode *output, void *user_data)
	{
	    const char *node = user_data;

	    if (rc != pcmk_ok) {
	        fsa_data_t *msg_data = NULL; // for register_fsa_error() macro

	        crm_crit("join-%d node history update (via CIB call %d) for node %s "
	                 "failed: %s",
	                 current_join_id, call_id, node, pcmk_strerror(rc));
	        crm_log_xml_debug(msg, "failed");
	        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);

	    } else {
	        // Claim completion only when the update actually succeeded
	        crm_debug("join-%d node history update (via CIB call %d) for node %s "
	                  "complete",
	                  current_join_id, call_id, node);
	    }
	    check_join_state(controld_globals.fsa_state, __func__);
	}

	/* A_DC_JOIN_PROCESS_ACK */
	/*!
	 * \internal
	 * \brief Process a node's join confirmation
	 *
	 * After sanity checks, the node is marked confirmed and its executor state
	 * is written to the CIB status section in a single transaction (delete the
	 * old state, then add the new). join_node_state_commit_callback() is
	 * registered for the result of the commit.
	 *
	 * \param[in] action         Ignored
	 * \param[in] cause          Ignored
	 * \param[in] cur_state      Ignored
	 * \param[in] current_input  Ignored
	 * \param[in] msg_data       FSA data holding the join confirmation
	 */
	void
	do_dc_join_ack(long long action,
	               enum crmd_fsa_cause cause,
	               enum crmd_fsa_state cur_state,
	               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
	{
	    int join_id = -1;
	    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);

	    const char *op = NULL;
	    char *join_from = NULL;
	    crm_node_t *peer = NULL;

	    enum controld_section_e section = controld_section_lrm;
	    char *xpath = NULL;
	    xmlNode *state = NULL;
	    xmlNode *execd_state = NULL;

	    cib_t *cib = controld_globals.cib_conn;
	    int rc = pcmk_ok;

	    if (join_ack == NULL) {
	        // fsa_typed_data() already logged an error; nothing allocated yet
	        return;
	    }

	    op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
	    join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
	    state = join_ack->xml;

	    // Sanity checks
	    if (join_from == NULL) {
	        crm_warn("Ignoring message received without node identification");
	        goto done;
	    }
	    if (op == NULL) {
	        crm_warn("Ignoring message received from %s without task", join_from);
	        goto done;
	    }

	    if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
	        crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
	                  op, join_from, CRM_OP_JOIN_CONFIRM);
	        goto done;
	    }

	    if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
	        crm_warn("Ignoring join confirmation from %s without valid join ID",
	                 join_from);
	        goto done;
	    }

	    peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
	    if (peer->join != crm_join_finalized) {
	        crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
	                 "(currently %s not %s)",
	                 join_id, join_from, crm_join_phase_str(peer->join),
	                 crm_join_phase_str(crm_join_finalized));
	        goto done;
	    }

	    if (join_id != current_join_id) {
	        crm_err("Rejecting join-%d confirmation from %s "
	                "because currently on join-%d",
	                join_id, join_from, current_join_id);
	        crm_update_peer_join(__func__, peer, crm_join_nack);
	        goto done;
	    }

	    crm_update_peer_join(__func__, peer, crm_join_confirmed);

	    /* Update CIB with node's current executor state. A new transition will be
	     * triggered later, when the CIB manager notifies us of the change.
	     *
	     * The delete and modify requests are part of an atomic transaction.
	     */
	    rc = cib->cmds->init_transaction(cib);
	    if (rc != pcmk_ok) {
	        goto done;
	    }

	    // Delete relevant parts of node's current executor state from CIB
	    if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
	        section = controld_section_lrm_unlocked;
	    }
	    controld_node_state_deletion_strings(join_from, section, &xpath, NULL);

	    rc = cib->cmds->remove(cib, xpath, NULL,
	                           cib_scope_local
	                           |cib_xpath
	                           |cib_multiple
	                           |cib_transaction);
	    if (rc != pcmk_ok) {
	        goto done;
	    }

	    // Update CIB with node's latest known executor state
	    if (pcmk__str_eq(join_from, controld_globals.our_nodename,
	                     pcmk__str_casei)) {

	        // Use the latest possible state if processing our own join ack
	        execd_state = controld_query_executor_state();

	        if (execd_state != NULL) {
	            crm_debug("Updating local node history for join-%d from query "
	                      "result",
	                      current_join_id);
	            state = execd_state;

	        } else {
	            crm_warn("Updating local node history from join-%d confirmation "
	                     "because query failed",
	                     current_join_id);
	        }

	    } else {
	        crm_debug("Updating node history for %s from join-%d confirmation",
	                  join_from, current_join_id);
	    }

	    rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
	                           cib_scope_local|cib_can_create|cib_transaction);
	    free_xml(execd_state);
	    if (rc != pcmk_ok) {
	        goto done;
	    }

	    // Commit the transaction
	    rc = cib->cmds->end_transaction(cib, true, cib_scope_local);
	    fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);

	    if (rc > 0) {
	        // join_from will be freed after callback
	        join_from = NULL;
	        rc = pcmk_ok;
	    }

	done:
	    if (rc != pcmk_ok) {
	        crm_crit("join-%d node history update for node %s failed: %s",
	                 current_join_id, join_from, pcmk_strerror(rc));
	        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
	    }
	    free(join_from);
	    free(xpath);
	}

	/*
	* Tell one node the result of its join request (usable as a hash table
	* iterator; key and user_data are not used, value is the crm_node_t to
	* process).
	*
	* Only nodes in the integrated, nack or nack_quiet join phase are processed.
	* For those, the node's entry in the CIB nodes section is updated. A
	* nack_quiet node gets no message, and an inactive node is marked as pending
	* instead of being sent anything. Every other node is sent a
	* CRM_OP_JOIN_ACKNAK message.
	*/
	void
	finalize_join_for(gpointer key, gpointer value, gpointer user_data)
	{
	xmlNode *acknak = NULL;
	xmlNode *tmp1 = NULL;
	crm_node_t *join_node = value;
	const char *join_to = join_node->uname;
	bool integrated = false;

	// integrated stays false for the two nack phases
	switch (join_node->join) {
	case crm_join_integrated:
	integrated = true;
	break;
	case crm_join_nack:
	case crm_join_nack_quiet:
	break;
	default:
	crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
	"for join-%d", join_to,
	crm_join_phase_str(join_node->join), current_join_id);
	return;
	}

	/* Update the <node> element with the node's name and UUID, in case they
	* weren't known before
	*/
	crm_trace("Updating node name and UUID in CIB for %s", join_to);
	tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
	- crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node));
	+ crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_get_xml_id(join_node));
	crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
	fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
	free_xml(tmp1);

	if (join_node->join == crm_join_nack_quiet) {
	crm_trace("Not sending nack message to node %s with feature set older "
	"than 3.17.0", join_to);
	return;
	}

	// Look the node up again by name before checking whether it is active
	join_node = pcmk__get_node(0, join_to, NULL,
	pcmk__node_search_cluster_member);
	if (!pcmk__cluster_is_node_active(join_node)) {
	/*
	* NACK'ing nodes that the membership layer doesn't know about yet
	* simply creates more churn
	*
	* Better to leave them waiting and let the join restart when
	* the new membership event comes in
	*
	* All other NACKs (due to versions etc) should still be processed
	*/
	pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
	return;
	}

	// Acknowledge or nack node's join request
	crm_debug("%sing join-%d request from %s",
	integrated? "Acknowledg" : "Nack", current_join_id, join_to);
	acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
	// The attribute named after the operation carries the verdict
	pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);

	if (integrated) {
	// No change needed for a nacked node
	crm_update_peer_join(__func__, join_node, crm_join_finalized);
	pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);

	/* Iterate through the remote peer cache and add information on which
	* node hosts each to the ACK message. This keeps new controllers in
	* sync with what has already happened.
	*/
	if (pcmk__cluster_num_remote_nodes() > 0) {
	GHashTableIter iter;
	crm_node_t *node = NULL;
	xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);

	g_hash_table_iter_init(&iter, crm_remote_peer_cache);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	xmlNode *remote = NULL;

	// Remote nodes without a known connection host are left out
	if (!node->conn_host) {
	continue;
	}

	remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
	pcmk__xe_set_props(remote,
	PCMK_XA_ID, node->uname,
	PCMK__XA_NODE_STATE, node->state,
	PCMK__XA_CONNECTION_HOST, node->conn_host,
	NULL);
	}
	}
	}
	pcmk__cluster_send_message(join_node, crm_msg_crmd, acknak);
	free_xml(acknak);
	return;
	}

	/*!
	 * \internal
	 * \brief Check whether the current join step is finished
	 *
	 * If the membership has changed since offers were made, an \c I_NODE_JOIN
	 * input is registered (at most once per membership sequence). Otherwise,
	 * in \c S_INTEGRATION an \c I_INTEGRATED input is registered once no
	 * welcomed nodes remain, and in \c S_FINALIZE_JOIN an \c I_FINALIZED input
	 * is registered once no welcomed, integrated or finalized nodes remain.
	 *
	 * \param[in] cur_state  Current FSA state
	 * \param[in] source     Caller's name (for logging only)
	 *
	 * \return TRUE if an \c I_INTEGRATED or \c I_FINALIZED input was registered,
	 *         or if finalization is waiting for the CIB; otherwise FALSE
	 */
	gboolean
	check_join_state(enum crmd_fsa_state cur_state, const char *source)
	{
	    static unsigned long long highest_seq = 0;
	    int count = 0;

	    if (controld_globals.membership_id != crm_peer_seq) {
	        crm_debug("join-%d: Membership changed from %llu to %llu "
	                  CRM_XS " highest=%llu state=%s for=%s",
	                  current_join_id, controld_globals.membership_id, crm_peer_seq,
	                  highest_seq, fsa_state2string(cur_state), source);
	        if (highest_seq < crm_peer_seq) {
	            /* Don't spam the FSA with duplicates */
	            highest_seq = crm_peer_seq;
	            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
	        }
	        return FALSE;
	    }

	    if (cur_state == S_INTEGRATION) {
	        if (crmd_join_phase_count(crm_join_welcomed) != 0) {
	            return FALSE;
	        }
	        count = crmd_join_phase_count(crm_join_integrated);

	        crm_debug("join-%d: Integration of %d peer%s complete "
	                  CRM_XS " state=%s for=%s",
	                  current_join_id, count, pcmk__plural_s(count),
	                  fsa_state2string(cur_state), source);
	        register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
	        return TRUE;
	    }

	    if (cur_state != S_FINALIZE_JOIN) {
	        return FALSE;
	    }

	    if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
	        crm_debug("join-%d: Delaying finalization until we have CIB "
	                  CRM_XS " state=%s for=%s",
	                  current_join_id, fsa_state2string(cur_state), source);
	        return TRUE;
	    }

	    // Report the earliest phase that still has nodes in it
	    count = crmd_join_phase_count(crm_join_welcomed);
	    if (count != 0) {
	        crm_debug("join-%d: Still waiting on %d welcomed node%s "
	                  CRM_XS " state=%s for=%s",
	                  current_join_id, count, pcmk__plural_s(count),
	                  fsa_state2string(cur_state), source);
	        crmd_join_phase_log(LOG_DEBUG);
	        return FALSE;
	    }

	    count = crmd_join_phase_count(crm_join_integrated);
	    if (count != 0) {
	        crm_debug("join-%d: Still waiting on %d integrated node%s "
	                  CRM_XS " state=%s for=%s",
	                  current_join_id, count, pcmk__plural_s(count),
	                  fsa_state2string(cur_state), source);
	        crmd_join_phase_log(LOG_DEBUG);
	        return FALSE;
	    }

	    count = crmd_join_phase_count(crm_join_finalized);
	    if (count != 0) {
	        crm_debug("join-%d: Still waiting on %d finalized node%s "
	                  CRM_XS " state=%s for=%s",
	                  current_join_id, count, pcmk__plural_s(count),
	                  fsa_state2string(cur_state), source);
	        crmd_join_phase_log(LOG_DEBUG);
	        return FALSE;
	    }

	    crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
	              current_join_id, fsa_state2string(cur_state), source);
	    register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief FSA action handler that requests a forced quorum status update
	 *
	 * Calls crm_update_quorum() with the current value of crm_have_quorum and
	 * force_update set. None of the FSA arguments are used by this handler.
	 *
	 * \param[in] action         Ignored
	 * \param[in] cause          Ignored
	 * \param[in] cur_state      Ignored
	 * \param[in] current_input  Ignored
	 * \param[in] msg_data       Ignored
	 */
	void
	do_dc_join_final(long long action, enum crmd_fsa_cause cause,
	                 enum crmd_fsa_state cur_state,
	                 enum crmd_fsa_input current_input, fsa_data_t *msg_data)
	{
	    const gboolean force = TRUE;

	    crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
	    crm_update_quorum(crm_have_quorum, force);
	}

	int crmd_join_phase_count(enum crm_join_phase phase)
	{
	int count = 0;
	crm_node_t *peer;
	GHashTableIter iter;

	g_hash_table_iter_init(&iter, crm_peer_cache);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
	if(peer->join == phase) {
	count++;
	}
	}
	return count;
	}

	void crmd_join_phase_log(int level)
	{
	crm_node_t *peer;
	GHashTableIter iter;

	g_hash_table_iter_init(&iter, crm_peer_cache);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
	do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
	crm_join_phase_str(peer->join));
	}
	}
	diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c
	index 0d182d6d08..e04d049c2f 100644
	--- a/daemons/controld/controld_membership.c
	+++ b/daemons/controld/controld_membership.c
	@@ -1,466 +1,466 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	/* put these first so that uuid_t is defined without conflicts */
	#include <crm_internal.h>

	#include <string.h>

	#include <crm/crm.h>
	#include <crm/common/xml.h>
	#include <crm/common/xml_internal.h>
	#include <crm/cluster/internal.h>

	#include <pacemaker-controld.h>

	void post_cache_update(int instance);

	extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);

	/*!
	 * \internal
	 * \brief Hash table callback that reacts to a peer no longer being active
	 *
	 * Used with g_hash_table_foreach() over crm_peer_cache. For a node that
	 * pcmk__cluster_is_node_active() reports as inactive, this resets its join
	 * phase, raises I_ERROR if the node is the local node or I_ELECTION if it is
	 * the DC (and we are not), re-checks the join state while integrating or
	 * finalizing, and fails graph actions that can no longer complete on it.
	 *
	 * \param[in]     key        Ignored
	 * \param[in,out] value      Peer cache entry (crm_node_t *)
	 * \param[in]     user_data  Ignored
	 */
	static void
	reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
	{
	    crm_node_t *node = value;

	    if (pcmk__cluster_is_node_active(node)) {
	        return; // Nothing to do for active peers
	    }

	    crm_update_peer_join(__func__, node, crm_join_none);

	    if ((node != NULL) && (node->uname != NULL)) {
	        if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
	                         pcmk__str_casei)) {
	            crm_err("We're not part of the cluster anymore");
	            register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);

	        } else if (!AM_I_DC
	                   && pcmk__str_eq(node->uname, controld_globals.dc_name,
	                                   pcmk__str_casei)) {
	            crm_warn("Our DC node (%s) left the cluster", node->uname);
	            register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
	        }
	    }

	    // A lost peer may be the one the current join round was waiting for
	    if ((controld_globals.fsa_state == S_INTEGRATION)
	        || (controld_globals.fsa_state == S_FINALIZE_JOIN)) {
	        check_join_state(controld_globals.fsa_state, __func__);
	    }
	    if ((node != NULL) && (node->uuid != NULL)) {
	        fail_incompletable_actions(controld_globals.transition_graph,
	                                   node->uuid);
	    }
	}

	/*!
	 * \internal
	 * \brief Perform controller housekeeping after a membership event
	 *
	 * Records the new membership sequence, reaps inactive peers, refreshes the
	 * CIB node entries when we are DC, requests an election check, and sends a
	 * no-op request so that peers are reminded of our presence.
	 *
	 * \param[in] instance  Membership event ID (stored in crm_peer_seq)
	 */
	void
	post_cache_update(int instance)
	{
	    xmlNode *no_op = NULL;

	    crm_peer_seq = instance;
	    crm_debug("Updated cache after membership event %d.", instance);

	    g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL);
	    controld_set_fsa_input_flags(R_MEMBERSHIP);

	    if (AM_I_DC) {
	        populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
	                           node_update_expected, __func__);
	    }

	    /*
	     * If we lost nodes, we should re-check the election status
	     * Safe to call outside of an election
	     */
	    controld_set_fsa_action_flags(A_ELECTION_CHECK);
	    controld_trigger_fsa();

	    /* Membership changed, remind everyone we're here.
	     * This will aid detection of duplicate DCs
	     */
	    no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD,
	                           AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL);
	    pcmk__cluster_send_message(NULL, crm_msg_crmd, no_op);
	    free_xml(no_op);
	}

	/*!
	 * \internal
	 * \brief CIB callback for the node status update sent by populate_cib_nodes()
	 *
	 * Logs the outcome and, on any failure, registers an I_ERROR FSA error.
	 *
	 * \param[in] msg        Message logged at debug level on failure
	 * \param[in] call_id    CIB call ID (a negative value is itself an error code)
	 * \param[in] rc         Result of the CIB operation
	 * \param[in] output     Ignored
	 * \param[in] user_data  Ignored
	 */
	static void
	crmd_node_update_complete(xmlNode *msg, int call_id, int rc, xmlNode *output,
	                          void *user_data)
	{
	    /* Not referenced directly below; presumably picked up by name inside the
	     * register_fsa_error() macro, so do not rename -- TODO confirm
	     */
	    fsa_data_t *msg_data = NULL;

	    if (rc == pcmk_ok) {
	        crm_trace("Node update %d complete", call_id);
	        return;
	    }

	    if (call_id < pcmk_ok) {
	        crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
	    } else {
	        crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
	    }

	    // Both failure flavors are handled the same way
	    crm_log_xml_debug(msg, "failed");
	    register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
	}

	/*!
	 * \internal
	 * \brief Create an XML node state tag with updates
	 *
	 * \param[in,out] node    Node whose state will be used for update
	 * \param[in]     flags   Bitmask of node_update_flags indicating what to update
	 * \param[in,out] parent  XML node to contain update (or NULL)
	 * \param[in]     source  Who requested the update (only used for logging)
	 *
	 * \return Pointer to created node state tag, or NULL if the node has no
	 *         state yet or no ID could be set on the new element
	 */
	xmlNode *
	create_node_state_update(crm_node_t *node, int flags, xmlNode *parent,
	                         const char *source)
	{
	    const char *value = NULL;
	    xmlNode *node_state;

	    if (!node->state) {
	        crm_info("Node update for %s cancelled: no state, not seen yet", node->uname);
	        return NULL;
	    }

	    node_state = pcmk__xe_create(parent, PCMK__XE_NODE_STATE);

	    if (pcmk_is_set(node->flags, crm_remote_node)) {
	        pcmk__xe_set_bool_attr(node_state, PCMK_XA_REMOTE_NODE, true);
	    }

	    // The ID is mandatory; discard the new element if it cannot be set
	    if (crm_xml_add(node_state, PCMK_XA_ID,
	- pcmk__cluster_node_uuid(node)) == NULL) {
	+ pcmk__cluster_get_xml_id(node)) == NULL) {
	        crm_info("Node update for %s cancelled: no ID", node->uname);
	        free_xml(node_state);
	        return NULL;
	    }

	    crm_xml_add(node_state, PCMK_XA_UNAME, node->uname);

	    if ((flags & node_update_cluster) && node->state) {
	        if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) {
	            // A value 0 means the node is not a cluster member.
	            crm_xml_add_ll(node_state, PCMK__XA_IN_CCM, node->when_member);

	        } else {
	            pcmk__xe_set_bool_attr(node_state, PCMK__XA_IN_CCM,
	                                   pcmk__str_eq(node->state, CRM_NODE_MEMBER,
	                                                pcmk__str_casei));
	        }
	    }

	    // Peer, join, and expected state are recorded for cluster nodes only
	    if (!pcmk_is_set(node->flags, crm_remote_node)) {
	        if (flags & node_update_peer) {
	            if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) {
	                // A value 0 means the peer is offline in CPG.
	                crm_xml_add_ll(node_state, PCMK_XA_CRMD, node->when_online);

	            } else {
	                // @COMPAT DCs < 2.1.7 use online/offline rather than timestamp
	                value = PCMK_VALUE_OFFLINE;
	                if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
	                    value = PCMK_VALUE_ONLINE;
	                }
	                crm_xml_add(node_state, PCMK_XA_CRMD, value);
	            }
	        }

	        if (flags & node_update_join) {
	            if (node->join <= crm_join_none) {
	                value = CRMD_JOINSTATE_DOWN;
	            } else {
	                value = CRMD_JOINSTATE_MEMBER;
	            }
	            crm_xml_add(node_state, PCMK__XA_JOIN, value);
	        }

	        if (flags & node_update_expected) {
	            crm_xml_add(node_state, PCMK_XA_EXPECTED, node->expected);
	        }
	    }

	    crm_xml_add(node_state, PCMK_XA_CRM_DEBUG_ORIGIN, source);

	    return node_state;
	}

	/*!
	 * \internal
	 * \brief CIB callback that logs the result of deleting a conflicting node
	 *
	 * \param[in] msg        Ignored
	 * \param[in] call_id    Ignored
	 * \param[in] rc         Result of the deletion (0 is logged at debug level,
	 *                       anything else at notice level)
	 * \param[in] output     Ignored
	 * \param[in] user_data  ID of the node that was deleted (string)
	 */
	static void
	remove_conflicting_node_callback(xmlNode *msg, int call_id, int rc,
	                                 xmlNode *output, void *user_data)
	{
	    const char *deleted_id = user_data;

	    do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
	                        "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
	                        deleted_id, pcmk_strerror(rc), rc);
	}

	/*!
	 * \internal
	 * \brief CIB callback that deletes unknown nodes with a conflicting name
	 *
	 * For each node element in the query result that does not match (by both ID
	 * and name) any entry of crm_peer_cache, request removal of its node entry
	 * and of its node state entry from the CIB.
	 *
	 * \param[in] msg        Ignored
	 * \param[in] call_id    Ignored
	 * \param[in] rc         Result of the query (-ENXIO is silently ignored)
	 * \param[in] output     Query result: a node element or a parent of them
	 * \param[in] user_data  ID of the node the query was made for (string)
	 */
	static void
	search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
	                                 xmlNode * output, void *user_data)
	{
	    char *new_node_uuid = user_data;
	    xmlNode *node_xml = NULL;

	    if (rc != pcmk_ok) {
	        if (rc != -ENXIO) {
	            crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
	                       new_node_uuid, pcmk_strerror(rc), rc);
	        }
	        return;

	    } else if (output == NULL) {
	        return;
	    }

	    // The result is either a single node element or a container of them
	    if (pcmk__xe_is(output, PCMK_XE_NODE)) {
	        node_xml = output;

	    } else {
	        node_xml = pcmk__xe_first_child(output, PCMK_XE_NODE, NULL, NULL);
	    }

	    for (; node_xml != NULL; node_xml = pcmk__xe_next_same(node_xml)) {
	        const char *node_uuid = NULL;
	        const char *node_uname = NULL;
	        GHashTableIter iter;
	        crm_node_t *node = NULL;
	        gboolean known = FALSE;

	        node_uuid = crm_element_value(node_xml, PCMK_XA_ID);
	        node_uname = crm_element_value(node_xml, PCMK_XA_UNAME);

	        if (node_uuid == NULL || node_uname == NULL) {
	            continue;
	        }

	        // A node is known only if both its ID and its name match a peer
	        g_hash_table_iter_init(&iter, crm_peer_cache);
	        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	            if (node->uuid
	                && pcmk__str_eq(node->uuid, node_uuid, pcmk__str_casei)
	                && node->uname
	                && pcmk__str_eq(node->uname, node_uname, pcmk__str_casei)) {

	                known = TRUE;
	                break;
	            }
	        }

	        if (known == FALSE) {
	            cib_t *cib_conn = controld_globals.cib_conn;
	            int delete_call_id = 0;
	            xmlNode *node_state_xml = NULL;

	            crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
	                       node_uuid, node_uname, new_node_uuid);

	            // Remove the node's configuration entry ...
	            delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_NODES,
	                                                    node_xml, cib_scope_local);
	            fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid),
	                                      remove_conflicting_node_callback);

	            // ... and its status entry
	            node_state_xml = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
	            crm_xml_add(node_state_xml, PCMK_XA_ID, node_uuid);
	            crm_xml_add(node_state_xml, PCMK_XA_UNAME, node_uname);

	            delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_STATUS,
	                                                    node_state_xml,
	                                                    cib_scope_local);
	            fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid),
	                                      remove_conflicting_node_callback);
	            free_xml(node_state_xml);
	        }
	    }
	}

	/*!
	 * \internal
	 * \brief CIB callback for the node list update sent by populate_cib_nodes()
	 *
	 * Registers an I_ERROR FSA error if either the call ID or the result code
	 * is negative; otherwise does nothing.
	 *
	 * \param[in] msg        Message logged at debug level on failure
	 * \param[in] call_id    CIB call ID (a negative value is itself an error code)
	 * \param[in] rc         Result of the CIB operation
	 * \param[in] output     Ignored
	 * \param[in] user_data  Ignored
	 */
	static void
	node_list_update_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
	                          void *user_data)
	{
	    /* Not referenced directly below; presumably picked up by name inside the
	     * register_fsa_error() macro, so do not rename -- TODO confirm
	     */
	    fsa_data_t *msg_data = NULL;

	    if (call_id < pcmk_ok) {
	        crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);

	    } else if (rc < pcmk_ok) {
	        crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);

	    } else {
	        return; // Success: nothing to report
	    }

	    crm_log_xml_debug(msg, "update:failed");
	    register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
	}

	/*!
	 * \internal
	 * \brief Write the known nodes (and, on the DC, their state) to the CIB
	 *
	 * Builds a nodes section either from the cluster layer (Corosync, when
	 * node_update_quick is not set) or from crm_peer_cache, and submits it to
	 * the CIB. While building from the cache, a query is issued per node to find
	 * CIB node entries that share the node's name but have a different ID.
	 * If the submission succeeds and we are DC, a status section with a node
	 * state entry for every cached cluster and remote peer is submitted as well.
	 *
	 * \param[in] flags   Group of enum node_update_flags
	 * \param[in] source  Who requested the update (only used for logging)
	 */
	void
	populate_cib_nodes(enum node_update_flags flags, const char *source)
	{
	    cib_t *cib_conn = controld_globals.cib_conn;

	    int call_id = 0;
	    gboolean from_hashtable = TRUE;
	    xmlNode *node_list = pcmk__xe_create(NULL, PCMK_XE_NODES);

	#if SUPPORT_COROSYNC
	    if (!pcmk_is_set(flags, node_update_quick)
	        && (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)) {

	        from_hashtable = pcmk__corosync_add_nodes(node_list);
	    }
	#endif

	    if (from_hashtable) {
	        GHashTableIter iter;
	        crm_node_t *node = NULL;
	        GString *xpath = NULL;

	        g_hash_table_iter_init(&iter, crm_peer_cache);
	        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	            xmlNode *new_node = NULL;

	            if ((node->uuid != NULL) && (node->uname != NULL)) {
	                crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);

	                // Reuse a single buffer for every node's XPath expression
	                if (xpath == NULL) {
	                    xpath = g_string_sized_new(512);
	                } else {
	                    g_string_truncate(xpath, 0);
	                }

	                /* We need both to be valid */
	                new_node = pcmk__xe_create(node_list, PCMK_XE_NODE);
	                crm_xml_add(new_node, PCMK_XA_ID, node->uuid);
	                crm_xml_add(new_node, PCMK_XA_UNAME, node->uname);

	                /* Search and remove unknown nodes with the conflicting uname from CIB */
	                pcmk__g_strcat(xpath,
	                               "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION
	                               "/" PCMK_XE_NODES "/" PCMK_XE_NODE
	                               "[@" PCMK_XA_UNAME "='", node->uname, "']"
	                               "[@" PCMK_XA_ID "!='", node->uuid, "']", NULL);

	                call_id = cib_conn->cmds->query(cib_conn,
	                                                (const char *) xpath->str,
	                                                NULL,
	                                                cib_scope_local|cib_xpath);
	                fsa_register_cib_callback(call_id, pcmk__str_copy(node->uuid),
	                                          search_conflicting_node_callback);
	            }
	        }

	        if (xpath != NULL) {
	            g_string_free(xpath, TRUE);
	        }
	    }

	    crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");

	    if ((controld_update_cib(PCMK_XE_NODES, node_list, cib_scope_local,
	                             node_list_update_callback) == pcmk_rc_ok)
	        && (crm_peer_cache != NULL) && AM_I_DC) {
	        /*
	         * There is no need to update the local CIB with our values if
	         * we've not seen valid membership data
	         */
	        GHashTableIter iter;
	        crm_node_t *node = NULL;

	        // Reuse node_list for the status section
	        free_xml(node_list);
	        node_list = pcmk__xe_create(NULL, PCMK_XE_STATUS);

	        g_hash_table_iter_init(&iter, crm_peer_cache);
	        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	            create_node_state_update(node, flags, node_list, source);
	        }

	        if (crm_remote_peer_cache) {
	            g_hash_table_iter_init(&iter, crm_remote_peer_cache);
	            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	                create_node_state_update(node, flags, node_list, source);
	            }
	        }

	        controld_update_cib(PCMK_XE_STATUS, node_list, cib_scope_local,
	                            crmd_node_update_complete);
	    }
	    free_xml(node_list);
	}

	/*!
	 * \internal
	 * \brief CIB callback for the quorum update sent by crm_update_quorum()
	 *
	 * \param[in] msg        Message logged at debug level on failure
	 * \param[in] call_id    CIB call ID (only used for logging)
	 * \param[in] rc         Result of the CIB operation
	 * \param[in] output     Ignored
	 * \param[in] user_data  Ignored
	 */
	static void
	cib_quorum_update_complete(xmlNode *msg, int call_id, int rc, xmlNode *output,
	                           void *user_data)
	{
	    /* Not referenced directly below; presumably picked up by name inside the
	     * register_fsa_error() macro, so do not rename -- TODO confirm
	     */
	    fsa_data_t *msg_data = NULL;

	    if (rc == pcmk_ok) {
	        crm_trace("Quorum update %d complete", call_id);
	        return;
	    }

	    crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
	    crm_log_xml_debug(msg, "failed");
	    register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
	}

	/*!
	 * \internal
	 * \brief Record a quorum status and, on the DC, publish it to the CIB
	 *
	 * Remembers that quorum was ever held, panics if quorum is lost while both
	 * controld_ever_had_quorum and controld_no_quorum_panic are set, and, when
	 * we are DC and the status changed (or \p force_update is set), updates the
	 * CIB and aborts the current transition. Finally the controld_has_quorum
	 * global flag is set or cleared to match \p quorum.
	 *
	 * \param[in] quorum        Whether the cluster currently has quorum
	 * \param[in] force_update  If TRUE, update the CIB even without a change
	 */
	void
	crm_update_quorum(gboolean quorum, gboolean force_update)
	{
	    // Quorum status as recorded before this call
	    bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum);

	    if (quorum) {
	        controld_set_global_flags(controld_ever_had_quorum);

	    } else if (pcmk_all_flags_set(controld_globals.flags,
	                                  controld_ever_had_quorum
	                                  |controld_no_quorum_panic)) {
	        pcmk__panic(__func__);
	    }

	    if (AM_I_DC
	        && ((has_quorum && !quorum) || (!has_quorum && quorum)
	            || force_update)) {
	        xmlNode *update = NULL;

	        update = pcmk__xe_create(NULL, PCMK_XE_CIB);
	        crm_xml_add_int(update, PCMK_XA_HAVE_QUORUM, quorum);
	        crm_xml_add(update, PCMK_XA_DC_UUID, controld_globals.our_uuid);

	        crm_debug("Updating quorum status to %s", pcmk__btoa(quorum));
	        controld_update_cib(PCMK_XE_CIB, update, cib_scope_local,
	                            cib_quorum_update_complete);
	        free_xml(update);

	        /* Quorum changes usually cause a new transition via other activity:
	         * quorum gained via a node joining will abort via the node join,
	         * and quorum lost via a node leaving will usually abort via resource
	         * activity and/or fencing.
	         *
	         * However, it is possible that nothing else causes a transition (e.g.
	         * someone forces quorum via corosync-cmapctl, or quorum is lost due to
	         * a node in standby shutting down cleanly), so here ensure a new
	         * transition is triggered.
	         */
	        if (quorum) {
	            /* If quorum was gained, abort after a short delay, in case multiple
	             * nodes are joining around the same time, so the one that brings us
	             * to quorum doesn't cause all the remaining ones to be fenced.
	             */
	            abort_after_delay(PCMK_SCORE_INFINITY, pcmk__graph_restart,
	                              "Quorum gained", 5000);
	        } else {
	            abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
	                             "Quorum lost", NULL);
	        }
	    }

	    if (quorum) {
	        controld_set_global_flags(controld_has_quorum);
	    } else {
	        controld_clear_global_flags(controld_has_quorum);
	    }
	}
	diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
	index 452aa16b29..88c032fe6a 100644
	--- a/daemons/controld/controld_messages.c
	+++ b/daemons/controld/controld_messages.c
	@@ -1,1381 +1,1383 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sys/param.h>
	#include <string.h>
	#include <time.h>

	#include <crm/crm.h>
	#include <crm/common/xml.h>
	#include <crm/cluster/internal.h>
	#include <crm/cib.h>
	#include <crm/common/ipc_internal.h>

	#include <pacemaker-controld.h>

	// Forward declarations of file-local helpers defined later in this file
	static enum crmd_fsa_input handle_message(xmlNode *msg,
	                                          enum crmd_fsa_cause cause);
	static xmlNode* create_ping_reply(const xmlNode *msg);
	static void handle_response(xmlNode *stored_msg);
	static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
	                                          enum crmd_fsa_cause cause);
	static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
	static void send_msg_via_ipc(xmlNode * msg, const char *sys, const char *src);

	/* debug only, can wrap all it likes */
	static int last_data_id = 0;

	/*!
	 * \internal
	 * \brief Register an error input with the FSA, resetting pending actions
	 *
	 * Any currently pending FSA actions are first saved as a prepended I_NULL
	 * input (carrying the current input's cause and data, if any), then the
	 * global action list is cleared and the error input itself is prepended.
	 *
	 * \param[in] cause        What caused the error
	 * \param[in] input        FSA input to register for the error
	 * \param[in] cur_data     FSA data currently being processed (or NULL)
	 * \param[in] new_data     Data to attach to the error input (or NULL)
	 * \param[in] raised_from  Name of the function raising the error
	 */
	void
	register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
	                       fsa_data_t *cur_data, void *new_data,
	                       const char *raised_from)
	{
	    /* save the current actions if any */
	    if (controld_globals.fsa_actions != A_NOTHING) {
	        register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
	                               I_NULL, cur_data ? cur_data->data : NULL,
	                               controld_globals.fsa_actions, TRUE, __func__);
	    }

	    /* reset the action list */
	    crm_info("Resetting the current action list");
	    fsa_dump_actions(controld_globals.fsa_actions, "Drop");
	    controld_globals.fsa_actions = A_NOTHING;

	    /* register the error */
	    register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
	}

	/*!
	 * \internal
	 * \brief Add an input to the FSA message queue
	 *
	 * A copy of \p data (if any) is stored in the queued entry; how it is copied
	 * depends on \p cause. An I_WAIT_FOR_EVENT input marks the FSA as stalled;
	 * without data it merely restores \p with_actions, and with data it takes
	 * over all currently pending actions. The FSA is triggered after queueing
	 * unless the input is I_WAIT_FOR_EVENT.
	 *
	 * \param[in] cause         What caused the input
	 * \param[in] input         FSA input to queue
	 * \param[in] data          Data to copy into the queued entry (or NULL)
	 * \param[in] with_actions  FSA actions to attach to the entry
	 * \param[in] prepend       If TRUE, add to the front of the queue rather
	 *                          than the back
	 * \param[in] raised_from   Name of the function registering the input
	 */
	void
	register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
	                       void *data, uint64_t with_actions,
	                       gboolean prepend, const char *raised_from)
	{
	    unsigned old_len = g_list_length(controld_globals.fsa_message_queue);
	    fsa_data_t *fsa_data = NULL;

	    if (raised_from == NULL) {
	        raised_from = "<unknown>";
	    }

	    if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
	        /* no point doing anything */
	        crm_err("Cannot add entry to queue: no input and no action");
	        return;
	    }

	    if (input == I_WAIT_FOR_EVENT) {
	        controld_set_global_flags(controld_fsa_is_stalled);
	        crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%u",
	                  raised_from, fsa_cause2string(cause), data, old_len);

	        // A stall must not jump ahead of inputs that are already queued
	        if (old_len > 0) {
	            fsa_dump_queue(LOG_TRACE);
	            prepend = FALSE;
	        }

	        if (data == NULL) {
	            controld_set_fsa_action_flags(with_actions);
	            fsa_dump_actions(with_actions, "Restored");
	            return;
	        }

	        /* Store everything in the new event and reset
	         * controld_globals.fsa_actions
	         */
	        with_actions |= controld_globals.fsa_actions;
	        controld_globals.fsa_actions = A_NOTHING;
	    }

	    last_data_id++;
	    crm_trace("%s %s FSA input %d (%s) due to %s, %s data",
	              raised_from, (prepend? "prepended" : "appended"), last_data_id,
	              fsa_input2string(input), fsa_cause2string(cause),
	              (data? "with" : "without"));

	    fsa_data = pcmk__assert_alloc(1, sizeof(fsa_data_t));
	    fsa_data->id = last_data_id;
	    fsa_data->fsa_input = input;
	    fsa_data->fsa_cause = cause;
	    fsa_data->origin = raised_from;
	    fsa_data->data = NULL;
	    fsa_data->data_type = fsa_dt_none;
	    fsa_data->actions = with_actions;

	    if (with_actions != A_NOTHING) {
	        crm_trace("Adding actions %.16llx to input",
	                  (unsigned long long) with_actions);
	    }

	    // The cause determines the concrete type behind the data pointer
	    if (data != NULL) {
	        switch (cause) {
	            case C_FSA_INTERNAL:
	            case C_CRMD_STATUS_CALLBACK:
	            case C_IPC_MESSAGE:
	            case C_HA_MESSAGE:
	                CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
	                          crm_err("Bogus data from %s", raised_from));
	                crm_trace("Copying %s data from %s as cluster message data",
	                          fsa_cause2string(cause), raised_from);
	                fsa_data->data = copy_ha_msg_input(data);
	                fsa_data->data_type = fsa_dt_ha_msg;
	                break;

	            case C_LRM_OP_CALLBACK:
	                crm_trace("Copying %s data from %s as lrmd_event_data_t",
	                          fsa_cause2string(cause), raised_from);
	                fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
	                fsa_data->data_type = fsa_dt_lrm;
	                break;

	            case C_TIMER_POPPED:
	            case C_SHUTDOWN:
	            case C_UNKNOWN:
	            case C_STARTUP:
	                crm_crit("Copying %s data (from %s) is not yet implemented",
	                         fsa_cause2string(cause), raised_from);
	                crmd_exit(CRM_EX_SOFTWARE);
	                break;
	        }
	    }

	    /* make sure to free it properly later */
	    if (prepend) {
	        controld_globals.fsa_message_queue
	            = g_list_prepend(controld_globals.fsa_message_queue, fsa_data);
	    } else {
	        controld_globals.fsa_message_queue
	            = g_list_append(controld_globals.fsa_message_queue, fsa_data);
	    }

	    crm_trace("FSA message queue length is %u",
	              g_list_length(controld_globals.fsa_message_queue));

	    /* fsa_dump_queue(LOG_TRACE); */

	    if (old_len == g_list_length(controld_globals.fsa_message_queue)) {
	        crm_err("Couldn't add message to the queue");
	    }

	    if (input != I_WAIT_FOR_EVENT) {
	        controld_trigger_fsa();
	    }
	}

	void
	fsa_dump_queue(int log_level)
	{
	int offset = 0;

	for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
	iter = iter->next) {
	fsa_data_t data = (fsa_data_t ) iter->data;

	do_crm_log_unlikely(log_level,
	"queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
	offset++, data->id, fsa_input2string(data->fsa_input),
	data->origin, data->data, data->data_type,
	fsa_cause2string(data->fsa_cause));
	}
	}

	/*!
	 * \internal
	 * \brief Create a copy of a cluster message input
	 *
	 * The message XML is copied; the xml member of the result is set to the
	 * first child of the copy's PCMK__XE_CRM_XML element rather than copied
	 * from \p orig.
	 *
	 * \param[in] orig  Input to copy (if NULL, the result has a NULL message)
	 *
	 * \return Newly allocated input
	 */
	ha_msg_input_t *
	copy_ha_msg_input(ha_msg_input_t *orig)
	{
	    xmlNode *payload_wrapper = NULL;
	    ha_msg_input_t *duplicate = pcmk__assert_alloc(1, sizeof(ha_msg_input_t));

	    if (orig != NULL) {
	        duplicate->msg = pcmk__xml_copy(NULL, orig->msg);
	    } else {
	        duplicate->msg = NULL;
	    }

	    payload_wrapper = pcmk__xe_first_child(duplicate->msg, PCMK__XE_CRM_XML,
	                                           NULL, NULL);
	    duplicate->xml = pcmk__xe_first_child(payload_wrapper, NULL, NULL, NULL);
	    return duplicate;
	}

	/*!
	 * \internal
	 * \brief Free an FSA queue entry along with any data attached to it
	 *
	 * The attached data is released according to the entry's data type. Data
	 * attached to an entry of type fsa_dt_none cannot be freed, and causes the
	 * controller to exit with CRM_EX_SOFTWARE.
	 *
	 * \param[in,out] fsa_data  Entry to free (may be NULL)
	 */
	void
	delete_fsa_input(fsa_data_t *fsa_data)
	{
	    if (fsa_data == NULL) {
	        return;
	    }
	    crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));

	    if (fsa_data->data != NULL) {
	        switch (fsa_data->data_type) {
	            case fsa_dt_ha_msg:
	                delete_ha_msg_input(fsa_data->data);
	                break;

	            case fsa_dt_xml:
	                {
	                    xmlNode *xml = fsa_data->data;

	                    free_xml(xml);
	                }
	                break;

	            case fsa_dt_lrm:
	                {
	                    lrmd_event_data_t *event = (lrmd_event_data_t *) fsa_data->data;

	                    lrmd_free_event(event);
	                }
	                break;

	            case fsa_dt_none:
	                // Data is known to be non-NULL here, so this is always fatal
	                crm_err("Don't know how to free %s data from %s",
	                        fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
	                crmd_exit(CRM_EX_SOFTWARE);
	                break;
	        }
	        crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
	    }

	    free(fsa_data);
	}

	/*!
	 * \internal
	 * \brief Remove and return the entry at the head of the FSA message queue
	 *
	 * \return Entry that was at the head of the queue
	 *
	 * \note The head of the queue is dereferenced without a check, so the
	 *       queue must not be empty when this is called.
	 */
	fsa_data_t *
	get_message(void)
	{
	    fsa_data_t *next = (fsa_data_t *) controld_globals.fsa_message_queue->data;

	    controld_globals.fsa_message_queue
	        = g_list_remove(controld_globals.fsa_message_queue, next);

	    crm_trace("Processing input %d", next->id);
	    return next;
	}

	/*!
	 * \internal
	 * \brief Get the data attached to an FSA entry, checking its type
	 *
	 * \param[in] fsa_data  FSA entry to get data from
	 * \param[in] a_type    Data type that the caller expects
	 * \param[in] caller    Name of the calling function (only used for logging)
	 *
	 * \return Attached data if present and of type \p a_type, otherwise NULL
	 *         (after logging the reason; a type mismatch is also asserted on)
	 */
	void *
	fsa_typed_data_adv(fsa_data_t *fsa_data, enum fsa_data_type a_type,
	                   const char *caller)
	{
	    if (fsa_data == NULL) {
	        crm_err("%s: No FSA data available", caller);
	        return NULL;
	    }

	    if (fsa_data->data == NULL) {
	        crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
	        return NULL;
	    }

	    if (fsa_data->data_type != a_type) {
	        crm_crit("%s: Message data was the wrong type! %d vs. requested=%d.  Origin: %s",
	                 caller, fsa_data->data_type, a_type, fsa_data->origin);
	        pcmk__assert(fsa_data->data_type == a_type);
	        return NULL;
	    }

	    return fsa_data->data;
	}

	/*!
	 * \internal
	 * \brief FSA action handler (A_MSG_ROUTE) that routes the current message
	 *
	 * \param[in] action         Ignored
	 * \param[in] cause          Ignored (the cause stored in \p msg_data is used)
	 * \param[in] cur_state      Ignored
	 * \param[in] current_input  Ignored
	 * \param[in] msg_data       FSA entry carrying the message to route
	 */
	void
	do_msg_route(long long action,
	             enum crmd_fsa_cause cause,
	             enum crmd_fsa_state cur_state,
	             enum crmd_fsa_input current_input, fsa_data_t * msg_data)
	{
	    /* NOTE(review): fsa_typed_data() presumably wraps fsa_typed_data_adv()
	     * using msg_data, which logs and returns NULL when no usable data is
	     * attached -- confirm against the macro definition
	     */
	    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);

	    if ((msg_data == NULL) || (input == NULL)) {
	        // Nothing to route; avoid dereferencing a NULL pointer
	        return;
	    }

	    route_message(msg_data->fsa_cause, input->msg);
	}

	/*!
	 * \internal
	 * \brief Relay a message elsewhere or handle it locally
	 *
	 * The message is first offered to relay_message(). If it is not consumed
	 * there, it is handled locally, and any resulting FSA input is registered
	 * with the message attached.
	 *
	 * \param[in]     cause  Message origin (must be C_IPC_MESSAGE or
	 *                       C_HA_MESSAGE, otherwise nothing is done)
	 * \param[in,out] input  Message to route
	 */
	void
	route_message(enum crmd_fsa_cause cause, xmlNode * input)
	{
	    // Initialize every member so no indeterminate value is passed along
	    ha_msg_input_t fsa_input = { .msg = input };
	    enum crmd_fsa_input result = I_NULL;

	    CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);

	    /* try passing the buck first */
	    if (relay_message(input, cause == C_IPC_MESSAGE)) {
	        return;
	    }

	    /* handle locally */
	    result = handle_message(input, cause);

	    /* done or process later? */
	    switch (result) {
	        case I_NULL:
	        case I_CIB_OP:
	        case I_ROUTER:
	        case I_NODE_JOIN:
	        case I_JOIN_REQUEST:
	        case I_JOIN_RESULT:
	            break;
	        default:
	            /* Deferring local processing of message */
	            register_fsa_input_later(cause, result, &fsa_input);
	            return;
	    }

	    if (result != I_NULL) {
	        /* add to the front of the queue */
	        register_fsa_input(cause, result, &fsa_input);
	    }
	}

	/*!
	 * \internal
	 * \brief Relay a controller message to its destination if it is not for us
	 *
	 * Hello messages and messages without a valid type or destination subsystem
	 * are dropped. Otherwise the destination host and subsystem decide whether
	 * the message is passed to a local IPC client, sent to cluster peers, or
	 * left for the caller to process.
	 *
	 * \param[in,out] msg                 Message to relay
	 * \param[in]     originated_locally  Whether the message came from a local
	 *                                    source (route_message() passes TRUE
	 *                                    for IPC messages)
	 *
	 * \return TRUE if no further processing of the message is needed (it was
	 *         relayed, delivered, or ignored), or FALSE if the caller must
	 *         process it locally
	 */
	gboolean
	relay_message(xmlNode * msg, gboolean originated_locally)
	{
	    enum crm_ais_msg_types dest = crm_msg_none;
	    bool is_for_dc = false;
	    bool is_for_dcib = false;
	    bool is_for_te = false;
	    bool is_for_crm = false;
	    bool is_for_cib = false;
	    bool is_local = false;
	    bool broadcast = false;
	    const char *host_to = NULL;
	    const char *sys_to = NULL;
	    const char *sys_from = NULL;
	    const char *type = NULL;
	    const char *task = NULL;
	    const char *ref = NULL;
	    crm_node_t *node_to = NULL;

	    CRM_CHECK(msg != NULL, return TRUE);

	    host_to = crm_element_value(msg, PCMK__XA_CRM_HOST_TO);
	    sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
	    sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
	    type = crm_element_value(msg, PCMK__XA_T);
	    task = crm_element_value(msg, PCMK__XA_CRM_TASK);
	    ref = crm_element_value(msg, PCMK_XA_REFERENCE);

	    // A message without a destination host is treated as a broadcast
	    broadcast = pcmk__str_empty(host_to);

	    if (ref == NULL) {
	        ref = "without reference ID";
	    }

	    if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) {
	        crm_trace("Received hello %s from %s (no processing needed)",
	                  ref, pcmk__s(sys_from, "unidentified source"));
	        crm_log_xml_trace(msg, "hello");
	        return TRUE;
	    }

	    // Require message type (set by create_request())
	    if (!pcmk__str_eq(type, PCMK__VALUE_CRMD, pcmk__str_none)) {
	        crm_warn("Ignoring invalid message %s with type '%s' "
	                 "(not '" PCMK__VALUE_CRMD "')",
	                 ref, pcmk__s(type, ""));
	        crm_log_xml_trace(msg, "ignored");
	        return TRUE;
	    }

	    // Require a destination subsystem (also set by create_request())
	    if (sys_to == NULL) {
	        crm_warn("Ignoring invalid message %s with no " PCMK__XA_CRM_SYS_TO,
	                 ref);
	        crm_log_xml_trace(msg, "ignored");
	        return TRUE;
	    }

	    // Get the message type appropriate to the destination subsystem
	    if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
	        dest = pcmk__cluster_parse_msg_type(sys_to);
	        if (dest == crm_msg_none) {
	            /* Unrecognized value, use a sane default
	             *
	             * @TODO Maybe we should bail instead
	             */
	            dest = crm_msg_crmd;
	        }
	    }

	    is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
	    is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
	    is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
	    is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
	    is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);

	    // Check whether message should be processed locally
	    is_local = false;
	    if (broadcast) {
	        if (is_for_dc || is_for_te) {
	            is_local = false;

	        } else if (is_for_crm) {
	            if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO,
	                                     PCMK__CONTROLD_CMD_NODES, NULL)) {
	                /* Node info requests do not specify a host, which is normally
	                 * treated as "all hosts", because the whole point is that the
	                 * client may not know the local node name. Always handle these
	                 * requests locally.
	                 */
	                is_local = true;
	            } else {
	                is_local = !originated_locally;
	            }

	        } else {
	            is_local = true;
	        }

	    } else if (pcmk__str_eq(controld_globals.our_nodename, host_to,
	                            pcmk__str_casei)) {
	        is_local = true;

	    } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
	        xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL,
	                                                NULL);
	        xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
	        const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);

	        if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
	            // Local delete of an offline node's resource history
	            is_local = true;
	        }
	    }

	    // If is for DC and DC is not yet selected
	    if (is_for_dc && pcmk__str_eq(task, CRM_OP_PING, pcmk__str_casei)
	        && (controld_globals.dc_name == NULL)) {

	        xmlNode *reply = create_ping_reply(msg);
	        sys_to = crm_element_value(reply, PCMK__XA_CRM_SYS_TO);
	        // Explicitly leave src empty. It indicates that dc is "not yet selected"
	        send_msg_via_ipc(reply, sys_to, NULL);
	        free_xml(reply);
	        return TRUE;
	    }

	    // Check whether message should be relayed

	    if (is_for_dc || is_for_dcib || is_for_te) {
	        if (AM_I_DC) {
	            if (is_for_te) {
	                crm_trace("Route message %s locally as transition request",
	                          ref);
	                crm_log_xml_trace(msg, sys_to);
	                send_msg_via_ipc(msg, sys_to, controld_globals.our_nodename);
	                return TRUE; // No further processing of message is needed
	            }
	            crm_trace("Route message %s locally as DC request", ref);
	            return FALSE; // More to be done by caller
	        }

	        if (originated_locally
	            && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE,
	                                     CRM_SYSTEM_TENGINE, NULL)) {
	            crm_trace("Relay message %s to DC (via %s)",
	                      ref, pcmk__s(host_to, "broadcast"));
	            crm_log_xml_trace(msg, "relayed");
	            if (!broadcast) {
	                node_to = pcmk__get_node(0, host_to, NULL,
	                                         pcmk__node_search_cluster_member);
	            }
	            pcmk__cluster_send_message(node_to, dest, msg);
	            return TRUE;
	        }

	        /* Transition engine and scheduler messages are sent only to the DC on
	         * the same node. If we are no longer the DC, discard this message.
	         */
	        crm_trace("Ignoring message %s because we are no longer DC", ref);
	        crm_log_xml_trace(msg, "ignored");
	        return TRUE; // No further processing of message is needed
	    }

	    if (is_local) {
	        if (is_for_crm || is_for_cib) {
	            crm_trace("Route message %s locally as controller request", ref);
	            return FALSE; // More to be done by caller
	        }
	        crm_trace("Relay message %s locally to %s", ref, sys_to);
	        crm_log_xml_trace(msg, "IPC-relay");
	        send_msg_via_ipc(msg, sys_to, controld_globals.our_nodename);
	        return TRUE;
	    }

	    // Not for this node: a named destination must be a node we know of
	    if (!broadcast) {
	- node_to = pcmk__search_node_caches(0, host_to,
	+ node_to = pcmk__search_node_caches(0, host_to, NULL,
	                                           pcmk__node_search_cluster_member);
	        if (node_to == NULL) {
	            crm_warn("Ignoring message %s because node %s is unknown",
	                     ref, host_to);
	            crm_log_xml_trace(msg, "ignored");
	            return TRUE;
	        }
	    }

	    crm_trace("Relay message %s to %s",
	              ref, pcmk__s(host_to, "all peers"));
	    crm_log_xml_trace(msg, "relayed");
	    pcmk__cluster_send_message(node_to, dest, msg);
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Check whether an IPC hello has a valid protocol version field
	 *
	 * \param[in] message_data  XML containing the field
	 * \param[in] field         Name of the version attribute to check
	 * \param[in] client_name   Client name (only used for logging)
	 * \param[in] ref           Message reference (only used for logging, may be
	 *                          NULL)
	 * \param[in] uuid          Client identifier (only used for logging)
	 *
	 * \return true if \p field parses as a non-negative integer, otherwise
	 *         false (after logging a warning)
	 */
	static bool
	authorize_version(xmlNode *message_data, const char *field,
	                  const char *client_name, const char *ref, const char *uuid)
	{
	    const char *version = crm_element_value(message_data, field);
	    long long version_num;

	    if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok)
	        || (version_num < 0LL)) {

	        crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s "
	                 CRM_XS " ref=%s uuid=%s",
	                 client_name, ((version == NULL)? "" : version),
	                 field, (ref? ref : "none"), uuid);
	        return false;
	    }
	    return true;
	}

	/*!
	 * \internal
	 * \brief Check whether a client IPC message is acceptable
	 *
	 * If a given client IPC message is a hello, "authorize" it by ensuring it has
	 * valid information such as a protocol version, and return false indicating
	 * that nothing further needs to be done with the message. If the message is not
	 * a hello, just return true to indicate it needs further processing.
	 *
	 * A message that is rejected causes the client (if not proxied) to be
	 * disconnected.
	 *
	 * \param[in]     client_msg     XML of IPC message
	 * \param[in,out] curr_client    If IPC is not proxied, client that sent message
	 * \param[in]     proxy_session  If IPC is proxied, the session ID
	 *
	 * \return true if message needs further processing, false if it doesn't
	 */
	bool
	controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client,
	                               const char *proxy_session)
	{
	    xmlNode *wrapper = NULL;
	    xmlNode *message_data = NULL;
	    const char *client_name = NULL;
	    const char *op = crm_element_value(client_msg, PCMK__XA_CRM_TASK);
	    const char *ref = crm_element_value(client_msg, PCMK_XA_REFERENCE);
	    const char *uuid = (curr_client? curr_client->id : proxy_session);

	    if (uuid == NULL) {
	        crm_warn("IPC message from client rejected: No client identifier "
	                 CRM_XS " ref=%s", (ref? ref : "none"));
	        goto rejected;
	    }

	    if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) {
	        // Only hello messages need to be authorized
	        return true;
	    }

	    wrapper = pcmk__xe_first_child(client_msg, PCMK__XE_CRM_XML, NULL, NULL);
	    message_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

	    client_name = crm_element_value(message_data, PCMK__XA_CLIENT_NAME);
	    if (pcmk__str_empty(client_name)) {
	        // The literals must be adjacent so that they form one format string
	        crm_warn("IPC hello from client rejected: No client name "
	                 CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid);
	        goto rejected;
	    }
	    if (!authorize_version(message_data, PCMK__XA_MAJOR_VERSION, client_name,
	                           ref, uuid)) {
	        goto rejected;
	    }
	    if (!authorize_version(message_data, PCMK__XA_MINOR_VERSION, client_name,
	                           ref, uuid)) {
	        goto rejected;
	    }

	    crm_trace("Validated IPC hello from client %s", client_name);
	    crm_log_xml_trace(client_msg, "hello");
	    if (curr_client) {
	        // Remember the client's name for as long as it stays connected
	        curr_client->userdata = pcmk__str_copy(client_name);
	    }
	    controld_trigger_fsa();
	    return false;

	rejected:
	    crm_log_xml_trace(client_msg, "rejected");
	    if (curr_client) {
	        qb_ipcs_disconnect(curr_client->ipcs);
	    }
	    return false;
	}

	/*!
	 * \internal
	 * \brief Dispatch a controller message according to its subtype
	 *
	 * \param[in,out] msg    Message XML (must not be NULL)
	 * \param[in]     cause  What triggered the message (passed on to the request
	 *                       handler)
	 *
	 * \return Input chosen by the request handler for requests, otherwise I_NULL
	 */
	static enum crmd_fsa_input
	handle_message(xmlNode *msg, enum crmd_fsa_cause cause)
	{
	    enum crmd_fsa_input next_input = I_NULL;
	    const char *subtype = NULL;

	    CRM_CHECK(msg != NULL, return I_NULL);

	    subtype = crm_element_value(msg, PCMK__XA_SUBT);

	    if (pcmk__str_eq(subtype, PCMK__VALUE_REQUEST, pcmk__str_none)) {
	        next_input = handle_request(msg, cause);

	    } else if (pcmk__str_eq(subtype, PCMK__VALUE_RESPONSE, pcmk__str_none)) {
	        // Responses never produce a new input
	        handle_response(msg);

	    } else {
	        crm_warn("Ignoring message with unknown " PCMK__XA_SUBT" '%s'",
	                 pcmk__s(subtype, ""));
	        crm_log_xml_trace(msg, "bad");
	    }

	    return next_input;
	}

	/*!
	 * \internal
	 * \brief Handle a request to clear a resource's failures on a node
	 *
	 * The resource ID is taken from the PCMK_XE_PRIMITIVE child of the operation
	 * XML, the optional operation name and interval from its attributes child,
	 * and the node name from the operation's PCMK__META_ON_NODE attribute. The
	 * failures are then cleared via the attribute manager, the CIB, and the
	 * executor state.
	 *
	 * \param[in] stored_msg  Request XML
	 *
	 * \return I_NULL (always)
	 */
	static enum crmd_fsa_input
	handle_failcount_op(xmlNode * stored_msg)
	{
	    const char *rsc = NULL;
	    const char *uname = NULL;
	    const char *op = NULL;
	    char *interval_spec = NULL;
	    guint interval_ms = 0;
	    gboolean is_remote_node = FALSE;

	    xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL,
	                                            NULL);
	    xmlNode *xml_op = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

	    if (xml_op) {
	        xmlNode *xml_rsc = pcmk__xe_first_child(xml_op, PCMK_XE_PRIMITIVE, NULL,
	                                                NULL);
	        xmlNode *xml_attrs = pcmk__xe_first_child(xml_op, PCMK__XE_ATTRIBUTES,
	                                                  NULL, NULL);

	        if (xml_rsc) {
	            rsc = pcmk__xe_id(xml_rsc);
	        }
	        if (xml_attrs) {
	            op = crm_element_value(xml_attrs,
	                                   CRM_META "_" PCMK__META_CLEAR_FAILURE_OP);
	            crm_element_value_ms(xml_attrs,
	                                 CRM_META "_" PCMK__META_CLEAR_FAILURE_INTERVAL,
	                                 &interval_ms);
	        }
	    }
	    uname = crm_element_value(xml_op, PCMK__META_ON_NODE);

	    // Both a resource and a node are required; op and interval are optional
	    if ((rsc == NULL) || (uname == NULL)) {
	        crm_log_xml_warn(stored_msg, "invalid failcount op");
	        return I_NULL;
	    }

	    // The presence of a router node marks the target as a remote node
	    if (crm_element_value(xml_op, PCMK__XA_ROUTER_NODE)) {
	        is_remote_node = TRUE;
	    }

	    crm_debug("Clearing failures for %s-interval %s on %s "
	              "from attribute manager, CIB, and executor state",
	              pcmk__readable_interval(interval_ms), rsc, uname);

	    // Leave interval_spec NULL when no (nonzero) interval was given
	    if (interval_ms) {
	        interval_spec = crm_strdup_printf("%ums", interval_ms);
	    }
	    update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
	    free(interval_spec);

	    controld_cib_delete_last_failure(rsc, uname, op, interval_ms);

	    lrm_clear_last_failure(rsc, uname, op, interval_ms);

	    return I_NULL;
	}

	/*!
	 * \internal
	 * \brief Handle a CRM_OP_LRM_DELETE request
	 *
	 * \param[in,out] stored_msg  Request XML (its destination subsystem is
	 *                            rewritten when the request is relayed)
	 *
	 * \return I_ROUTER if the request must be relayed to the executor subsystem,
	 *         otherwise I_NULL
	 */
	static enum crmd_fsa_input
	handle_lrm_delete(xmlNode *stored_msg)
	{
	    const char *mode = NULL;
	    xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL,
	                                            NULL);
	    xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

	    CRM_CHECK(msg_data != NULL, return I_NULL);

	    /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
	     * relay the operation to the affected node, which will unregister the
	     * resource from the local executor, clear the resource's history from the
	     * CIB, and do some bookkeeping in the controller.
	     *
	     * However, if the affected node is offline, the client will specify
	     * mode=PCMK__VALUE_CIB which means the controller receiving the operation
	     * should clear the resource's history from the CIB and nothing else. This
	     * is used to clear shutdown locks.
	     */
	    mode = crm_element_value(msg_data, PCMK__XA_MODE);
	    if (!pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
	        // Relay to affected node
	        crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD);
	        return I_ROUTER;

	    } else {
	        // Delete CIB history locally (compare with do_lrm_delete())
	        const char *from_sys = NULL;
	        const char *user_name = NULL;
	        const char *rsc_id = NULL;
	        const char *node = NULL;
	        xmlNode *rsc_xml = NULL;
	        int rc = pcmk_rc_ok;

	        rsc_xml = pcmk__xe_first_child(msg_data, PCMK_XE_PRIMITIVE, NULL, NULL);
	        CRM_CHECK(rsc_xml != NULL, return I_NULL);

	        rsc_id = pcmk__xe_id(rsc_xml);
	        from_sys = crm_element_value(stored_msg, PCMK__XA_CRM_SYS_FROM);
	        node = crm_element_value(msg_data, PCMK__META_ON_NODE);
	        user_name = pcmk__update_acl_user(stored_msg, PCMK__XA_CRM_USER, NULL);
	        crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
	                  "(clearing CIB resource history only)", rsc_id, node,
	                  (user_name? " for user " : ""), (user_name? user_name : ""));

	        // Try a synchronous dry run first; only do the real deletion if it passes
	        rc = controld_delete_resource_history(rsc_id, node, user_name,
	                                              cib_dryrun|cib_sync_call);
	        if (rc == pcmk_rc_ok) {
	            rc = controld_delete_resource_history(rsc_id, node, user_name,
	                                                  crmd_cib_smart_opt());
	        }

	        /* Notify client. Also notify tengine if mode=PCMK__VALUE_CIB and
	         * op=CRM_OP_LRM_DELETE.
	         */
	        if (from_sys) {
	            lrmd_event_data_t *op = NULL;
	            const char *from_host = crm_element_value(stored_msg, PCMK__XA_SRC);
	            const char *transition;

	            // The transition key location depends on who sent the request
	            if (strcmp(from_sys, CRM_SYSTEM_TENGINE) != 0) {
	                transition = crm_element_value(msg_data,
	                                               PCMK__XA_TRANSITION_KEY);
	            } else {
	                transition = crm_element_value(stored_msg,
	                                               PCMK__XA_TRANSITION_KEY);
	            }

	            crm_info("Notifying %s on %s that %s was%s deleted",
	                     from_sys, (from_host? from_host : "local node"), rsc_id,
	                     ((rc == pcmk_rc_ok)? "" : " not"));

	            // Build a synthetic "delete completed" event carrying the result
	            op = lrmd_new_event(rsc_id, PCMK_ACTION_DELETE, 0);
	            op->type = lrmd_event_exec_complete;
	            op->user_data = pcmk__str_copy(pcmk__s(transition, FAKE_TE_ID));
	            op->params = pcmk__strkey_table(free, free);
	            pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET,
	                             CRM_FEATURE_SET);
	            controld_rc2event(op, rc);
	            controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
	            lrmd_free_event(op);
	            controld_trigger_delete_refresh(from_sys, rsc_id);
	        }
	        return I_NULL;
	    }
	}

	/*!
	 * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
	 *
	 * The message's ID names the remote node and its PCMK__XA_IN_CCM attribute
	 * says whether the node is up. The cached connection host is replaced by the
	 * one in the message, or cleared if the message has none.
	 *
	 * \param[in] msg  Message XML
	 *
	 * \return Next FSA input (always I_NULL)
	 */
	static enum crmd_fsa_input
	handle_remote_state(const xmlNode *msg)
	{
	    const char *remote_uname = pcmk__xe_id(msg);
	    const char *new_conn_host = NULL;
	    crm_node_t *remote_peer;
	    bool remote_is_up = false;
	    int rc = pcmk__xe_get_bool_attr(msg, PCMK__XA_IN_CCM, &remote_is_up);

	    CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL);

	    remote_peer = pcmk__cluster_lookup_remote_node(remote_uname);
	    CRM_CHECK(remote_peer, return I_NULL);

	    if (remote_is_up) {
	        pcmk__update_peer_state(__func__, remote_peer, CRM_NODE_MEMBER, 0);
	    } else {
	        pcmk__update_peer_state(__func__, remote_peer, CRM_NODE_LOST, 0);
	    }

	    new_conn_host = crm_element_value(msg, PCMK__XA_CONNECTION_HOST);
	    if (new_conn_host != NULL) {
	        pcmk__str_update(&remote_peer->conn_host, new_conn_host);
	    } else {
	        // No host in the message: forget any previously recorded one
	        free(remote_peer->conn_host);
	        remote_peer->conn_host = NULL;
	    }

	    return I_NULL;
	}

	/*!
	 * \brief Build the reply to a CRM_OP_PING message
	 *
	 * The reply data names the subsystem the ping was addressed to, the
	 * controller's current FSA state, and a fixed "ok" result.
	 *
	 * \param[in] msg  Ping request XML
	 *
	 * \return Reply XML as produced by create_reply() (the caller in this file
	 *         checks it for NULL and frees it with free_xml())
	 */
	static xmlNode*
	create_ping_reply(const xmlNode *msg)
	{
	    xmlNode *reply = NULL;
	    xmlNode *ping_data = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE);
	    const char *subsystem = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
	    const char *state = NULL;

	    // Echo back which subsystem was pinged
	    crm_xml_add(ping_data, PCMK__XA_CRM_SUBSYSTEM, subsystem);

	    // Add controller state
	    state = fsa_state2string(controld_globals.fsa_state);
	    crm_xml_add(ping_data, PCMK__XA_CRMD_STATE, state);
	    crm_notice("Current ping state: %s", state); // CTS needs this

	    // Add controller health
	    // @TODO maybe do some checks to determine meaningful status
	    crm_xml_add(ping_data, PCMK_XA_RESULT, "ok");

	    // The reply data is no longer needed once create_reply() has run
	    reply = create_reply(msg, ping_data);
	    free_xml(ping_data);
	    return reply;
	}

	/*!
	 * \brief Answer a CRM_OP_PING message
	 *
	 * \param[in] msg  Ping request XML
	 *
	 * \return Next FSA input (always I_NULL, nothing further to do)
	 */
	static enum crmd_fsa_input
	handle_ping(const xmlNode *msg)
	{
	    xmlNode *pong = create_ping_reply(msg);

	    if (pong == NULL) {
	        return I_NULL;
	    }

	    // Sending is best-effort; the relay result is deliberately ignored
	    (void) relay_message(pong, TRUE);
	    free_xml(pong);
	    return I_NULL;
	}

	/*!
	 * \brief Handle a PCMK__CONTROLD_CMD_NODES message
	 *
	 * Replies with one PCMK_XE_NODE element (ID, name, and state) for every entry
	 * in the peer cache.
	 *
	 * \param[in] request  Message XML
	 *
	 * \return Next FSA input (always I_NULL)
	 */
	static enum crmd_fsa_input
	handle_node_list(const xmlNode *request)
	{
	    GHashTableIter cache_iter;
	    crm_node_t *peer = NULL;
	    xmlNode *response = NULL;
	    xmlNode *nodes_xml = pcmk__xe_create(NULL, PCMK_XE_NODES);

	    // Describe each cached peer in the reply data
	    g_hash_table_iter_init(&cache_iter, crm_peer_cache);
	    while (g_hash_table_iter_next(&cache_iter, NULL, (gpointer *) &peer)) {
	        xmlNode *node_xml = pcmk__xe_create(nodes_xml, PCMK_XE_NODE);

	        crm_xml_add_ll(node_xml, PCMK_XA_ID, (long long) peer->id); // uint32_t
	        crm_xml_add(node_xml, PCMK_XA_UNAME, peer->uname);
	        crm_xml_add(node_xml, PCMK__XA_IN_CCM, peer->state);
	    }

	    // Wrap the data in a reply and send it
	    response = create_reply(request, nodes_xml);
	    free_xml(nodes_xml);
	    if (response != NULL) {
	        (void) relay_message(response, TRUE);
	        free_xml(response);
	    }

	    // Nothing further to do
	    return I_NULL;
	}

	/*!
	 * \brief Handle a CRM_OP_NODE_INFO request
	 *
	 * Builds a PCMK_XE_NODE reply stating whether the local partition has quorum
	 * and, if the requested node is found in the node caches, that node's ID,
	 * name, state, and whether it is a remote node. The node may be requested by
	 * ID and/or name; if neither is given, the local node is used.
	 *
	 * \param[in] msg Message XML
	 *
	 * \return Next FSA input (always I_NULL)
	 */
	static enum crmd_fsa_input
	handle_node_info_request(const xmlNode *msg)
	{
	const char *value = NULL;
	crm_node_t *node = NULL;
	int node_id = 0;
	xmlNode *reply = NULL;
	xmlNode *reply_data = NULL;

	// Build reply

	reply_data = pcmk__xe_create(NULL, PCMK_XE_NODE);
	crm_xml_add(reply_data, PCMK__XA_CRM_SUBSYSTEM, CRM_SYSTEM_CRMD);

	// Add whether current partition has quorum
	pcmk__xe_set_bool_attr(reply_data, PCMK_XA_HAVE_QUORUM,
	pcmk_is_set(controld_globals.flags,
	controld_has_quorum));

	/* Check whether client requested node info by ID and/or name
	*
	* @TODO A Corosync-layer node ID is of type uint32_t. We should be able to
	* handle legitimate node IDs greater than INT_MAX, but currently we do not.
	*/
	crm_element_value_int(msg, PCMK_XA_ID, &node_id);
	// A negative ID is treated the same as no ID at all
	if (node_id < 0) {
	node_id = 0;
	}
	value = crm_element_value(msg, PCMK_XA_UNAME);

	// Default to local node if none given
	if ((node_id == 0) && (value == NULL)) {
	value = controld_globals.our_nodename;
	}

	- node = pcmk__search_node_caches(node_id, value, pcmk__node_search_any);
	+ node = pcmk__search_node_caches(node_id, value, NULL,
	+ pcmk__node_search_any);
	// If the node is unknown, the reply carries only subsystem and quorum
	if (node) {
	crm_xml_add(reply_data, PCMK_XA_ID, node->uuid);
	crm_xml_add(reply_data, PCMK_XA_UNAME, node->uname);
	crm_xml_add(reply_data, PCMK_XA_CRMD, node->state);
	pcmk__xe_set_bool_attr(reply_data, PCMK_XA_REMOTE_NODE,
	pcmk_is_set(node->flags, crm_remote_node));
	}

	// Send reply
	reply = create_reply(msg, reply_data);
	free_xml(reply_data);
	if (reply != NULL) {
	(void) relay_message(reply, TRUE);
	free_xml(reply);
	}

	// Nothing further to do
	return I_NULL;
	}

	/*!
	 * \internal
	 * \brief Exit the controller if the DC's feature set is incompatible with ours
	 *
	 * \param[in] msg  Message XML carrying the DC's PCMK_XA_CRM_FEATURE_SET
	 */
	static void
	verify_feature_set(xmlNode *msg)
	{
	    const char *dc_features = crm_element_value(msg, PCMK_XA_CRM_FEATURE_SET);

	    if (dc_features == NULL) {
	        /* All we really know is that the DC feature set is older than 3.1.0,
	         * but that's also all that really matters.
	         */
	        dc_features = "3.0.14";
	    }

	    if (!feature_set_compatible(dc_features, CRM_FEATURE_SET)) {
	        crm_err("Local feature set (%s) is incompatible with DC's (%s)",
	                CRM_FEATURE_SET, dc_features);

	        // Nothing is likely to improve without administrator involvement
	        controld_set_fsa_input_flags(R_STAYDOWN);
	        crmd_exit(CRM_EX_FATAL);

	    } else {
	        crm_trace("Local feature set (%s) is compatible with DC's (%s)",
	                  CRM_FEATURE_SET, dc_features);
	    }
	}

	/*!
	 * \internal
	 * \brief Handle a shutdown all-clear received while this node is DC
	 *
	 * \param[in] stored_msg  Message XML
	 *
	 * \return I_STOP if we requested the shutdown, I_TERMINATE if the DC is
	 *         confirming a shutdown we did not request, I_ELECTION if some other
	 *         node is doing so while we are not stopping, otherwise I_NULL
	 */
	static enum crmd_fsa_input
	handle_shutdown_self_ack(xmlNode *stored_msg)
	{
	    const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
	    const char *sender_desc = (host_from? host_from : "another node");

	    if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
	        // The expected case -- we initiated own shutdown sequence
	        crm_info("Shutting down controller");
	        return I_STOP;
	    }

	    if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) {
	        // Must be logic error -- DC confirming its own unrequested shutdown
	        crm_err("Shutting down controller immediately due to "
	                "unexpected shutdown confirmation");
	        return I_TERMINATE;
	    }

	    if (controld_globals.fsa_state == S_STOPPING) {
	        // Shouldn't happen, but we are already stopping anyway
	        crm_debug("Ignoring unexpected shutdown confirmation from %s",
	                  sender_desc);
	        return I_NULL;
	    }

	    // Shouldn't happen -- non-DC confirming unrequested shutdown
	    crm_err("Starting new DC election because %s is "
	            "confirming shutdown we did not request",
	            sender_desc);
	    return I_ELECTION;
	}

	/*!
	 * \internal
	 * \brief Handle a shutdown all-clear received while this node is not DC
	 *
	 * The all-clear is honored only if it comes from the node we know as DC (or
	 * if no DC name is known). If we never asked to shut down, the controller is
	 * additionally flagged to stay down.
	 *
	 * \param[in] stored_msg  Message XML
	 *
	 * \return I_STOP if the all-clear is accepted, otherwise I_NULL
	 */
	static enum crmd_fsa_input
	handle_shutdown_ack(xmlNode *stored_msg)
	{
	    const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);

	    if (host_from == NULL) {
	        crm_warn("Ignoring shutdown request without origin specified");
	        return I_NULL;
	    }

	    if (pcmk__str_eq(host_from, controld_globals.dc_name,
	                     pcmk__str_null_matches|pcmk__str_casei)) {

	        if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
	            crm_info("Shutting down controller after confirmation from %s",
	                     host_from);
	        } else {
	            crm_err("Shutting down controller after unexpected "
	                    "shutdown request from %s", host_from);
	            controld_set_fsa_input_flags(R_STAYDOWN);
	        }
	        return I_STOP;
	    }

	    crm_warn("Ignoring shutdown request from %s because DC is %s",
	             host_from, controld_globals.dc_name);
	    return I_NULL;
	}

	/*!
	 * \internal
	 * \brief Process a controller request and choose the next FSA input
	 *
	 * The operation name is read from PCMK__XA_CRM_TASK. Requests are handled in
	 * three groups: operations acted on only when this node is DC, operations
	 * any controller handles, and the shutdown all-clear that is handled only
	 * when this node is not DC.
	 *
	 * \param[in,out] stored_msg  Request XML (the destination subsystem is
	 *                            rewritten for operations relayed to the
	 *                            executor subsystem)
	 * \param[in]     cause       What triggered the message; only consulted for
	 *                            CRM_OP_RM_NODE_CACHE
	 *
	 * \return Next FSA input (I_NULL when nothing further is needed)
	 *
	 * NOTE(review): the banner lines of the form /=== ... ===/ and the
	 * backslash-escaped pipe characters in this block look like rendering
	 * artifacts of comment delimiters and C operators -- confirm against the
	 * repository copy before building from this text.
	 */
	static enum crmd_fsa_input
	handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
	{
	xmlNode *msg = NULL;
	const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK);

	/* Optimize this for the DC - it has the most to do */

	crm_log_xml_trace(stored_msg, "request");
	if (op == NULL) {
	crm_warn("Ignoring request without " PCMK__XA_CRM_TASK);
	return I_NULL;
	}

	// Every node records that the requester is expected to go down
	if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
	const char *from = crm_element_value(stored_msg, PCMK__XA_SRC);
	crm_node_t *node =
	- pcmk__search_node_caches(0, from, pcmk__node_search_cluster_member);
	+ pcmk__search_node_caches(0, from, NULL,
	+ pcmk__node_search_cluster_member);

	pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
	if(AM_I_DC == FALSE) {
	return I_NULL; /* Done */
	}
	}

	/========== DC-Only Actions ==========/
	if (AM_I_DC) {
	if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
	return I_NODE_JOIN;

	} else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
	return I_JOIN_REQUEST;

	} else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
	return I_JOIN_RESULT;

	} else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
	return handle_shutdown_self_ack(stored_msg);

	} else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
	// Another controller wants to shut down its node
	return handle_shutdown_request(stored_msg);
	}
	}

	/========== common actions ==========/
	if (strcmp(op, CRM_OP_NOVOTE) == 0) {
	ha_msg_input_t fsa_input;

	fsa_input.msg = stored_msg;
	register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
	A_ELECTION_COUNT \| A_ELECTION_CHECK, FALSE,
	__func__);

	} else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
	/* a remote connection host is letting us know the node state */
	return handle_remote_state(stored_msg);

	} else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
	throttle_update(stored_msg);
	if (AM_I_DC && (controld_globals.transition_graph != NULL)
	&& !controld_globals.transition_graph->complete) {

	crm_debug("The throttle changed. Trigger a graph.");
	trigger_graph();
	}
	return I_NULL;

	} else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
	return handle_failcount_op(stored_msg);

	} else if (strcmp(op, CRM_OP_VOTE) == 0) {
	/* count the vote and decide what to do after that */
	ha_msg_input_t fsa_input;

	fsa_input.msg = stored_msg;
	register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
	A_ELECTION_COUNT \| A_ELECTION_CHECK, FALSE,
	__func__);

	/* Sometimes we _must_ go into S_ELECTION */
	if (controld_globals.fsa_state == S_HALT) {
	crm_debug("Forcing an election from S_HALT");
	return I_ELECTION;
	}

	} else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
	verify_feature_set(stored_msg);
	crm_debug("Raising I_JOIN_OFFER: join-%s",
	crm_element_value(stored_msg, PCMK__XA_JOIN_ID));
	return I_JOIN_OFFER;

	} else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
	crm_debug("Raising I_JOIN_RESULT: join-%s",
	crm_element_value(stored_msg, PCMK__XA_JOIN_ID));
	return I_JOIN_RESULT;

	} else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) {
	return handle_lrm_delete(stored_msg);

	} else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
	\|\| (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
	\|\| (strcmp(op, CRM_OP_REPROBE) == 0)) {

	// Redirect to the executor subsystem and let the router deliver it
	crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD);
	return I_ROUTER;

	} else if (strcmp(op, CRM_OP_NOOP) == 0) {
	return I_NULL;

	} else if (strcmp(op, CRM_OP_PING) == 0) {
	return handle_ping(stored_msg);

	} else if (strcmp(op, CRM_OP_NODE_INFO) == 0) {
	return handle_node_info_request(stored_msg);

	} else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
	int id = 0;
	const char *name = NULL;

	crm_element_value_int(stored_msg, PCMK_XA_ID, &id);
	name = crm_element_value(stored_msg, PCMK_XA_UNAME);

	// A request arriving over IPC is rebroadcast to the controller peers;
	// one arriving any other way is applied to the local caches
	if(cause == C_IPC_MESSAGE) {
	// NOTE(review): id is an int but is formatted with %u below -- confirm
	// the intended type
	msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
	if (!pcmk__cluster_send_message(NULL, crm_msg_crmd, msg)) {
	crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
	} else {
	crm_notice("Instructing peers to remove references to node %s/%u", name, id);
	}
	free_xml(msg);

	} else {
	pcmk__cluster_forget_cluster_node(id, name);

	/* If we're forgetting this node, also forget any failures to fence
	* it, so we don't carry that over to any node added later with the
	* same name.
	*/
	st_fail_count_reset(name);
	}

	} else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
	xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML,
	NULL, NULL);
	xmlNode *xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

	remote_ra_process_maintenance_nodes(xml);

	} else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) {
	return handle_node_list(stored_msg);

	/========== (NOT_DC)-Only Actions ==========/
	// NOTE(review): because this branch matches whenever we are not DC, the
	// "Unexpected request" error below is only reachable on the DC; a non-DC
	// node falls through silently for unrecognized operations -- confirm intent
	} else if (!AM_I_DC) {

	if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
	return handle_shutdown_ack(stored_msg);
	}

	} else {
	crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
	crm_log_xml_err(stored_msg, "Unexpected");
	}

	return I_NULL;
	}

	/*!
	 * \internal
	 * \brief Process a reply received by the controller
	 *
	 * Only a scheduler calculation reply (handled when this node is DC) leads to
	 * further work: if its reference matches the outstanding request, the
	 * scheduler timer is stopped and I_PE_SUCCESS is queued. Replies to votes
	 * and shutdown messages are silently accepted; anything else is logged as
	 * an error.
	 *
	 * \param[in,out] stored_msg  Reply XML
	 */
	static void
	handle_response(xmlNode *stored_msg)
	{
	    const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK);

	    crm_log_xml_trace(stored_msg, "reply");
	    if (op == NULL) {
	        crm_warn("Ignoring reply without " PCMK__XA_CRM_TASK);

	    } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
	        // Check whether scheduler answer been superseded by subsequent request
	        const char *msg_ref = crm_element_value(stored_msg, PCMK_XA_REFERENCE);

	        if (msg_ref == NULL) {
	            crm_err("%s - Ignoring calculation with no reference", op);

	        } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
	                                pcmk__str_none)) {
	            ha_msg_input_t fsa_input;

	            controld_stop_sched_timer();
	            fsa_input.msg = stored_msg;
	            register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);

	        } else {
	            crm_info("%s calculation %s is obsolete", op, msg_ref);
	        }

	    } else if (strcmp(op, CRM_OP_VOTE) == 0
	               || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0
	               || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
	        // Expected replies that need no action

	    } else {
	        const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);

	        // The source attribute may be absent; never hand NULL to %s
	        crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
	                op, pcmk__s(host_from, "unknown"),
	                AM_I_DC ? "DC" : "controller");
	    }
	}

	/*!
	 * \internal
	 * \brief Record a node's shutdown request as a node attribute
	 *
	 * Sets the PCMK__NODE_ATTR_SHUTDOWN attribute of the requesting node (or of
	 * the local node, if the message names no source) to the current time.
	 *
	 * \param[in] stored_msg  Shutdown request XML
	 *
	 * \return I_NULL (always)
	 */
	static enum crmd_fsa_input
	handle_shutdown_request(xmlNode * stored_msg)
	{
	    /* handle here to avoid potential version issues
	     * where the shutdown message/procedure may have
	     * been changed in later versions.
	     *
	     * This way the DC is always in control of the shutdown
	     */

	    const char *requester = crm_element_value(stored_msg, PCMK__XA_SRC);
	    char *timestamp = NULL;

	    if (requester == NULL) {
	        /* we're shutting down and the DC */
	        requester = controld_globals.our_nodename;
	    }

	    crm_info("Creating shutdown request for %s (state=%s)", requester,
	             fsa_state2string(controld_globals.fsa_state));
	    crm_log_xml_trace(stored_msg, "message");

	    timestamp = pcmk__ttoa(time(NULL));
	    update_attrd(requester, PCMK__NODE_ATTR_SHUTDOWN, timestamp, NULL, FALSE);
	    free(timestamp);

	    /* will be picked up by the TE as long as its running */
	    return I_NULL;
	}

	/*!
	 * \internal
	 * \brief Deliver a message to a local subsystem or IPC client
	 *
	 * \p sys is tried, in order, as the ID of a connected IPC client, as the
	 * transition engine, as the executor subsystem, and as a proxy session ID.
	 *
	 * \param[in,out] msg  Message XML (its source is set to \p src if unset)
	 * \param[in]     sys  Destination subsystem name or client/session ID
	 * \param[in]     src  Source to record in \p msg if it has none
	 */
	static void
	send_msg_via_ipc(xmlNode * msg, const char *sys, const char *src)
	{
	    pcmk__client_t *client_channel = NULL;

	    CRM_CHECK(sys != NULL, return);

	    client_channel = pcmk__find_client_by_id(sys);

	    if (crm_element_value(msg, PCMK__XA_SRC) == NULL) {
	        crm_xml_add(msg, PCMK__XA_SRC, src);
	    }

	    if (client_channel != NULL) {
	        /* Transient clients such as crmadmin */
	        pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event);

	    } else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
	        xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL,
	                                                NULL);
	        xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

	        process_te_message(msg, data);

	    } else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) {
	        fsa_data_t fsa_data;
	        ha_msg_input_t fsa_input;
	        xmlNode *wrapper = NULL;

	        fsa_input.msg = msg;

	        wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL);
	        fsa_input.xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

	        // Hand the message straight to the executor action as FSA data
	        fsa_data.id = 0;
	        fsa_data.actions = 0;
	        fsa_data.data = &fsa_input;
	        fsa_data.fsa_input = I_MESSAGE;
	        fsa_data.fsa_cause = C_IPC_MESSAGE;
	        fsa_data.origin = __func__;
	        fsa_data.data_type = fsa_dt_ha_msg;

	        do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state,
	                      I_MESSAGE, &fsa_data);

	    } else if (crmd_is_proxy_session(sys)) {
	        crmd_proxy_send(sys, msg);

	    } else {
	        crm_info("Received invalid request: unknown subsystem '%s'", sys);
	    }
	}

	/*!
	 * \brief Free a message input object along with the message XML it holds
	 *
	 * \param[in,out] orig  Object to free (NULL is ignored)
	 */
	void
	delete_ha_msg_input(ha_msg_input_t * orig)
	{
	    if (orig != NULL) {
	        free_xml(orig->msg);
	        free(orig);
	    }
	}

	/*!
	 * \internal
	 * \brief Notify the cluster of a remote node state change
	 *
	 * Sends a CRM_OP_REMOTE_STATE request to the controller peers. When the node
	 * is coming up, the local node is named as its connection host.
	 *
	 * \param[in] node_name  Node's name
	 * \param[in] node_up    true if node is up, false if down
	 */
	void
	broadcast_remote_state_message(const char *node_name, bool node_up)
	{
	    xmlNode *request = create_request(CRM_OP_REMOTE_STATE, NULL, NULL,
	                                      CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
	    const char *direction = "going down";

	    if (node_up) {
	        direction = "coming up";
	    }
	    crm_info("Notifying cluster of Pacemaker Remote node %s %s",
	             node_name, direction);

	    crm_xml_add(request, PCMK_XA_ID, node_name);
	    pcmk__xe_set_bool_attr(request, PCMK__XA_IN_CCM, node_up);

	    if (node_up) {
	        crm_xml_add(request, PCMK__XA_CONNECTION_HOST,
	                    controld_globals.our_nodename);
	    }

	    pcmk__cluster_send_message(NULL, crm_msg_crmd, request);
	    free_xml(request);
	}

	diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
	index caee5ef30c..2501c63cb7 100644
	--- a/daemons/fenced/fenced_commands.c
	+++ b/daemons/fenced/fenced_commands.c
	@@ -1,3727 +1,3727 @@
	/*
	* Copyright 2009-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sys/param.h>
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <sys/stat.h>
	#include <unistd.h>
	#include <sys/utsname.h>

	#include <stdlib.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <ctype.h>

	#include <crm/crm.h>
	#include <crm/common/ipc.h>
	#include <crm/common/ipc_internal.h>
	#include <crm/cluster/internal.h>
	#include <crm/common/mainloop.h>

	#include <crm/stonith-ng.h>
	#include <crm/fencing/internal.h>
	#include <crm/common/xml.h>

	#include <pacemaker-fenced.h>

	// Table of fencing devices; looked up by device ID elsewhere in this file
	GHashTable *device_list = NULL;
	// NOTE(review): presumably the configured fencing topology -- not used in
	// the visible part of this file, confirm
	GHashTable *topology = NULL;
	// Every command made by create_async_command() until it is freed
	static GList *cmd_list = NULL;

	// NOTE(review): presumably maps request names to handlers -- confirm
	static GHashTable *fenced_handlers = NULL;

	// State for one search for devices capable of a requested fence action
	struct device_search_s {
	    /* target of fence action */
	    char *host;
	    /* requested fence action */
	    char *action;
	    /* timeout to use if a device is queried dynamically for possible targets */
	    int per_device_timeout;
	    /* number of registered fencing devices at time of request */
	    int replies_needed;
	    /* number of device replies received so far */
	    int replies_received;
	    /* whether the target is eligible to perform requested action (or off) */
	    bool allow_self;

	    /* private data to pass to search callback function */
	    void *user_data;
	    /* function to call when all replies have been received */
	    void (*callback) (GList *devices, void *user_data);
	    /* devices capable of performing requested action (or off if remapping) */
	    GList *capable;
	    /* Whether to perform searches that support the action */
	    uint32_t support_action_only;
	};

	// Forward declarations for functions defined later in this file
	static gboolean stonith_device_dispatch(gpointer user_data);
	static void st_child_done(int pid, const pcmk__action_result_t *result,
	                          void *user_data);

	static void search_devices_record_result(struct device_search_s *search,
	                                         const char *device,
	                                         gboolean can_fence);

	static int get_agent_metadata(const char *agent, xmlNode **metadata);
	static void read_action_metadata(stonith_device_t *device);
	static enum fenced_target_by unpack_level_kind(const xmlNode *level);

	// State for one asynchronous fencing command
	typedef struct async_command_s {

	    int id;
	    int pid;
	    int fd_stdout;
	    uint32_t options;
	    int default_timeout; /* seconds */
	    int timeout; /* seconds */

	    int start_delay; // seconds (-1 means disable static/random fencing delays)
	    int delay_id;

	    char *op;
	    char *origin;
	    char *client;
	    char *client_name;
	    char *remote_op_id;

	    char *target;
	    uint32_t target_nodeid;
	    char *action;
	    char *device;

	    GList *device_list;
	    GList *next_device_iter; // device_list entry for next device to execute

	    void *internal_user_data;
	    // Same signature as st_child_done(), which is what new commands are given
	    void (*done_cb) (int pid, const pcmk__action_result_t *result,
	                     void *user_data);
	    guint timer_sigterm;
	    guint timer_sigkill;
	    /*! If the operation timed out, this is the last signal
	     *  we sent to the process to get it to terminate */
	    int last_timeout_signo;

	    stonith_device_t *active_on;
	    stonith_device_t *activating_on;
	} async_command_t;

	// Forward declaration; the definition is outside the visible part of the file
	static xmlNode *construct_async_reply(const async_command_t *cmd,
	                                      const pcmk__action_result_t *result);

	static gboolean
	is_action_required(const char action, const stonith_device_t device)
	{
	return (device != NULL) && device->automatic_unfencing
	&& pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none);
	}

	static int
	get_action_delay_max(const stonith_device_t device, const char action)
	{
	const char *value = NULL;
	guint delay_max = 0U;

	if (!pcmk__is_fencing_action(action)) {
	return 0;
	}

	value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX);
	if (value) {
	pcmk_parse_interval_spec(value, &delay_max);
	delay_max /= 1000;
	}

	return (int) delay_max;
	}

	static int
	get_action_delay_base(const stonith_device_t device, const char action,
	const char *target)
	{
	char *hash_value = NULL;
	guint delay_base = 0U;

	if (!pcmk__is_fencing_action(action)) {
	return 0;
	}

	hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE);

	if (hash_value) {
	char *value = pcmk__str_copy(hash_value);
	char *valptr = value;

	if (target != NULL) {
	for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) {
	char *mapval = strchr(val, ':');

	if (mapval == NULL \|\| mapval[1] == 0) {
	crm_err("pcmk_delay_base: empty value in mapping", val);
	continue;
	}

	if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) {
	value = mapval + 1;
	crm_debug("pcmk_delay_base mapped to %s for %s",
	value, target);
	break;
	}
	}
	}

	if (strchr(value, ':') == 0) {
	pcmk_parse_interval_spec(value, &delay_base);
	delay_base /= 1000;
	}

	free(valptr);
	}

	return (int) delay_base;
	}

	/*!
	* \internal
	* \brief Override STONITH timeout with pcmk_*_timeout if available
	*
	* \param[in] device STONITH device to use
	* \param[in] action STONITH action name
	* \param[in] default_timeout Timeout to use if device does not have
	* a pcmk_*_timeout parameter for action
	*
	* \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
	* \note For consistency, it would be nice if reboot/off/on timeouts could be
	* set the same way as start/stop/monitor timeouts, i.e. with an
	* <operation> entry in the fencing resource configuration. However that
	* is insufficient because fencing devices may be registered directly via
	* the fencer's register_device() API instead of going through the CIB
	* (e.g. stonith_admin uses it for its -R option, and the executor uses it
	* to ensure a device is registered when a command is issued). As device
	* properties, pcmk_*_timeout parameters can be grabbed by the fencer when
	* the device is registered, whether by CIB change or API call.
	*/
	static int
	get_action_timeout(const stonith_device_t device, const char action,
	int default_timeout)
	{
	if (action && device && device->params) {
	char buffer[64] = { 0, };
	const char *value = NULL;

	/* If "reboot" was requested but the device does not support it,
	* we will remap to "off", so check timeout for "off" instead
	*/
	if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)
	&& !pcmk_is_set(device->flags, st_device_supports_reboot)) {
	crm_trace("%s doesn't support reboot, using timeout for off instead",
	device->id);
	action = PCMK_ACTION_OFF;
	}

	/* If the device config specified an action-specific timeout, use it */
	snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
	value = g_hash_table_lookup(device->params, buffer);
	if (value) {
	long long timeout_ms = crm_get_msec(value);
	return (int) QB_MIN(timeout_ms / 1000, INT_MAX);
	}
	}
	return default_timeout;
	}

	/*!
	 * \internal
	 * \brief Get the currently executing device for a fencing operation
	 *
	 * \param[in] cmd  Fencing operation to check
	 *
	 * \return Currently executing device for \p cmd if any, otherwise NULL
	 */
	static stonith_device_t *
	cmd_device(const async_command_t *cmd)
	{
	    if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) {
	        return NULL;
	    }
	    // The command stores only the device ID; resolve it in the device table
	    return g_hash_table_lookup(device_list, cmd->device);
	}

	/*!
	 * \internal
	 * \brief Return the configured reboot action for a given device
	 *
	 * \param[in] device_id  Device ID
	 *
	 * \return The "pcmk_reboot_action" parameter of \p device_id if the device is
	 *         known and has one, otherwise whatever pcmk__s() yields for a NULL
	 *         value with PCMK_ACTION_REBOOT as its second argument
	 */
	const char *
	fenced_device_reboot_action(const char *device_id)
	{
	    stonith_device_t *device = NULL;
	    const char *configured = NULL;

	    if ((device_list != NULL) && (device_id != NULL)) {
	        device = g_hash_table_lookup(device_list, device_id);
	    }
	    if ((device != NULL) && (device->params != NULL)) {
	        configured = g_hash_table_lookup(device->params, "pcmk_reboot_action");
	    }
	    return pcmk__s(configured, PCMK_ACTION_REBOOT);
	}

	/*!
	 * \internal
	 * \brief Check whether a given device supports the "on" action
	 *
	 * \param[in] device_id  Device ID
	 *
	 * \return true if \p device_id names a known device with the
	 *         st_device_supports_on flag set, otherwise false
	 */
	bool
	fenced_device_supports_on(const char *device_id)
	{
	    stonith_device_t *device = NULL;

	    if ((device_list != NULL) && (device_id != NULL)) {
	        device = g_hash_table_lookup(device_list, device_id);
	    }
	    return (device != NULL)
	           && pcmk_is_set(device->flags, st_device_supports_on);
	}

	/*!
	 * \internal
	 * \brief Free an asynchronous fencing command
	 *
	 * Removes any pending delay source, drops the command from the global
	 * command list, and releases the command with all the strings it holds.
	 *
	 * \param[in,out] cmd  Command to free (NULL is ignored)
	 */
	static void
	free_async_command(async_command_t * cmd)
	{
	    if (cmd == NULL) {
	        return;
	    }

	    if (cmd->delay_id != 0) {
	        g_source_remove(cmd->delay_id);
	    }

	    cmd_list = g_list_remove(cmd_list, cmd);

	    // Release owned members, then the command itself
	    g_list_free_full(cmd->device_list, free);
	    free(cmd->op);
	    free(cmd->origin);
	    free(cmd->client);
	    free(cmd->client_name);
	    free(cmd->remote_op_id);
	    free(cmd->target);
	    free(cmd->action);
	    free(cmd->device);
	    free(cmd);
	}

	/*!
	 * \internal
	 * \brief Create a new asynchronous fencing operation from request XML
	 *
	 * \param[in] msg  Fencing request XML (from IPC or CPG)
	 *
	 * \return Newly allocated fencing operation on success, otherwise NULL
	 *
	 * \note This asserts on memory errors, so a NULL return indicates an
	 *       unparseable message.
	 * \note On success the operation has been appended to the global command
	 *       list; free_async_command() removes it again.
	 */
	static async_command_t *
	create_async_command(xmlNode *msg)
	{
	    xmlNode *op = NULL;
	    async_command_t *cmd = NULL;
	    int rc = pcmk_rc_ok;

	    if (msg == NULL) {
	        return NULL;
	    }

	    op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR);
	    if (op == NULL) {
	        return NULL;
	    }

	    cmd = pcmk__assert_alloc(1, sizeof(async_command_t));

	    // All messages must include these
	    cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION);
	    cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP);
	    cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID);
	    if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) {
	        free_async_command(cmd);
	        return NULL;
	    }

	    crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id));
	    crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay));
	    crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout));
	    cmd->timeout = cmd->default_timeout;

	    // Unparseable call options are only warned about, not treated as fatal
	    rc = pcmk__xe_get_flags(msg, PCMK__XA_ST_CALLOPT, &(cmd->options),
	                            st_opt_none);
	    if (rc != pcmk_rc_ok) {
	        crm_warn("Couldn't parse options from request: %s", pcmk_rc_str(rc));
	    }

	    cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC);
	    cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP);
	    cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME);
	    cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET);
	    cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID);

	    cmd->done_cb = st_child_done;

	    // Track in global command list
	    cmd_list = g_list_append(cmd_list, cmd);

	    return cmd;
	}

	/*!
	 * \internal
	 * \brief Get the maximum number of concurrent actions allowed for a device
	 *
	 * \param[in] device  Fence device whose parameters should be checked
	 *
	 * \return Value of the device's PCMK_STONITH_ACTION_LIMIT parameter, or 1 if
	 *         the parameter is unset, cannot be parsed as an integer, or is 0
	 *
	 * \note Negative values are returned unchanged; stonith_device_execute()
	 *       enforces the limit only when it is greater than -1.
	 */
	static int
	get_action_limit(stonith_device_t * device)
	{
	    const char *value = NULL;
	    int action_limit = 1;

	    value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT);
	    if ((value == NULL)
	        || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok)
	        || (action_limit == 0)) {
	        action_limit = 1;
	    }
	    return action_limit;
	}

	/*!
	 * \internal
	 * \brief Count the commands currently active on a device
	 *
	 * \param[in] device  Fence device to check
	 *
	 * \return Number of entries in the global command list whose active_on
	 *         member is \p device (0 if \p device is NULL)
	 */
	static int
	get_active_cmds(stonith_device_t * device)
	{
	    int n_active = 0;

	    CRM_CHECK(device != NULL, return 0);

	    for (GList *iter = cmd_list; iter != NULL; iter = iter->next) {
	        const async_command_t *candidate = iter->data;

	        if (candidate->active_on == device) {
	            n_active++;
	        }
	    }
	    return n_active;
	}

	static void
	fork_cb(int pid, void *user_data)
	{
	async_command_t cmd = (async_command_t ) user_data;
	stonith_device_t * device =
	/* in case of a retry we've done the move from
	activating_on to active_on already
	*/
	cmd->activating_on?cmd->activating_on:cmd->active_on;

	pcmk__assert(device != NULL);
	crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
	cmd->action, pid,
	((cmd->target == NULL)? "" : " targeting "),
	pcmk__s(cmd->target, ""), device->id, cmd->timeout);
	cmd->active_on = device;
	cmd->activating_on = NULL;
	}

	/*!
	 * \internal
	 * \brief Timer callback that retries fetching a device's agent metadata
	 *
	 * \param[in,out] data  Fence device (stonith_device_t *)
	 *
	 * \return G_SOURCE_CONTINUE if get_agent_metadata() returned EAGAIN (the
	 *         timer period is doubled first while it is below 160 seconds),
	 *         otherwise G_SOURCE_REMOVE
	 */
	static int
	get_agent_metadata_cb(gpointer data) {
	    stonith_device_t *device = data;
	    int rc = get_agent_metadata(device->agent, &device->agent_metadata);

	    if (rc == EAGAIN) {
	        guint period_ms = pcmk__mainloop_timer_get_period(device->timer);

	        // Back off before the next attempt
	        if (period_ms < 160 * 1000) {
	            mainloop_timer_set_period(device->timer, 2 * period_ms);
	        }
	        return G_SOURCE_CONTINUE;
	    }

	    if ((rc == pcmk_rc_ok) && (device->agent_metadata != NULL)) {
	        read_action_metadata(device);
	        stonith__device_parameter_flags(&(device->flags), device->id,
	                                        device->agent_metadata);
	    }
	    return G_SOURCE_REMOVE;
	}

	/*!
	 * \internal
	 * \brief Call a command's action callback for an internal (not library) result
	 *
	 * \param[in,out] cmd               Command to report result for
	 * \param[in]     exit_status       Exit status to use for result
	 * \param[in]     execution_status  Execution status to use for result
	 * \param[in]     exit_reason       Exit reason to use for result
	 *
	 * \note The callback is invoked with a PID of 0 and with \p cmd as its user
	 *       data.
	 */
	static void
	report_internal_result(async_command_t *cmd, int exit_status,
	                       int execution_status, const char *exit_reason)
	{
	    pcmk__action_result_t internal_result = PCMK__UNKNOWN_RESULT;

	    pcmk__set_result(&internal_result, exit_status, execution_status,
	                     exit_reason);
	    cmd->done_cb(0, &internal_result, cmd);

	    // The result object is local, so release whatever it holds
	    pcmk__reset_result(&internal_result);
	}

	/*!
	 * \internal
	 * \brief Start the next runnable pending operation on a fence device
	 *
	 * Does nothing if the device is already at its action limit or has no pending
	 * operation whose start delay has expired. Otherwise the first such operation
	 * is removed from the pending list and executed asynchronously (or answered
	 * internally for watchdog devices and CIB-secret failures).
	 *
	 * \param[in,out] device  Fence device to run an operation on
	 *
	 * \return FALSE if \p device is NULL, otherwise TRUE
	 */
	static gboolean
	stonith_device_execute(stonith_device_t * device)
	{
	    int exec_rc = 0;
	    const char *action_str = NULL;
	    const char *host_arg = NULL;
	    async_command_t *cmd = NULL;
	    stonith_action_t *action = NULL;
	    int active_cmds = 0;
	    int action_limit = 0;
	    GList *gIter = NULL;
	    GList *gIterNext = NULL;

	    CRM_CHECK(device != NULL, return FALSE);

	    // A negative action limit disables the concurrency check
	    active_cmds = get_active_cmds(device);
	    action_limit = get_action_limit(device);
	    if (action_limit > -1 && active_cmds >= action_limit) {
	        crm_trace("%s is over its action limit of %d (%d active action%s)",
	                  device->id, action_limit, active_cmds,
	                  pcmk__plural_s(active_cmds));
	        return TRUE;
	    }

	    // Pick the first pending operation that is not waiting on a start delay
	    for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
	        async_command_t *pending_op = gIter->data;

	        gIterNext = gIter->next;

	        if (pending_op && pending_op->delay_id) {
	            crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
	                      "waiting for start delay of %ds",
	                      pending_op->action,
	                      ((pending_op->target == NULL)? "" : " targeting "),
	                      pcmk__s(pending_op->target, ""),
	                      device->id, pending_op->start_delay);
	            continue;
	        }

	        device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
	        g_list_free_1(gIter);

	        cmd = pending_op;
	        break;
	    }

	    if (cmd == NULL) {
	        crm_trace("No actions using %s are needed", device->id);
	        return TRUE;
	    }

	    if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
	                         STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
	        if (pcmk__is_fencing_action(cmd->action)) {
	            if (node_does_watchdog_fencing(stonith_our_uname)) {
	                pcmk__panic(__func__);
	                goto done;
	            }
	        } else {
	            crm_info("Faking success for %s watchdog operation", cmd->action);
	            report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	            goto done;
	        }
	    }

	#if SUPPORT_CIBSECRETS
	    exec_rc = pcmk__substitute_secrets(device->id, device->params);
	    if (exec_rc != pcmk_rc_ok) {
	        if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) {
	            crm_info("Proceeding with stop operation for %s "
	                     "despite being unable to load CIB secrets (%s)",
	                     device->id, pcmk_rc_str(exec_rc));
	        } else {
	            crm_err("Considering %s unconfigured "
	                    "because unable to load CIB secrets: %s",
	                    device->id, pcmk_rc_str(exec_rc));
	            report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
	                                   "Failed to get CIB secrets");
	            goto done;
	        }
	    }
	#endif

	    // Agents without reboot support get "off" instead
	    action_str = cmd->action;
	    if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none)
	        && !pcmk_is_set(device->flags, st_device_supports_reboot)) {

	        crm_notice("Remapping 'reboot' action%s%s using %s to 'off' "
	                   "because agent '%s' does not support reboot",
	                   ((cmd->target == NULL)? "" : " targeting "),
	                   pcmk__s(cmd->target, ""), device->id, device->agent);
	        action_str = PCMK_ACTION_OFF;
	    }

	    // "port" takes precedence over "plug" when the agent supports both
	    if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
	        host_arg = "port";

	    } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
	        host_arg = "plug";
	    }

	    action = stonith__action_create(device->agent, action_str, cmd->target,
	                                    cmd->target_nodeid, cmd->timeout,
	                                    device->params, device->aliases, host_arg);

	    /* for async exec, exec_rc is negative for early error exit
	     * otherwise handling of success/errors is done via callbacks */
	    cmd->activating_on = device;
	    exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
	                                     fork_cb);
	    if (exec_rc < 0) {
	        cmd->activating_on = NULL;
	        cmd->done_cb(0, stonith__action_result(action), cmd);
	        stonith__destroy_action(action);
	    }

	done:
	    /* Device might get triggered to work by multiple fencing commands
	     * simultaneously. Trigger the device again to make sure any
	     * remaining concurrent commands get executed. */
	    if (device->pending_ops) {
	        mainloop_set_trigger(device->work);
	    }
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Trigger callback that runs pending work for a fence device
	 *
	 * \param[in,out] user_data  Fence device (stonith_device_t *)
	 *
	 * \return Return value of stonith_device_execute()
	 */
	static gboolean
	stonith_device_dispatch(gpointer user_data)
	{
	    stonith_device_t *device = user_data;

	    return stonith_device_execute(device);
	}

	/*!
	 * \internal
	 * \brief Timer callback run when a command's start delay has elapsed
	 *
	 * Clears the command's delay timer ID and, if the command's device is still
	 * registered, triggers that device to process its pending operations.
	 *
	 * \param[in,out] data  Delayed command (async_command_t *)
	 *
	 * \return G_SOURCE_REMOVE always
	 */
	static gboolean
	start_delay_helper(gpointer data)
	{
	    async_command_t *delayed = data;
	    stonith_device_t *owner = cmd_device(delayed);

	    delayed->delay_id = 0;

	    if (owner != NULL) {
	        mainloop_set_trigger(owner->work);
	    }
	    return G_SOURCE_REMOVE;
	}

	/*!
	 * \internal
	 * \brief Queue a fencing command on a device, applying any start delay
	 *
	 * The command is bound to \p device, appended to the device's pending
	 * operations, and the device is triggered. Unless the requested delay is
	 * negative, a static/random delay from the device configuration is added to
	 * the command's start delay, and a timer is set if the total is positive.
	 *
	 * \param[in,out] cmd     Command to schedule
	 * \param[in,out] device  Device that should execute \p cmd
	 */
	static void
	schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
	{
	    int delay_max = 0;
	    int delay_base = 0;
	    int requested_delay = 0;

	    CRM_CHECK(cmd != NULL, return);
	    CRM_CHECK(device != NULL, return);

	    // Read from cmd only after it has been checked for NULL
	    requested_delay = cmd->start_delay;

	    if (device->include_nodeid && (cmd->target != NULL)) {
	        crm_node_t *node = pcmk__get_node(0, cmd->target, NULL,
	                                          pcmk__node_search_cluster_member);

	        cmd->target_nodeid = node->id;
	    }

	    // Replace any previous device ID with this device's
	    free(cmd->device);
	    cmd->device = pcmk__str_copy(device->id);
	    cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);

	    if (cmd->remote_op_id) {
	        crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
	                  "with op id %.8s and timeout %ds",
	                  cmd->action,
	                  (cmd->target == NULL)? "" : " targeting ",
	                  pcmk__s(cmd->target, ""),
	                  device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
	    } else {
	        crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
	                  cmd->action,
	                  (cmd->target == NULL)? "" : " targeting ",
	                  pcmk__s(cmd->target, ""),
	                  device->id, cmd->client, cmd->timeout);
	    }

	    device->pending_ops = g_list_append(device->pending_ops, cmd);
	    mainloop_set_trigger(device->work);

	    // Value -1 means disable any static/random fencing delays
	    if (requested_delay < 0) {
	        return;
	    }

	    delay_max = get_action_delay_max(device, cmd->action);
	    delay_base = get_action_delay_base(device, cmd->action, cmd->target);
	    if (delay_max == 0) {
	        delay_max = delay_base;
	    }
	    if (delay_max < delay_base) {
	        crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
	                 PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
	                 "(limiting to maximum delay)",
	                 delay_base, delay_max, cmd->action, device->id);
	        delay_base = delay_max;
	    }
	    if (delay_max > 0) {
	        /* delay_max >= delay_base here, so the modulus is only taken when
	         * the range is positive
	         */
	        // coverity[dontcall] It doesn't matter here if rand() is predictable
	        cmd->start_delay +=
	            ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
	            + delay_base;
	    }

	    if (cmd->start_delay > 0) {
	        crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS
	                   " timeout=%ds requested_delay=%ds base=%ds max=%ds",
	                   cmd->action,
	                   (cmd->target == NULL)? "" : " targeting ",
	                   pcmk__s(cmd->target, ""),
	                   device->id, cmd->start_delay, cmd->timeout,
	                   requested_delay, delay_base, delay_max);
	        cmd->delay_id =
	            g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
	    }
	}

	/*!
	 * \internal
	 * \brief Free a fence device, failing any operations still pending on it
	 *
	 * Each pending operation's callback is invoked with an error result
	 * (PCMK_EXEC_NO_FENCE_DEVICE) before the device's resources are released.
	 *
	 * \param[in,out] data  Fence device to free (stonith_device_t *)
	 */
	static void
	free_device(gpointer data)
	{
	    stonith_device_t *device = data;

	    g_hash_table_destroy(device->params);
	    g_hash_table_destroy(device->aliases);

	    // Report a failure for everything that never got to run
	    for (GList *iter = device->pending_ops; iter != NULL; iter = iter->next) {
	        async_command_t *purged = iter->data;

	        crm_warn("Removal of device '%s' purged operation '%s'", device->id, purged->action);
	        report_internal_result(purged, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
	                               "Device was removed before action could be executed");
	    }
	    g_list_free(device->pending_ops);

	    g_list_free_full(device->targets, free);

	    if (device->timer != NULL) {
	        mainloop_timer_stop(device->timer);
	        mainloop_timer_del(device->timer);
	    }

	    mainloop_destroy_trigger(device->work);

	    free_xml(device->agent_metadata);
	    free(device->namespace);
	    if (device->on_target_actions != NULL) {
	        g_string_free(device->on_target_actions, TRUE);
	    }
	    free(device->agent);
	    free(device->id);
	    free(device);
	}

	/*!
	 * \internal
	 * \brief Destroy the global fence device table, if it exists
	 */
	void free_device_list(void)
	{
	    if (device_list == NULL) {
	        return;
	    }
	    g_hash_table_destroy(device_list);
	    device_list = NULL;
	}

	/*!
	 * \internal
	 * \brief Create the global fence device table if it does not exist yet
	 *
	 * \note The table is created with free_device() as its value destructor and
	 *       no key destructor.
	 */
	void
	init_device_list(void)
	{
	    if (device_list != NULL) {
	        return;
	    }
	    device_list = pcmk__strkey_table(NULL, free_device);
	}

	/*!
	 * \internal
	 * \brief Parse a host map string into a table of host aliases
	 *
	 * The map is a list of "name:value" or "name=value" pairs separated by ';',
	 * space, or tab. A backslash causes the following character to be skipped by
	 * the parser; backslashes are removed from stored values.
	 *
	 * \param[in]     hostmap  Host map string to parse (may be NULL)
	 * \param[in,out] targets  If not NULL, a copy of each parsed value is
	 *                         appended to this list
	 *
	 * \return Newly allocated table mapping names to values (empty if \p hostmap
	 *         is NULL or contains no mappings)
	 */
	static GHashTable *
	build_port_aliases(const char *hostmap, GList **targets)
	{
	    char *name = NULL;
	    int last = 0, lpc = 0, max = 0, added = 0;
	    GHashTable *aliases = pcmk__strikey_table(free, free);

	    if (hostmap == NULL) {
	        return aliases;
	    }

	    max = strlen(hostmap);
	    for (; lpc <= max; lpc++) {
	        switch (hostmap[lpc]) {
	            /* Skip escaped chars */
	            case '\\':
	                lpc++;
	                break;

	            /* Assignment chars */
	            case '=':
	            case ':':
	                if (lpc > last) {
	                    free(name);
	                    name = pcmk__assert_alloc(1, 1 + lpc - last);
	                    memcpy(name, hostmap + last, lpc - last);
	                }
	                last = lpc + 1;
	                break;

	            /* Delimiter chars */
	            /* case ',': Potentially used to specify multiple ports */
	            case 0:
	            case ';':
	            case ' ':
	            case '\t':
	                if (name) {
	                    char *value = NULL;
	                    int k = 0;

	                    value = pcmk__assert_alloc(1, 1 + lpc - last);
	                    memcpy(value, hostmap + last, lpc - last);

	                    // Strip backslashes from the value in place
	                    for (int i = 0; value[i] != '\0'; i++) {
	                        if (value[i] != '\\') {
	                            value[k++] = value[i];
	                        }
	                    }
	                    value[k] = '\0';

	                    crm_debug("Adding alias '%s'='%s'", name, value);

	                    // The table takes ownership of both name and value
	                    g_hash_table_replace(aliases, name, value);
	                    if (targets) {
	                        *targets = g_list_append(*targets, pcmk__str_copy(value));
	                    }
	                    value = NULL;
	                    name = NULL;
	                    added++;

	                } else if (lpc > last) {
	                    crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
	                }

	                last = lpc + 1;
	                break;

	            default:
	                break;
	        }

	        if (hostmap[lpc] == 0) {
	            break;
	        }
	    }

	    if (added == 0) {
	        crm_info("No host mappings detected in '%s'", hostmap);
	    }

	    // A name without a value may be left over
	    free(name);
	    return aliases;
	}

	/* Cache of fence agent metadata text, keyed by agent name. It is created
	 * lazily by init_metadata_cache() and filled by get_agent_metadata().
	 */
	GHashTable *metadata_cache = NULL;

	/*!
	 * \internal
	 * \brief Destroy the agent metadata cache, if it exists
	 */
	void
	free_metadata_cache(void) {
	    if (metadata_cache == NULL) {
	        return;
	    }
	    g_hash_table_destroy(metadata_cache);
	    metadata_cache = NULL;
	}

	/*!
	 * \internal
	 * \brief Create the agent metadata cache if it does not exist yet
	 *
	 * \note The table is created with free() as both key and value destructor.
	 */
	static void
	init_metadata_cache(void) {
	    if (metadata_cache != NULL) {
	        return;
	    }
	    metadata_cache = pcmk__strkey_table(free, free);
	}

	/*!
	 * \internal
	 * \brief Get parsed metadata for a fence agent, using the metadata cache
	 *
	 * \param[in]  agent     Name of fence agent
	 * \param[out] metadata  Where to store the parsed metadata XML; set to NULL
	 *                       for the internal watchdog agent and on error
	 *
	 * \return pcmk_rc_ok on success, EINVAL if \p metadata is NULL, or EAGAIN if
	 *         the metadata could not be retrieved
	 *
	 * \note On pcmk_rc_ok, \p *metadata is the result of parsing the metadata
	 *       text, so callers should still check it for NULL.
	 */
	int
	get_agent_metadata(const char *agent, xmlNode ** metadata)
	{
	    char *buffer = NULL;

	    if (metadata == NULL) {
	        return EINVAL;
	    }
	    *metadata = NULL;
	    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
	        return pcmk_rc_ok;
	    }
	    init_metadata_cache();
	    buffer = g_hash_table_lookup(metadata_cache, agent);
	    if (buffer == NULL) {
	        // Not cached yet, so ask the fencing API for it
	        stonith_t *st = stonith_api_new();
	        int rc;

	        if (st == NULL) {
	            crm_warn("Could not get agent meta-data: "
	                     "API memory allocation failed");
	            return EAGAIN;
	        }
	        rc = st->cmds->metadata(st, st_opt_sync_call, agent,
	                                NULL, &buffer, 10);
	        stonith_api_delete(st);
	        if (rc || !buffer) {
	            crm_err("Could not retrieve metadata for fencing agent %s", agent);
	            return EAGAIN;
	        }

	        // The cache takes ownership of buffer
	        g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer);
	    }

	    *metadata = pcmk__xml_parse(buffer);
	    return pcmk_rc_ok;
	}

	/*!
	 * \internal
	 * \brief Check whether agent metadata declares a "nodeid" parameter
	 *
	 * \param[in] xml  Agent metadata XML (may be NULL)
	 *
	 * \return TRUE if \p xml contains a parameter element named "nodeid";
	 *         FALSE otherwise, or if running stand-alone or \p xml is NULL
	 */
	static gboolean
	is_nodeid_required(xmlNode * xml)
	{
	    xmlXPathObjectPtr matches = NULL;
	    gboolean required = FALSE;

	    if (stand_alone || (xml == NULL)) {
	        return FALSE;
	    }

	    matches = xpath_search(xml,
	                           "//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='nodeid']");
	    if (numXpathResults(matches) > 0) {
	        required = TRUE;
	    }
	    freeXpathObject(matches);
	    return required;
	}

	/*!
	 * \internal
	 * \brief Set device flags and attributes from the actions in agent metadata
	 *
	 * For each "action" element in the device's metadata, this sets the matching
	 * "supports" flag (list, status, reboot, on), enables automatic unfencing for
	 * an "on" action marked automatic (or required), and records the names of
	 * actions that must be executed on the target.
	 *
	 * \param[in,out] device  Fence device to update (does nothing if it has no
	 *                        agent metadata)
	 */
	static void
	read_action_metadata(stonith_device_t *device)
	{
	    xmlXPathObjectPtr xpath = NULL;
	    int max = 0;
	    int lpc = 0;

	    if (device->agent_metadata == NULL) {
	        return;
	    }

	    xpath = xpath_search(device->agent_metadata, "//action");
	    max = numXpathResults(xpath);

	    if (max <= 0) {
	        freeXpathObject(xpath);
	        return;
	    }

	    for (lpc = 0; lpc < max; lpc++) {
	        const char *action = NULL;
	        xmlNode *match = getXpathResult(xpath, lpc);

	        CRM_LOG_ASSERT(match != NULL);
	        if (match == NULL) {
	            continue;
	        }

	        action = crm_element_value(match, PCMK_XA_NAME);

	        if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
	            stonith__set_device_flags(device->flags, device->id,
	                                      st_device_supports_list);
	        } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) {
	            stonith__set_device_flags(device->flags, device->id,
	                                      st_device_supports_status);
	        } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
	            stonith__set_device_flags(device->flags, device->id,
	                                      st_device_supports_reboot);
	        } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
	            /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it
	             * joins.
	             *
	             * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for
	             * PCMK_XA_AUTOMATIC.
	             */
	            if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC)
	                || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) {
	                device->automatic_unfencing = TRUE;
	            }
	            stonith__set_device_flags(device->flags, device->id,
	                                      st_device_supports_on);
	        }

	        if ((action != NULL)
	            && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) {

	            pcmk__add_word(&(device->on_target_actions), 64, action);
	        }
	    }

	    freeXpathObject(xpath);
	}

	/*!
	 * \internal
	 * \brief Set a pcmk_*_action parameter if not already set
	 *
	 * \param[in,out] params  Device parameters
	 * \param[in]     action  Name of action
	 * \param[in]     value   Value to use if action is not already set
	 */
	static void
	map_action(GHashTable *params, const char *action, const char *value)
	{
	    char *key = crm_strdup_printf("pcmk_%s_action", action);

	    if (g_hash_table_lookup(params, key)) {
	        crm_warn("Ignoring %s='%s', see %s instead",
	                 STONITH_ATTR_ACTION_OP, value, key);
	        free(key);
	    } else {
	        crm_warn("Mapping %s='%s' to %s='%s'",
	                 STONITH_ATTR_ACTION_OP, value, key, value);

	        // key is handed over to the table here, so it is not freed
	        g_hash_table_insert(params, key, pcmk__str_copy(value));
	    }
	}

	/*!
	 * \internal
	 * \brief Create device parameter table from XML
	 *
	 * \param[in] name  Device name (used for logging only)
	 * \param[in] dev   XML containing device parameters
	 *
	 * \return Parameter table built by xml2list(), with any STONITH_ATTR_ACTION_OP
	 *         entry removed (after mapping it to pcmk_*_action parameters where
	 *         applicable)
	 */
	static GHashTable *
	xml2device_params(const char *name, const xmlNode *dev)
	{
	    GHashTable *params = xml2list(dev);
	    const char *value;

	    /* Action should never be specified in the device configuration,
	     * but we support it for users who are familiar with other software
	     * that worked that way.
	     */
	    value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
	    if (value != NULL) {
	        crm_warn("%s has '%s' parameter, which should never be specified in configuration",
	                 name, STONITH_ATTR_ACTION_OP);

	        if (*value == '\0') {
	            crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);

	        } else if (strcmp(value, PCMK_ACTION_REBOOT) == 0) {
	            crm_warn("Ignoring %s='reboot' (see " PCMK_OPT_STONITH_ACTION
	                     " cluster property instead)",
	                     STONITH_ATTR_ACTION_OP);

	        } else if (strcmp(value, PCMK_ACTION_OFF) == 0) {
	            map_action(params, PCMK_ACTION_REBOOT, value);

	        } else {
	            map_action(params, PCMK_ACTION_OFF, value);
	            map_action(params, PCMK_ACTION_REBOOT, value);
	        }

	        // value points into the table, so remove the entry only when done
	        g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
	    }

	    return params;
	}

	/*!
	 * \internal
	 * \brief Determine how a device's eligible targets should be checked
	 *
	 * \param[in] dev  Fence device to check
	 *
	 * \return Configured PCMK_STONITH_HOST_CHECK value if set; otherwise
	 *         PCMK_VALUE_STATIC_LIST if a host list or host map is configured,
	 *         PCMK_VALUE_DYNAMIC_LIST if the device supports list,
	 *         PCMK_VALUE_STATUS if it supports status, or else PCMK_VALUE_NONE
	 */
	static const char *
	target_list_type(stonith_device_t * dev)
	{
	    const char *configured = g_hash_table_lookup(dev->params,
	                                                 PCMK_STONITH_HOST_CHECK);

	    if (configured != NULL) {
	        return configured;
	    }

	    if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) {
	        return PCMK_VALUE_STATIC_LIST;
	    }
	    if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) {
	        return PCMK_VALUE_STATIC_LIST;
	    }
	    if (pcmk_is_set(dev->flags, st_device_supports_list)) {
	        return PCMK_VALUE_DYNAMIC_LIST;
	    }
	    if (pcmk_is_set(dev->flags, st_device_supports_status)) {
	        return PCMK_VALUE_STATUS;
	    }
	    return PCMK_VALUE_NONE;
	}

	/*!
	 * \internal
	 * \brief Create a fence device object from its XML representation
	 *
	 * \param[in] dev  XML describing the device (agent, ID, namespace, parameters)
	 *
	 * \return Newly allocated device, or NULL if \p dev has no agent attribute
	 *
	 * \note If agent metadata cannot be retrieved yet (EAGAIN), a repeating timer
	 *       is started to retry via get_agent_metadata_cb().
	 */
	static stonith_device_t *
	build_device_from_xml(xmlNode *dev)
	{
	    const char *host_list = NULL;
	    const char *host_map = NULL;
	    const char *check_type = NULL;
	    const char *provides = NULL;
	    stonith_device_t *device = NULL;
	    int metadata_rc = pcmk_rc_ok;
	    char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT);

	    CRM_CHECK(agent != NULL, return NULL);

	    device = pcmk__assert_alloc(1, sizeof(stonith_device_t));

	    device->id = crm_element_value_copy(dev, PCMK_XA_ID);
	    device->agent = agent;
	    device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE);
	    device->params = xml2device_params(device->id, dev);

	    // Targets come from the host list plus any values in the host map
	    host_list = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST);
	    if (host_list != NULL) {
	        device->targets = stonith__parse_targets(host_list);
	    }

	    host_map = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP);
	    device->aliases = build_port_aliases(host_map, &(device->targets));

	    check_type = target_list_type(device);
	    if ((device->targets != NULL)
	        && !pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST,
	                         pcmk__str_casei)) {

	        // device->targets is necessary only with PCMK_VALUE_STATIC_LIST
	        g_list_free_full(device->targets, free);
	        device->targets = NULL;
	    }

	    metadata_rc = get_agent_metadata(device->agent, &device->agent_metadata);
	    if (metadata_rc == pcmk_rc_ok) {
	        if (device->agent_metadata != NULL) {
	            read_action_metadata(device);
	            stonith__device_parameter_flags(&(device->flags), device->id,
	                                            device->agent_metadata);
	        }

	    } else if (metadata_rc == EAGAIN) {
	        // Metadata is not available yet, so retry from a timer
	        if (device->timer == NULL) {
	            device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
	                                               TRUE, get_agent_metadata_cb, device);
	        }
	        if (!mainloop_timer_running(device->timer)) {
	            mainloop_timer_start(device->timer);
	        }
	    }

	    // An explicitly configured nodeid parameter takes precedence
	    if (g_hash_table_lookup(device->params, "nodeid") == NULL) {
	        device->include_nodeid = is_nodeid_required(device->agent_metadata);
	    }

	    provides = crm_element_value(dev, PCMK__XA_RSC_PROVIDES);
	    if (pcmk__str_eq(provides, PCMK_VALUE_UNFENCING, pcmk__str_casei)) {
	        device->automatic_unfencing = TRUE;
	    }

	    if (is_action_required(PCMK_ACTION_ON, device)) {
	        crm_info("Fencing device '%s' requires unfencing", device->id);
	    }

	    if (device->on_target_actions != NULL) {
	        crm_info("Fencing device '%s' requires actions (%s) to be executed "
	                 "on target", device->id,
	                 (const char *) device->on_target_actions->str);
	    }

	    device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
	    /* TODO: Hook up priority */

	    return device;
	}

	/*!
	 * \internal
	 * \brief Create and schedule a fencer-initiated command on a device
	 *
	 * \param[in]     origin              Origin to record in the command
	 * \param[in,out] device              Device to run the command on
	 * \param[in]     action              Action to execute
	 * \param[in]     target              Target of the action, if any
	 * \param[in]     timeout             Timeout to use (0 means 60)
	 * \param[in]     internal_user_data  Data stored in the command for \p done_cb
	 * \param[in]     done_cb             Callback for the result; it receives the
	 *                                    command itself as its user data
	 */
	static void
	schedule_internal_command(const char *origin,
	                          stonith_device_t * device,
	                          const char *action,
	                          const char *target,
	                          int timeout,
	                          void *internal_user_data,
	                          void (*done_cb) (int pid,
	                                           const pcmk__action_result_t *result,
	                                           void *user_data))
	{
	    async_command_t *internal = pcmk__assert_alloc(1, sizeof(async_command_t));

	    // Internal commands have no client call ID
	    internal->id = -1;

	    internal->default_timeout = (timeout != 0)? timeout : 60;
	    internal->timeout = internal->default_timeout;

	    internal->origin = pcmk__str_copy(origin);
	    internal->device = pcmk__str_copy(device->id);
	    internal->action = pcmk__str_copy(action);
	    internal->target = pcmk__str_copy(target);
	    internal->client = pcmk__str_copy(crm_system_name);
	    internal->client_name = pcmk__str_copy(crm_system_name);

	    /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
	    internal->done_cb = done_cb;
	    internal->internal_user_data = internal_user_data;

	    schedule_stonith_command(internal, device);
	}

	// Fence agent status commands use custom exit status codes
	// (status_search_cb() maps these codes to "can fence" / "cannot fence")
	enum fence_status_code {
	fence_status_invalid = -1, // NOTE(review): not referenced in the visible code
	fence_status_active = 0, // status_search_cb() treats this as "can fence"
	fence_status_unknown = 1, // status_search_cb() treats this as "cannot fence"
	fence_status_inactive = 2, // status_search_cb() treats this as "can fence"
	};

	static void
	status_search_cb(int pid, const pcmk__action_result_t result, void user_data)
	{
	async_command_t *cmd = user_data;
	struct device_search_s *search = cmd->internal_user_data;
	stonith_device_t *dev = cmd_device(cmd);
	gboolean can = FALSE;

	free_async_command(cmd);

	if (!dev) {
	search_devices_record_result(search, NULL, FALSE);
	return;
	}

	mainloop_set_trigger(dev->work);

	if (result->execution_status != PCMK_EXEC_DONE) {
	crm_warn("Assuming %s cannot fence %s "
	"because status could not be executed: %s%s%s%s",
	dev->id, search->host,
	pcmk_exec_status_str(result->execution_status),
	((result->exit_reason == NULL)? "" : " ("),
	((result->exit_reason == NULL)? "" : result->exit_reason),
	((result->exit_reason == NULL)? "" : ")"));
	search_devices_record_result(search, dev->id, FALSE);
	return;
	}

	switch (result->exit_status) {
	case fence_status_unknown:
	crm_trace("%s reported it cannot fence %s", dev->id, search->host);
	break;

	case fence_status_active:
	case fence_status_inactive:
	crm_trace("%s reported it can fence %s", dev->id, search->host);
	can = TRUE;
	break;

	default:
	crm_warn("Assuming %s cannot fence %s "
	"(status returned unknown code %d)",
	dev->id, search->host, result->exit_status);
	break;
	}
	search_devices_record_result(search, dev->id, can);
	}

	static void
	dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
	void *user_data)
	{
	async_command_t *cmd = user_data;
	struct device_search_s *search = cmd->internal_user_data;
	stonith_device_t *dev = cmd_device(cmd);
	gboolean can_fence = FALSE;

	free_async_command(cmd);

	/* Host/alias must be in the list output to be eligible to be fenced
	*
	* Will cause problems if down'd nodes aren't listed or (for virtual nodes)
	* if the guest is still listed despite being moved to another machine
	*/
	if (!dev) {
	search_devices_record_result(search, NULL, FALSE);
	return;
	}

	mainloop_set_trigger(dev->work);

	if (pcmk__result_ok(result)) {
	crm_info("Refreshing target list for %s", dev->id);
	g_list_free_full(dev->targets, free);
	dev->targets = stonith__parse_targets(result->action_stdout);
	dev->targets_age = time(NULL);

	} else if (dev->targets != NULL) {
	if (result->execution_status == PCMK_EXEC_DONE) {
	crm_info("Reusing most recent target list for %s "
	"because list returned error code %d",
	dev->id, result->exit_status);
	} else {
	crm_info("Reusing most recent target list for %s "
	"because list could not be executed: %s%s%s%s",
	dev->id, pcmk_exec_status_str(result->execution_status),
	((result->exit_reason == NULL)? "" : " ("),
	((result->exit_reason == NULL)? "" : result->exit_reason),
	((result->exit_reason == NULL)? "" : ")"));
	}

	} else { // We have never successfully executed list
	if (result->execution_status == PCMK_EXEC_DONE) {
	crm_warn("Assuming %s cannot fence %s "
	"because list returned error code %d",
	dev->id, search->host, result->exit_status);
	} else {
	crm_warn("Assuming %s cannot fence %s "
	"because list could not be executed: %s%s%s%s",
	dev->id, search->host,
	pcmk_exec_status_str(result->execution_status),
	((result->exit_reason == NULL)? "" : " ("),
	((result->exit_reason == NULL)? "" : result->exit_reason),
	((result->exit_reason == NULL)? "" : ")"));
	}

	/* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't
	* explicitly specify PCMK_VALUE_DYNAMIC_LIST
	*/
	if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) {
	crm_notice("Switching to pcmk_host_check='status' for %s", dev->id);
	pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK,
	PCMK_VALUE_STATUS);
	}
	}

	if (dev->targets) {
	const char *alias = g_hash_table_lookup(dev->aliases, search->host);

	if (!alias) {
	alias = search->host;
	}
	if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) {
	can_fence = TRUE;
	}
	}
	search_devices_record_result(search, dev->id, can_fence);
	}

	/*!
	 * \internal
	 * \brief Check whether one parameter table has entries missing or different
	 *        in another
	 *
	 * Keys beginning with "CRM_meta" and the PCMK_XA_CRM_FEATURE_SET key are
	 * ignored. Values are compared case-insensitively.
	 *
	 * \param[in] first   Table whose entries are checked
	 * \param[in] second  Table to compare against
	 *
	 * \return 1 if any considered key in \p first is absent from \p second or has
	 *         a different value there, otherwise 0
	 */
	static int
	device_params_diff(GHashTable *first, GHashTable *second) {
	    char *key = NULL;
	    char *value = NULL;
	    GHashTableIter gIter;

	    g_hash_table_iter_init(&gIter, first);
	    while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {

	        // strstr() returning key itself means key starts with the prefix
	        if (strstr(key, "CRM_meta") == key) {
	            continue;
	        } else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) {
	            continue;
	        } else {
	            char *other_value = g_hash_table_lookup(second, key);

	            if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
	                crm_trace("Different value for %s: %s != %s", key, other_value, value);
	                return 1;
	            }
	        }
	    }

	    return 0;
	}

	/*!
	 * \internal
	 * \brief Checks to see if an identical device already exists in the device_list
	 *
	 * \param[in] device  Device to look for
	 *
	 * \return Registered device with the same ID, agent (compared
	 *         case-insensitively), and parameters as \p device, or NULL if there
	 *         is none
	 */
	static stonith_device_t *
	device_has_duplicate(const stonith_device_t *device)
	{
	    stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);

	    if (!dup) {
	        crm_trace("No match for %s", device->id);
	        return NULL;

	    } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
	        crm_trace("Different agent: %s != %s", dup->agent, device->agent);
	        return NULL;
	    }

	    /* Use calculate_operation_digest() here? */
	    // Compare in both directions so extra keys on either side are caught
	    if (device_params_diff(device->params, dup->params) ||
	        device_params_diff(dup->params, device->params)) {
	        return NULL;
	    }

	    crm_trace("Match");
	    return dup;
	}

	/*!
	 * \internal
	 * \brief Register a fence device described by XML
	 *
	 * \param[in] dev       XML describing the device
	 * \param[in] from_cib  Whether the registration comes from the CIB (as
	 *                      opposed to an API client)
	 *
	 * \return pcmk_ok on success, -ENOMEM if the device could not be built from
	 *         \p dev, or -ENODEV if a watchdog device was rejected; a watchdog
	 *         device that is skipped because this node is not among its targets
	 *         also returns pcmk_ok
	 */
	int
	stonith_device_register(xmlNode *dev, gboolean from_cib)
	{
	    stonith_device_t *dup = NULL;
	    stonith_device_t *device = build_device_from_xml(dev);
	    guint ndevices = 0;
	    int rv = pcmk_ok;

	    CRM_CHECK(device != NULL, return -ENOMEM);

	    /* do we have a watchdog-device?
	     *
	     * The do/while(0) lets the one acceptable case "break" out to the normal
	     * registration below; every other path frees the device and returns.
	     */
	    if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
	        pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
	                         STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
	        if (stonith_watchdog_timeout_ms <= 0) {
	            crm_err("Ignoring watchdog fence device without "
	                    PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set.");
	            rv = -ENODEV;
	            /* fall through to cleanup & return */
	        } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
	                                     STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
	            crm_err("Ignoring watchdog fence device with unknown "
	                    "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
	                    device->agent?device->agent:"");
	            rv = -ENODEV;
	            /* fall through to cleanup & return */
	        } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
	                                 pcmk__str_none)) {
	            crm_err("Ignoring watchdog fence device "
	                    "named %s !='"STONITH_WATCHDOG_ID"'.",
	                    device->id?device->id:"");
	            rv = -ENODEV;
	            /* fall through to cleanup & return */
	        } else {
	            if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
	                             pcmk__str_none)) {
	                /* this either has an empty list or the targets
	                 * configured for watchdog-fencing
	                 */
	                g_list_free_full(stonith_watchdog_targets, free);
	                stonith_watchdog_targets = device->targets;
	                device->targets = NULL;
	            }
	            if (node_does_watchdog_fencing(stonith_our_uname)) {
	                g_list_free_full(device->targets, free);
	                device->targets = stonith__parse_targets(stonith_our_uname);
	                pcmk__insert_dup(device->params,
	                                 PCMK_STONITH_HOST_LIST, stonith_our_uname);
	                /* proceed as with any other stonith-device */
	                break;
	            }

	            crm_debug("Skip registration of watchdog fence device on node not in host-list.");
	            /* Any targets still owned by this device are released by
	             * free_device() below, so they are not reset to NULL here.
	             */
	            stonith_device_remove(device->id, from_cib);
	        }
	        free_device(device);
	        return rv;
	    } while (0);

	    dup = device_has_duplicate(device);
	    if (dup) {
	        ndevices = g_hash_table_size(device_list);
	        crm_debug("Device '%s' already in device list (%u active device%s)",
	                  device->id, ndevices, pcmk__plural_s(ndevices));
	        free_device(device);

	        // Keep using the already registered, identical device
	        device = dup;
	        device->dirty = FALSE;

	    } else {
	        stonith_device_t *old = g_hash_table_lookup(device_list, device->id);

	        if (from_cib && old && old->api_registered) {
	            /* If the cib is writing over an entry that is shared with a stonith client,
	             * copy any pending ops that currently exist on the old entry to the new one.
	             * Otherwise the pending ops will be reported as failures
	             */
	            crm_info("Overwriting existing entry for %s from CIB", device->id);
	            device->pending_ops = old->pending_ops;
	            device->api_registered = TRUE;
	            old->pending_ops = NULL;
	            if (device->pending_ops) {
	                mainloop_set_trigger(device->work);
	            }
	        }
	        g_hash_table_replace(device_list, device->id, device);

	        ndevices = g_hash_table_size(device_list);
	        crm_notice("Added '%s' to device list (%u active device%s)",
	                   device->id, ndevices, pcmk__plural_s(ndevices));
	    }

	    if (from_cib) {
	        device->cib_registered = TRUE;
	    } else {
	        device->api_registered = TRUE;
	    }

	    return pcmk_ok;
	}

	/*!
	 * \internal
	 * \brief Drop one registration source of a fence device
	 *
	 * The device is removed from the device table only once it is registered
	 * neither via the CIB nor via the API.
	 *
	 * \param[in] id        ID of device to unregister
	 * \param[in] from_cib  Whether the CIB registration (as opposed to the API
	 *                      registration) is being dropped
	 */
	void
	stonith_device_remove(const char *id, bool from_cib)
	{
	    stonith_device_t *device = g_hash_table_lookup(device_list, id);
	    guint ndevices = 0;

	    if (!device) {
	        ndevices = g_hash_table_size(device_list);
	        crm_info("Device '%s' not found (%u active device%s)",
	                 id, ndevices, pcmk__plural_s(ndevices));
	        return;
	    }

	    if (from_cib) {
	        device->cib_registered = FALSE;
	    } else {
	        device->verified = FALSE;
	        device->api_registered = FALSE;
	    }

	    if (!device->cib_registered && !device->api_registered) {
	        /* NOTE(review): id is still used for logging after the removal, so
	         * it must not point into the device being removed -- confirm callers
	         */
	        g_hash_table_remove(device_list, id);
	        ndevices = g_hash_table_size(device_list);
	        crm_info("Removed '%s' from device list (%u active device%s)",
	                 id, ndevices, pcmk__plural_s(ndevices));
	    } else {
	        crm_trace("Not removing '%s' from device list (%u active) because "
	                  "still registered via:%s%s",
	                  id, g_hash_table_size(device_list),
	                  (device->cib_registered? " cib" : ""),
	                  (device->api_registered? " api" : ""));
	    }
	}

	/*!
	 * \internal
	 * \brief Count the fencing levels of a topology entry that are populated
	 *
	 * \param[in] tp  Node's topology table entry
	 *
	 * \return Number of non-NULL level slots in \p tp
	 * \note This function is used only for log messages.
	 */
	static int
	count_active_levels(const stonith_topology_t *tp)
	{
	    int active = 0;

	    for (int level = 0; level < ST__LEVEL_COUNT; level++) {
	        active += (tp->levels[level] != NULL)? 1 : 0;
	    }
	    return active;
	}

	/*!
	 * \internal
	 * \brief Free a topology table entry along with everything it owns
	 *
	 * Installed as the value destructor of the topology table by
	 * init_topology_list().
	 *
	 * \param[in,out] data  Topology entry (stonith_topology_t *) to free
	 */
	static void
	free_topology_entry(gpointer data)
	{
	    stonith_topology_t *entry = data;

	    // Each populated level is a list of heap-allocated device ID strings
	    for (int level = 0; level < ST__LEVEL_COUNT; level++) {
	        GList *devices = entry->levels[level];

	        if (devices == NULL) {
	            continue;
	        }
	        g_list_free_full(devices, free);
	    }

	    free(entry->target);
	    free(entry->target_value);
	    free(entry->target_pattern);
	    free(entry->target_attribute);
	    free(entry);
	}

	/*!
	 * \internal
	 * \brief Destroy the global topology table, if one exists
	 *
	 * The global pointer is reset to NULL so the table can be created again.
	 */
	void
	free_topology_list(void)
	{
	    if (topology == NULL) {
	        return;
	    }
	    g_hash_table_destroy(topology);
	    topology = NULL;
	}

	/*!
	 * \internal
	 * \brief Create the global topology table unless it already exists
	 *
	 * Values are released with free_topology_entry(); no key destructor is
	 * installed.
	 */
	void
	init_topology_list(void)
	{
	    if (topology != NULL) {
	        return;
	    }
	    topology = pcmk__strkey_table(NULL, free_topology_entry);
	}

	/*!
	 * \internal
	 * \brief Build the string that identifies the target of a topology level
	 *
	 * \param[in] level  Topology level XML
	 * \param[in] mode   How the level identifies its target; if
	 *                   fenced_target_by_unknown, it is determined from \p level
	 *
	 * \return Target name, target pattern, "ATTRIBUTE=VALUE", or
	 *         "unknown-ID" depending on the kind of target
	 * \note Callers in this file free() the returned string.
	 */
	char *
	stonith_level_key(const xmlNode *level, enum fenced_target_by mode)
	{
	    if (mode == fenced_target_by_unknown) {
	        mode = unpack_level_kind(level);
	    }

	    if (mode == fenced_target_by_name) {
	        return crm_element_value_copy(level, PCMK_XA_TARGET);
	    }

	    if (mode == fenced_target_by_pattern) {
	        return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN);
	    }

	    if (mode == fenced_target_by_attribute) {
	        const char *attr_name = crm_element_value(level,
	                                                  PCMK_XA_TARGET_ATTRIBUTE);
	        const char *attr_value = crm_element_value(level,
	                                                   PCMK_XA_TARGET_VALUE);

	        return crm_strdup_printf("%s=%s", attr_name, attr_value);
	    }

	    // Still unknown (or an unexpected value): fall back to the level's ID
	    return crm_strdup_printf("unknown-%s", pcmk__xe_id(level));
	}

	/*!
	 * \internal
	 * \brief Determine how a topology level XML element identifies its target
	 *
	 * An explicit target name takes precedence over a pattern, which takes
	 * precedence over an attribute/value pair.
	 *
	 * \param[in] level  Topology level XML to parse
	 *
	 * \return How to identify target of \p level
	 */
	static enum fenced_target_by
	unpack_level_kind(const xmlNode *level)
	{
	    if (crm_element_value(level, PCMK_XA_TARGET) != NULL) {
	        return fenced_target_by_name;
	    }

	    if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) {
	        return fenced_target_by_pattern;
	    }

	    // If standalone, there's no attribute manager
	    if (stand_alone) {
	        return fenced_target_by_unknown;
	    }

	    // Targeting by attribute needs both the attribute name and its value
	    if (crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) == NULL) {
	        return fenced_target_by_unknown;
	    }
	    if (crm_element_value(level, PCMK_XA_TARGET_VALUE) == NULL) {
	        return fenced_target_by_unknown;
	    }
	    return fenced_target_by_attribute;
	}

	/*!
	 * \internal
	 * \brief Split a comma-separated device string into a key/value list
	 *
	 * Every comma-delimited token (including empty ones) is added as a value
	 * with a NULL key.
	 *
	 * \param[in] devices  Comma-separated device IDs (may be NULL)
	 *
	 * \return Head of the resulting list, or NULL if \p devices is NULL
	 */
	static stonith_key_value_t *
	parse_device_list(const char *devices)
	{
	    size_t len = 0;
	    size_t start = 0;
	    stonith_key_value_t *output = NULL;

	    if (devices == NULL) {
	        return NULL;
	    }

	    len = strlen(devices);

	    // Walk through the terminating NUL so the final token is emitted too
	    for (size_t pos = 0; pos <= len; pos++) {
	        if ((devices[pos] == ',') || (devices[pos] == '\0')) {
	            char *name = strndup(devices + start, pos - start);

	            // The temporary copy is released right after it is added
	            output = stonith_key_value_add(output, NULL, name);
	            free(name);

	            start = pos + 1;
	        }
	    }

	    return output;
	}

	/*!
	 * \internal
	 * \brief Unpack essential information from topology request XML
	 *
	 * \param[in]  xml     Request XML to search
	 * \param[out] mode    If not NULL, where to store level kind
	 * \param[out] target  If not NULL, where to store representation of target
	 * \param[out] id      If not NULL, where to store level number
	 * \param[out] desc    If not NULL, where to store log-friendly level
	 *                     description
	 *
	 * \return Topology level XML from within \p xml, or NULL if not found
	 * \note The caller is responsible for freeing \p target and \p desc if set.
	 */
	static xmlNode *
	unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target,
	                     int *id, char **desc)
	{
	    enum fenced_target_by local_mode = fenced_target_by_unknown;
	    char *local_target = NULL;
	    int local_id = 0;

	    /* The level element can be the top element or lower. If top level, don't
	     * search by xpath, because it might give multiple hits if the XML is the
	     * CIB.
	     */
	    if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) {
	        xml = get_xpath_object("//" PCMK_XE_FENCING_LEVEL, xml, LOG_WARNING);
	    }

	    if (xml == NULL) {
	        if (desc != NULL) {
	            *desc = crm_strdup_printf("missing");
	        }
	    } else {
	        local_mode = unpack_level_kind(xml);
	        local_target = stonith_level_key(xml, local_mode);
	        crm_element_value_int(xml, PCMK_XA_INDEX, &local_id);
	        if (desc != NULL) {
	            *desc = crm_strdup_printf("%s[%d]", local_target, local_id);
	        }
	    }

	    if (mode != NULL) {
	        *mode = local_mode;
	    }
	    if (id != NULL) {
	        *id = local_id;
	    }

	    // Hand the target string to the caller, or release it if unwanted
	    if (target != NULL) {
	        *target = local_target;
	    } else {
	        free(local_target);
	    }

	    return xml;
	}

	/*!
	 * \internal
	 * \brief Register a fencing topology level for a target
	 *
	 * Given an XML request specifying the target name, level index, and device IDs
	 * for the level, this will create an entry for the target in the global topology
	 * table if one does not already exist, then append the specified device IDs to
	 * the entry's device list for the specified level.
	 *
	 * \param[in]  msg     XML request for STONITH level registration
	 * \param[out] desc    If not NULL, set to string representation "TARGET[LEVEL]"
	 * \param[out] result  Where to set result of registration
	 */
	void
	fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
	{
	    int id = 0;
	    int nlevels = 0;
	    xmlNode *level = NULL;
	    enum fenced_target_by mode = fenced_target_by_unknown;
	    char *target = NULL;

	    stonith_topology_t *tp = NULL;
	    stonith_key_value_t *dIter = NULL;
	    stonith_key_value_t *devices = NULL;

	    CRM_CHECK((msg != NULL) && (result != NULL), return);

	    level = unpack_level_request(msg, &mode, &target, &id, desc);
	    if (level == NULL) {
	        fenced_set_protocol_error(result);
	        return;
	    }

	    // Ensure an ID was given (even the client API adds an ID)
	    if (pcmk__str_empty(pcmk__xe_id(level))) {
	        crm_warn("Ignoring registration for topology level without ID");
	        free(target);
	        crm_log_xml_trace(level, "Bad level");
	        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
	                            "Topology level is invalid without ID");
	        return;
	    }

	    // Ensure a valid target was specified
	    if (mode == fenced_target_by_unknown) {
	        crm_warn("Ignoring registration for topology level '%s' "
	                 "without valid target", pcmk__xe_id(level));
	        free(target);
	        crm_log_xml_trace(level, "Bad level");
	        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
	                            "Invalid target for topology level '%s'",
	                            pcmk__xe_id(level));
	        return;
	    }

	    // Ensure level ID is in allowed range
	    if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
	        crm_warn("Ignoring topology registration for %s with invalid level %d",
	                 target, id);
	        free(target);
	        crm_log_xml_trace(level, "Bad level");
	        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
	                            "Invalid level number '%s' for topology level '%s'",
	                            pcmk__s(crm_element_value(level, PCMK_XA_INDEX),
	                                    ""),
	                            pcmk__xe_id(level));
	        return;
	    }

	    /* Find or create topology table entry */
	    tp = g_hash_table_lookup(topology, target);
	    if (tp == NULL) {
	        tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t));

	        // The new entry takes ownership of the target string
	        tp->kind = mode;
	        tp->target = target;
	        tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE);
	        tp->target_pattern = crm_element_value_copy(level,
	                                                    PCMK_XA_TARGET_PATTERN);
	        tp->target_attribute = crm_element_value_copy(level,
	                                                      PCMK_XA_TARGET_ATTRIBUTE);

	        g_hash_table_replace(topology, tp->target, tp);
	        crm_trace("Added %s (%d) to the topology (%d active entries)",
	                  target, (int) mode, g_hash_table_size(topology));
	    } else {
	        // Existing entry already has its own copy of the target
	        free(target);
	    }

	    if (tp->levels[id] != NULL) {
	        crm_info("Adding to the existing %s[%d] topology entry",
	                 tp->target, id);
	    }

	    devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES));
	    for (dIter = devices; dIter; dIter = dIter->next) {
	        const char *device = dIter->value;

	        crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
	        tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device));
	    }
	    stonith_key_value_freeall(devices, 1, 1);

	    nlevels = count_active_levels(tp);
	    crm_info("Target %s has %d active fencing level%s",
	             tp->target, nlevels, pcmk__plural_s(nlevels));

	    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	}

	/*!
	 * \internal
	 * \brief Unregister a fencing topology level for a target
	 *
	 * Given an XML request specifying the target name and level index (or 0 for all
	 * levels), this will remove any corresponding entry for the target from the
	 * global topology table.
	 *
	 * \param[in]  msg     XML request for STONITH level registration
	 * \param[out] desc    If not NULL, set to string representation "TARGET[LEVEL]"
	 * \param[out] result  Where to set result of unregistration
	 */
	void
	fenced_unregister_level(xmlNode *msg, char **desc,
	                        pcmk__action_result_t *result)
	{
	    int id = -1;
	    stonith_topology_t *tp = NULL;
	    char *target = NULL;
	    xmlNode *level = NULL;

	    CRM_CHECK(result != NULL, return);

	    level = unpack_level_request(msg, NULL, &target, &id, desc);
	    if (level == NULL) {
	        fenced_set_protocol_error(result);
	        return;
	    }

	    // Ensure level ID is in allowed range
	    if ((id < 0) || (id >= ST__LEVEL_COUNT)) {
	        crm_warn("Ignoring topology unregistration for %s with invalid level %d",
	                 target, id);
	        free(target);
	        crm_log_xml_trace(level, "Bad level");
	        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
	                            "Invalid level number '%s' for topology level %s",
	                            pcmk__s(crm_element_value(level, PCMK_XA_INDEX),
	                                    "<null>"),

	                            // Client API doesn't add ID to unregistration XML
	                            pcmk__s(pcmk__xe_id(level), ""));
	        return;
	    }

	    tp = g_hash_table_lookup(topology, target);
	    if (tp == NULL) {
	        guint nentries = g_hash_table_size(topology);

	        crm_info("No fencing topology found for %s (%d active %s)",
	                 target, nentries,
	                 pcmk__plural_alt(nentries, "entry", "entries"));

	    } else if ((id == 0) && g_hash_table_remove(topology, target)) {
	        // Level 0 means "all levels": the whole entry has been dropped
	        guint nentries = g_hash_table_size(topology);

	        crm_info("Removed all fencing topology entries related to %s "
	                 "(%d active %s remaining)", target, nentries,
	                 pcmk__plural_alt(nentries, "entry", "entries"));

	    } else if (tp->levels[id] != NULL) {
	        guint nlevels;

	        g_list_free_full(tp->levels[id], free);
	        tp->levels[id] = NULL;

	        nlevels = count_active_levels(tp);
	        crm_info("Removed level %d from fencing topology for %s "
	                 "(%d active level%s remaining)",
	                 id, target, nlevels, pcmk__plural_s(nlevels));
	    }

	    free(target);
	    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	}

	/*!
	 * \internal
	 * \brief Join a list of strings into one newly allocated string
	 *
	 * \param[in] list                  List whose elements are C strings
	 * \param[in] delim                 Separator placed between elements (NULL is
	 *                                  treated as an empty separator)
	 * \param[in] terminate_with_delim  If TRUE and the list is not empty, also
	 *                                  append \p delim after the last element
	 *
	 * \return Newly allocated string (empty if \p list is empty)
	 * \note The caller is responsible for freeing the result.
	 */
	static char *
	list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
	{
	    guint max = g_list_length(list);
	    size_t delim_len = 0;
	    size_t alloc_size = 1;  // Room for the terminating NUL
	    char *rv = NULL;
	    char *pos = NULL;
	    const char *lead_delim = "";
	    GList *gIter = NULL;

	    // A NULL separator must never reach a "%s" conversion
	    if (delim == NULL) {
	        delim = "";
	    }
	    delim_len = strlen(delim);

	    // One separator between each pair, plus an optional trailing one
	    if (max > 0) {
	        alloc_size += (max - 1 + (terminate_with_delim? 1 : 0)) * delim_len;
	    }

	    for (gIter = list; gIter != NULL; gIter = gIter->next) {
	        const char *value = (const char *) gIter->data;

	        alloc_size += strlen(value);
	    }

	    rv = pcmk__assert_alloc(alloc_size, sizeof(char));
	    pos = rv;

	    for (gIter = list; gIter != NULL; gIter = gIter->next) {
	        const char *value = (const char *) gIter->data;

	        // The first element gets no leading separator
	        pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
	        lead_delim = delim;
	    }

	    if ((max > 0) && terminate_with_delim) {
	        sprintf(pos, "%s", delim);
	    }

	    return rv;
	}

	/*!
	 * \internal
	 * \brief Execute a fence agent action directly (and asynchronously)
	 *
	 * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
	 * directly on a specified device. Only list, monitor, and status actions are
	 * expected to use this call, though it should work with any agent command.
	 *
	 * \param[in]  msg     Request XML specifying action
	 * \param[out] result  Where to store result of action
	 *
	 * \note If the action is monitor, the device must be registered via the API
	 *       (CIB registration is not sufficient), because monitor should not be
	 *       possible unless the device is "started" (API registered).
	 */
	static void
	execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
	{
	    xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, msg, LOG_ERR);
	    xmlNode *op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg,
	                                   LOG_ERR);
	    const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
	    const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION);
	    async_command_t *cmd = NULL;
	    stonith_device_t *device = NULL;

	    if ((id == NULL) || (action == NULL)) {
	        crm_info("Malformed API action request: device %s, action %s",
	                 (id? id : "not specified"),
	                 (action? action : "not specified"));
	        fenced_set_protocol_error(result);
	        return;
	    }

	    if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
	        // Watchdog agent actions are implemented internally
	        if (stonith_watchdog_timeout_ms <= 0) {
	            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
	                             "Watchdog fence device not configured");
	            return;

	        } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
	            pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	            pcmk__set_result_output(result,
	                                    list_to_string(stonith_watchdog_targets,
	                                                   "\n", TRUE),
	                                    NULL);
	            return;

	        } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) {
	            pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	            return;
	        }
	        // Any other watchdog action falls through to the regular lookup
	    }

	    device = g_hash_table_lookup(device_list, id);
	    if (device == NULL) {
	        crm_info("Ignoring API '%s' action request because device %s not found",
	                 action, id);
	        pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
	                            "'%s' not found", id);
	        return;

	    } else if (!device->api_registered
	               && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) {
	        // Monitors may run only on "started" (API-registered) devices
	        crm_info("Ignoring API '%s' action request because device %s not active",
	                 action, id);
	        pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
	                            "'%s' not active", id);
	        return;
	    }

	    cmd = create_async_command(msg);
	    if (cmd == NULL) {
	        crm_log_xml_warn(msg, "invalid");
	        fenced_set_protocol_error(result);
	        return;
	    }

	    // The action runs asynchronously, so report it as pending for now
	    schedule_stonith_command(cmd, device);
	    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
	}

	/*!
	 * \internal
	 * \brief Record one device's answer to a capability search
	 *
	 * Counts the reply, adds the device to the list of capable devices when
	 * appropriate, and once every expected reply has arrived, invokes the search
	 * callback and frees the search object.
	 *
	 * \param[in,out] search     Search being performed (freed when complete)
	 * \param[in]     device     ID of device that replied (may be NULL)
	 * \param[in]     can_fence  Whether the device can fence the search target
	 *
	 * \note The callback receives the list of capable device IDs; this function
	 *       does not free that list.
	 */
	static void
	search_devices_record_result(struct device_search_s *search, const char *device,
	                             gboolean can_fence)
	{
	    search->replies_received++;

	    if (can_fence && (device != NULL)) {
	        bool supported = true;

	        if (search->support_action_only != st_device_supports_none) {
	            stonith_device_t *dev = g_hash_table_lookup(device_list, device);

	            if ((dev != NULL)
	                && !pcmk_is_set(dev->flags, search->support_action_only)) {
	                supported = false;
	            }
	        }

	        /* A filtered-out device is simply not listed. Its reply must still
	         * reach the completion check below; otherwise, if it were the last
	         * reply, the callback would never run and the search would leak.
	         */
	        if (supported) {
	            search->capable = g_list_append(search->capable,
	                                            pcmk__str_copy(device));
	        }
	    }

	    if (search->replies_needed == search->replies_received) {

	        guint ndevices = g_list_length(search->capable);

	        crm_debug("Search found %d device%s that can perform '%s' targeting %s",
	                  ndevices, pcmk__plural_s(ndevices),
	                  (search->action? search->action : "unknown action"),
	                  (search->host? search->host : "any node"));

	        search->callback(search->capable, search->user_data);
	        free(search->host);
	        free(search->action);
	        free(search);
	    }
	}

	/*!
	 * \internal
	 * \brief Check whether the local host is allowed to execute a fencing action
	 *
	 * \param[in] device      Fence device to check
	 * \param[in] action      Fence action to check
	 * \param[in] target      Hostname of fence target
	 * \param[in] allow_self  Whether self-fencing is allowed for this operation
	 *
	 * \return TRUE if local host is allowed to execute action, FALSE otherwise
	 */
	static gboolean
	localhost_is_eligible(const stonith_device_t *device, const char *action,
	                      const char *target, gboolean allow_self)
	{
	    gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname,
	                                                pcmk__str_casei);

	    if ((device != NULL) && (action != NULL)
	        && (device->on_target_actions != NULL)
	        && (strstr((const char*) device->on_target_actions->str,
	                   action) != NULL)) {

	        // Actions listed as on-target may run only on the target itself
	        if (!localhost_is_target) {
	            crm_trace("Operation '%s' using %s can only be executed for local "
	                      "host, not %s", action, device->id, target);
	            return FALSE;
	        }

	    } else if (localhost_is_target && !allow_self) {
	        crm_trace("'%s' operation does not support self-fencing", action);
	        return FALSE;
	    }
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Check if local node is allowed to execute (possibly remapped) action
	 *
	 * \param[in] device      Fence device to check
	 * \param[in] action      Fence action to check
	 * \param[in] target      Node name of fence target
	 * \param[in] allow_self  Whether self-fencing is allowed for this operation
	 *
	 * \return true if local node is allowed to execute \p action or any actions it
	 *         might be remapped to, otherwise false
	 */
	static bool
	localhost_is_eligible_with_remap(const stonith_device_t *device,
	                                 const char *action, const char *target,
	                                 gboolean allow_self)
	{
	    // Check exact action
	    if (localhost_is_eligible(device, action, target, allow_self)) {
	        return true;
	    }

	    // Check potential remaps

	    if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
	        /* "reboot" might get remapped to "off" then "on", so even if reboot is
	         * disallowed, return true if either of those is allowed. We'll report
	         * the disallowed actions with the results. We never allow self-fencing
	         * for remapped "on" actions because the target is off at that point.
	         */
	        if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self)
	            || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) {
	            return true;
	        }
	    }

	    return false;
	}

	/*!
	 * \internal
	 * \brief Determine whether a device can fence a search's target
	 *
	 * The answer is recorded with search_devices_record_result(), either
	 * immediately or, when a list or status action must be run first, later from
	 * the corresponding callback.
	 *
	 * \param[in,out] dev     Fence device to check
	 * \param[in,out] search  Search the answer belongs to
	 */
	static void
	can_fence_host_with_device(stonith_device_t *dev,
	                           struct device_search_s *search)
	{
	    gboolean can = FALSE;
	    const char *check_type = "Internal bug";
	    const char *target = NULL;
	    const char *alias = NULL;
	    const char *dev_id = "Unspecified device";
	    const char *action = (search == NULL)? NULL : search->action;

	    CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results);

	    if (dev->id != NULL) {
	        dev_id = dev->id;
	    }

	    target = search->host;
	    if (target == NULL) {
	        can = TRUE;
	        check_type = "No target";
	        goto search_report_results;
	    }

	    /* Answer immediately if the device does not support the action
	     * or the local node is not allowed to perform it
	     */
	    if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)
	        && !pcmk_is_set(dev->flags, st_device_supports_on)) {
	        check_type = "Agent does not support 'on'";
	        goto search_report_results;

	    } else if (!localhost_is_eligible_with_remap(dev, action, target,
	                                                 search->allow_self)) {
	        check_type = "This node is not allowed to execute action";
	        goto search_report_results;
	    }

	    // Check eligibility as specified by pcmk_host_check
	    check_type = target_list_type(dev);
	    alias = g_hash_table_lookup(dev->aliases, target);
	    if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) {
	        can = TRUE;

	    } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST,
	                            pcmk__str_casei)) {

	        if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) {
	            can = TRUE;
	        } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
	                   && (alias != NULL)) {
	            // Target is not listed directly, but the host map aliases it
	            can = TRUE;
	        }

	    } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST,
	                            pcmk__str_casei)) {
	        time_t now = time(NULL);

	        // Refresh the target list if it is missing or older than 60 seconds
	        if ((dev->targets == NULL) || (dev->targets_age + 60 < now)) {
	            int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST,
	                                                    search->per_device_timeout);

	            if (device_timeout > search->per_device_timeout) {
	                crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s "
	                           "is larger than " PCMK_OPT_STONITH_TIMEOUT
	                           " (%ds), timeout may occur",
	                           device_timeout, dev_id, search->per_device_timeout);
	            }

	            crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
	                      check_type, dev_id, target, action);

	            schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL,
	                                      search->per_device_timeout, search, dynamic_list_search_cb);

	            /* we'll respond to this search request async in the cb */
	            return;
	        }

	        if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets,
	                              pcmk__str_casei)) {
	            can = TRUE;
	        }

	    } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) {
	        int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout);

	        if (device_timeout > search->per_device_timeout) {
	            crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is "
	                       "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), "
	                       "timeout may occur",
	                       device_timeout, dev_id, search->per_device_timeout);
	        }

	        crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
	                  check_type, dev_id, target, action);
	        schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target,
	                                  search->per_device_timeout, search, status_search_cb);
	        /* we'll respond to this search request async in the cb */
	        return;
	    } else {
	        crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type);
	        check_type = "Invalid " PCMK_STONITH_HOST_CHECK;
	    }

	search_report_results:
	    crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s",
	             dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"),
	             pcmk__s(target, "unspecified target"),
	             (alias == NULL)? "" : " (as '", pcmk__s(alias, ""),
	             (alias == NULL)? "" : "')", check_type);

	    // Without a search object there is nowhere to record the answer
	    if (search != NULL) {
	        search_devices_record_result(search, ((dev == NULL)? NULL : dev_id),
	                                     can);
	    }
	}

	/*!
	 * \internal
	 * \brief Hash table iterator that checks one device for a capability search
	 *
	 * \param[in]     key        Ignored
	 * \param[in,out] value      Fence device (stonith_device_t *) to check
	 * \param[in,out] user_data  Search in progress (struct device_search_s *)
	 */
	static void
	search_devices(gpointer key, gpointer value, gpointer user_data)
	{
	    can_fence_host_with_device((stonith_device_t *) value,
	                               (struct device_search_s *) user_data);
	}

	#define DEFAULT_QUERY_TIMEOUT 20

	/*!
	 * \internal
	 * \brief Find the devices that can execute an action against a host
	 *
	 * If no devices are registered, \p callback is invoked immediately with a
	 * NULL list. Otherwise a search object is created and every registered
	 * device is checked; \p callback is invoked once all devices have answered.
	 *
	 * \param[in]     host                 Fence target (NULL for any node)
	 * \param[in]     action               Fence action to check
	 * \param[in]     timeout              Timeout stored as the search's
	 *                                     per-device timeout
	 * \param[in]     allow_self           Whether self-fencing is allowed
	 * \param[in,out] user_data            Passed through to \p callback
	 * \param[in]     callback             Function to call with the result list
	 * \param[in]     support_action_only  Device support flag stored in the
	 *                                     search to filter the results
	 */
	static void
	get_capable_devices(const char *host, const char *action, int timeout,
	                    bool allow_self, void *user_data,
	                    void (*callback) (GList *devices, void *user_data),
	                    uint32_t support_action_only)
	{
	    struct device_search_s *search = NULL;
	    guint ndevices = g_hash_table_size(device_list);

	    if (ndevices == 0) {
	        callback(NULL, user_data);
	        return;
	    }

	    search = pcmk__assert_alloc(1, sizeof(struct device_search_s));

	    search->host = pcmk__str_copy(host);
	    search->action = pcmk__str_copy(action);
	    search->per_device_timeout = timeout;
	    search->allow_self = allow_self;
	    search->callback = callback;
	    search->user_data = user_data;
	    search->support_action_only = support_action_only;

	    /* We are guaranteed this many replies, even if a device is
	     * unregistered while the search is in progress.
	     */
	    search->replies_needed = ndevices;

	    crm_debug("Searching %d device%s to see which can execute '%s' targeting %s",
	              ndevices, pcmk__plural_s(ndevices),
	              (search->action? search->action : "unknown action"),
	              (search->host? search->host : "any node"));
	    g_hash_table_foreach(device_list, search_devices, search);
	}

	/* State for a device query, consumed by stonith_query_capable_device_cb().
	 * That callback frees the XML with free_xml(), each string with free(), and
	 * finally the structure itself.
	 */
	struct st_query_data {
	// Reply XML that the results are packed into before it is sent
	xmlNode *reply;
	// If not NULL, name of the peer node that the reply is sent to
	char *remote_peer;
	// If not NULL, ID used to look up the requesting client
	char *client_id;
	// Fence target; a query without a target wants device parameters
	char *target;
	// Requested fence action (compared against "reboot" for remapping)
	char *action;
	// Call options (checked for st_opt_allow_self_fencing, passed on when replying)
	int call_options;
	};

	/*!
	 * \internal
	 * \brief Add action-specific attributes to query reply XML
	 *
	 * \param[in,out] xml     XML to add attributes to
	 * \param[in]     action  Fence action
	 * \param[in]     device  Fence device
	 * \param[in]     target  Fence target
	 */
	static void
	add_action_specific_attributes(xmlNode *xml, const char *action,
	                               const stonith_device_t *device,
	                               const char *target)
	{
	    int action_specific_timeout;
	    int delay_max;
	    int delay_base;

	    CRM_CHECK(xml && action && device, return);

	    // PCMK__XA_ST_REQUIRED is currently used only for unfencing
	    if (is_action_required(action, device)) {
	        crm_trace("Action '%s' is required using %s", action, device->id);
	        crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1);
	    }

	    // pcmk_<action>_timeout if configured
	    action_specific_timeout = get_action_timeout(device, action, 0);
	    if (action_specific_timeout) {
	        crm_trace("Action '%s' has timeout %ds using %s",
	                  action, action_specific_timeout, device->id);
	        crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
	                        action_specific_timeout);
	    }

	    delay_max = get_action_delay_max(device, action);
	    if (delay_max > 0) {
	        crm_trace("Action '%s' has maximum random delay %ds using %s",
	                  action, delay_max, device->id);
	        crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max);
	    }

	    delay_base = get_action_delay_base(device, action, target);
	    if (delay_base > 0) {
	        crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base);
	    }

	    // Trace which combination of static and random delay applies
	    if ((delay_max > 0) && (delay_base == 0)) {
	        crm_trace("Action '%s' has maximum random delay %ds using %s",
	                  action, delay_max, device->id);
	    } else if ((delay_max == 0) && (delay_base > 0)) {
	        crm_trace("Action '%s' has a static delay of %ds using %s",
	                  action, delay_base, device->id);
	    } else if ((delay_max > 0) && (delay_base > 0)) {
	        crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen "
	                  "maximum delay of %ds using %s",
	                  action, delay_base, delay_max, device->id);
	    }
	}

	/*!
	 * \internal
	 * \brief Add "disallowed" attribute to query reply XML if appropriate
	 *
	 * The attribute is set only when the local host is not eligible to execute
	 * \p action with \p device against \p target.
	 *
	 * \param[in,out] xml         XML to add attribute to
	 * \param[in]     action      Fence action
	 * \param[in]     device      Fence device
	 * \param[in]     target      Fence target
	 * \param[in]     allow_self  Whether self-fencing is allowed
	 */
	static void
	add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device,
	               const char *target, gboolean allow_self)
	{
	    if (!localhost_is_eligible(device, action, target, allow_self)) {
	        crm_trace("Action '%s' using %s is disallowed for local host",
	                  action, device->id);
	        pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true);
	    }
	}

	/*!
	 * \internal
	 * \brief Add child element with action-specific values to query reply XML
	 *
	 * The child is identified by \p action and carries both the action-specific
	 * attributes and, if applicable, the "disallowed" attribute.
	 *
	 * \param[in,out] xml         XML to add the child element to
	 * \param[in]     action      Fence action
	 * \param[in]     device      Fence device
	 * \param[in]     target      Fence target
	 * \param[in]     allow_self  Whether self-fencing is allowed
	 */
	static void
	add_action_reply(xmlNode *xml, const char *action,
	                 const stonith_device_t *device, const char *target,
	                 gboolean allow_self)
	{
	    xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION);

	    crm_xml_add(child, PCMK_XA_ID, action);
	    add_action_specific_attributes(child, action, device, target);
	    add_disallowed(child, action, device, target, allow_self);
	}

	/*!
	 * \internal
	 * \brief Send a reply to a CPG peer or IPC client
	 *
	 * A non-NULL \p remote_peer takes precedence: the reply then goes to that
	 * peer over the cluster layer rather than to \p client.
	 *
	 * \param[in]     reply         XML reply to send
	 * \param[in]     call_options  Send synchronously if st_opt_sync_call is set
	 * \param[in]     remote_peer   If not NULL, name of peer node to send CPG reply
	 * \param[in,out] client        If not NULL, client to send IPC reply
	 */
	static void
	stonith_send_reply(const xmlNode *reply, int call_options,
	                   const char *remote_peer, pcmk__client_t *client)
	{
	    CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
	              return);

	    if (remote_peer == NULL) {
	        do_local_reply(reply, client, call_options);
	    } else {
	        const crm_node_t *node =
	            pcmk__get_node(0, remote_peer, NULL,
	                           pcmk__node_search_cluster_member);

	        pcmk__cluster_send_message(node, crm_msg_stonith_ng, reply);
	    }
	}

	/*!
	 * \internal
	 * \brief Reply to a device query once the capable devices are known
	 *
	 * Packs each still-registered device from \p devices into the query's reply
	 * XML, sends the reply, then frees both the query data and the device list.
	 *
	 * \param[in,out] devices    List of capable device IDs (freed here)
	 * \param[in,out] user_data  Query data (struct st_query_data *, freed here)
	 */
	static void
	stonith_query_capable_device_cb(GList * devices, void *user_data)
	{
	    struct st_query_data *query = user_data;
	    pcmk__client_t *client = NULL;
	    xmlNode *wrapper = NULL;
	    xmlNode *list = NULL;
	    int n_available = 0;
	    bool is_reboot = false;
	    gboolean allow_self = FALSE;

	    if (query->client_id != NULL) {
	        client = pcmk__find_client_by_id(query->client_id);
	        if ((client == NULL) && (query->remote_peer == NULL)) {
	            crm_trace("Skipping reply to %s: no longer a client",
	                      query->client_id);
	            goto done;
	        }
	    }

	    // Neither value depends on the device, so work them out once
	    is_reboot = pcmk__str_eq(query->action, PCMK_ACTION_REBOOT,
	                             pcmk__str_none);
	    allow_self = pcmk_is_set(query->call_options, st_opt_allow_self_fencing);

	    // Pack the results into XML
	    wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA);
	    list = pcmk__xe_create(wrapper, __func__);
	    crm_xml_add(list, PCMK__XA_ST_TARGET, query->target);

	    for (GList *iter = devices; iter != NULL; iter = iter->next) {
	        stonith_device_t *device = g_hash_table_lookup(device_list,
	                                                       iter->data);
	        const char *action = query->action;
	        xmlNode *dev = NULL;

	        if (device == NULL) {
	            /* It is possible the device got unregistered while
	             * determining who can fence the target */
	            continue;
	        }

	        n_available++;

	        dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID);
	        crm_xml_add(dev, PCMK_XA_ID, device->id);
	        crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace);
	        crm_xml_add(dev, PCMK_XA_AGENT, device->agent);

	        // Has had successful monitor, list, or status on this node
	        crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified);

	        crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags);

	        /* If the originating fencer wants to reboot the node, and we have a
	         * capable device that doesn't support "reboot", remap to "off" instead.
	         */
	        if (!pcmk_is_set(device->flags, st_device_supports_reboot)
	            && is_reboot) {
	            crm_trace("%s doesn't support reboot, using values for off instead",
	                      device->id);
	            action = PCMK_ACTION_OFF;
	        }

	        /* Add action-specific values if available */
	        add_action_specific_attributes(dev, action, device, query->target);
	        if (is_reboot) {
	            /* A "reboot" might get remapped to "off" then "on", so after
	             * sending the "reboot"-specific values in the main element, we add
	             * sub-elements for "off" and "on" values.
	             *
	             * We short-circuited earlier if "reboot", "off" and "on" are all
	             * disallowed for the local host. However if only one or two are
	             * disallowed, we send back the results and mark which ones are
	             * disallowed. If "reboot" is disallowed, this might cause problems
	             * with older fencer versions, which won't check for it. Older
	             * versions will ignore "off" and "on", so they are not a problem.
	             */
	            add_disallowed(dev, action, device, query->target, allow_self);
	            add_action_reply(dev, PCMK_ACTION_OFF, device, query->target,
	                             allow_self);
	            add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE);
	        }

	        /* A query without a target wants device parameters */
	        if (query->target == NULL) {
	            xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES);

	            g_hash_table_foreach(device->params, hash2field, attrs);
	        }
	    }

	    crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, n_available);
	    if (query->target) {
	        crm_debug("Found %d matching device%s for target '%s'",
	                  n_available, pcmk__plural_s(n_available),
	                  query->target);
	    } else {
	        crm_debug("%d device%s installed",
	                  n_available, pcmk__plural_s(n_available));
	    }

	    crm_log_xml_trace(list, "query-result");

	    stonith_send_reply(query->reply, query->call_options, query->remote_peer,
	                       client);

	done:
	    free_xml(query->reply);
	    free(query->remote_peer);
	    free(query->client_id);
	    free(query->target);
	    free(query->action);
	    free(query);
	    g_list_free_full(devices, free);
	}

	/*!
	 * \internal
	 * \brief Log the result of an asynchronous command
	 *
	 * One summary line is always logged. The command's standard output is logged
	 * as well when there is any, unless the action is a metadata request.
	 *
	 * \param[in] cmd        Command the result is for
	 * \param[in] result     Result of command
	 * \param[in] pid        Process ID of command, if available
	 * \param[in] next       Alternate device that will be tried if command failed
	 * \param[in] op_merged  Whether this command was merged with an earlier one
	 */
	static void
	log_async_result(const async_command_t *cmd,
	                 const pcmk__action_result_t *result,
	                 int pid, const char *next, bool op_merged)
	{
	    const bool untargeted = (cmd->target == NULL);
	    const bool succeeded = pcmk__result_ok(result);
	    const bool has_output = (result->action_stdout != NULL)
	                            && !pcmk__str_eq(cmd->action,
	                                             PCMK_ACTION_METADATA,
	                                             pcmk__str_none);
	    guint devices_remaining = g_list_length(cmd->next_device_iter);
	    int log_level = LOG_ERR;
	    int output_log_level = LOG_NEVER;
	    GString *msg = g_string_sized_new(80); // Reasonable starting size

	    // Untargeted commands are logged less prominently than targeted ones
	    if (succeeded) {
	        log_level = untargeted? LOG_DEBUG : LOG_NOTICE;
	        next = NULL;    // No retry is mentioned after a success
	    } else {
	        log_level = untargeted? LOG_NOTICE : LOG_ERR;
	    }
	    if (has_output) {
	        output_log_level = succeeded? LOG_DEBUG : LOG_WARNING;
	    }

	    // Build the log message piece by piece
	    pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
	    if (pid != 0) {
	        g_string_append_printf(msg, "[%d] ", pid);
	    }
	    if (!untargeted) {
	        pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
	    }
	    if (cmd->device != NULL) {
	        pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
	    }

	    // Add exit status or execution status as appropriate
	    if (result->execution_status != PCMK_EXEC_DONE) {
	        pcmk__g_strcat(msg, "could not be executed: ",
	                       pcmk_exec_status_str(result->execution_status), NULL);
	    } else {
	        g_string_append_printf(msg, "returned %d", result->exit_status);
	    }

	    // Add exit reason and next device if appropriate
	    if (result->exit_reason != NULL) {
	        pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
	    }
	    if (next != NULL) {
	        pcmk__g_strcat(msg, ", retrying with ", next, NULL);
	    }
	    if (devices_remaining > 0) {
	        g_string_append_printf(msg, " (%u device%s remaining)",
	                               (unsigned int) devices_remaining,
	                               pcmk__plural_s(devices_remaining));
	    }
	    g_string_append_printf(msg, " " CRM_XS " %scall %d from %s",
	                           (op_merged? "merged " : ""), cmd->id,
	                           cmd->client_name);

	    // Log the result
	    do_crm_log(log_level, "%s", msg->str);
	    g_string_free(msg, TRUE);

	    // Log the output (which may have multiple lines), if appropriate
	    if (output_log_level != LOG_NEVER) {
	        char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);

	        crm_log_output(output_log_level, prefix, result->action_stdout);
	        free(prefix);
	    }
	}

	/*!
	* \internal
	* \brief Reply to requester after asynchronous command completion
	*
	* \param[in] cmd Command that completed
	* \param[in] result Result of command
	* \param[in] pid Process ID of command, if available
	* \param[in] merged If true, command was merged with another, not executed
	*/
	static void
	send_async_reply(const async_command_t cmd, const pcmk__action_result_t result,
	int pid, bool merged)
	{
	xmlNode *reply = NULL;
	pcmk__client_t *client = NULL;

	CRM_CHECK((cmd != NULL) && (result != NULL), return);

	log_async_result(cmd, result, pid, NULL, merged);

	if (cmd->client != NULL) {
	client = pcmk__find_client_by_id(cmd->client);
	if ((client == NULL) && (cmd->origin == NULL)) {
	crm_trace("Skipping reply to %s: no longer a client", cmd->client);
	return;
	}
	}

	reply = construct_async_reply(cmd, result);
	if (merged) {
	pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true);
	}

	if (!stand_alone && pcmk__is_fencing_action(cmd->action)
	&& pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {
	/* The target was also the originator, so broadcast the result on its
	* behalf (since it will be unable to).
	*/
	crm_trace("Broadcast '%s' result for %s (target was also originator)",
	cmd->action, cmd->target);
	crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
	crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
	pcmk__cluster_send_message(NULL, crm_msg_stonith_ng, reply);
	} else {
	// Reply only to the originator
	stonith_send_reply(reply, cmd->options, cmd->origin, client);
	}

	crm_log_xml_trace(reply, "Reply");
	free_xml(reply);

	if (stand_alone) {
	/* Do notification with a clean data object */
	xmlNode *notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_NOTIFY_FENCE);

	stonith__xe_set_result(notify_data, result);
	crm_xml_add(notify_data, PCMK__XA_ST_TARGET, cmd->target);
	crm_xml_add(notify_data, PCMK__XA_ST_OP, cmd->op);
	crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, "localhost");
	crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, cmd->device);
	crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id);
	crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, cmd->client);

	fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, result,
	notify_data);
	fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
	}
	}

	/*!
	 * \internal
	 * \brief Drop a command from the pending-operation list of its device
	 *
	 * Nothing is done if cmd_device() finds no device for \p cmd.
	 *
	 * \param[in] cmd  Command to stop tracking as pending
	 */
	static void
	cancel_stonith_command(async_command_t * cmd)
	{
	    stonith_device_t *owner = cmd_device(cmd);

	    if (owner == NULL) {
	        return;
	    }

	    crm_trace("Cancel scheduled '%s' action using %s",
	              cmd->action, owner->id);
	    owner->pending_ops = g_list_remove(owner->pending_ops, cmd);
	}

	/*!
	 * \internal
	 * \brief Cancel and reply to any duplicates of a just-completed operation
	 *
	 * Check whether any fencing operations are scheduled to do the same thing as
	 * one that just succeeded. If so, rather than performing the same operation
	 * twice, return the result of this operation for all matching pending commands.
	 *
	 * \param[in,out] cmd     Fencing operation that just succeeded
	 * \param[in]     result  Result of \p cmd
	 * \param[in]     pid     If nonzero, process ID of agent invocation (for logs)
	 *
	 * \note Duplicate merging will do the right thing for either type of remapped
	 *       reboot. If the executing fencer remapped an unsupported reboot to off,
	 *       then cmd->action will be "reboot" and will be merged with any other
	 *       reboot requests. If the originating fencer remapped a topology reboot
	 *       to off then on, we will get here once with cmd->action "off" and once
	 *       with "on", and they will be merged separately with similar requests.
	 */
	static void
	reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
	                    int pid)
	{
	    GList *next = NULL;

	    for (GList *iter = cmd_list; iter != NULL; iter = next) {
	        async_command_t *cmd_other = iter->data;

	        next = iter->next; // We might delete this entry, so grab next now

	        if (cmd == cmd_other) {
	            continue;
	        }

	        /* A pending operation matches if:
	         * 1. The client connections are different.
	         * 2. The target is the same.
	         * 3. The fencing action is the same.
	         * 4. The device scheduled to execute the action is the same.
	         */
	        if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
	            !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) ||
	            !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) ||
	            !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {

	            continue;
	        }

	        crm_notice("Merging fencing action '%s'%s%s originating from "
	                   "client %s with identical fencing request from client %s",
	                   cmd_other->action,
	                   (cmd_other->target == NULL)? "" : " targeting ",
	                   pcmk__s(cmd_other->target, ""), cmd_other->client_name,
	                   cmd->client_name);

	        // Stop tracking the duplicate, send its result, and cancel it
	        cmd_list = g_list_remove_link(cmd_list, iter);
	        send_async_reply(cmd_other, result, pid, true);
	        cancel_stonith_command(cmd_other);

	        free_async_command(cmd_other);
	        g_list_free_1(iter);
	    }
	}

	/*!
	 * \internal
	 * \brief Find the next device that an operation is required to use
	 *
	 * Walks the command's remaining device entries and stops at the first one for
	 * which the command's action is required, advancing the command's iterator
	 * past it. The iterator is left untouched if no such device is found.
	 *
	 * \param[in,out] cmd  Fencing operation that just succeeded
	 *
	 * \return Next device required for action if any, otherwise NULL
	 */
	static stonith_device_t *
	next_required_device(async_command_t *cmd)
	{
	    GList *entry = cmd->next_device_iter;

	    while (entry != NULL) {
	        stonith_device_t *candidate = g_hash_table_lookup(device_list,
	                                                          entry->data);

	        /* Callers only get here after a success, so devices that are not
	         * required may simply be passed over.
	         */
	        if (is_action_required(cmd->action, candidate)) {
	            cmd->next_device_iter = entry->next;
	            return candidate;
	        }
	        entry = entry->next;
	    }
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Handle completion of an asynchronous device command
	 *
	 * Marks the command as no longer active, re-triggers the device's work
	 * queue, and then either schedules the command on another device or sends
	 * the final reply and frees the command.
	 *
	 * \param[in]     pid        Process ID of the completed command
	 * \param[in]     result     Result of the completed command
	 * \param[in,out] user_data  Command that completed (async_command_t *)
	 */
	static void
	st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
	{
	    async_command_t *cmd = user_data;

	    stonith_device_t *device = NULL;
	    stonith_device_t *next_device = NULL;

	    CRM_CHECK(cmd != NULL, return);

	    device = cmd_device(cmd);
	    cmd->active_on = NULL;

	    /* The device is ready to do something else now */
	    if (device) {
	        // A successful list/monitor/status marks the device as verified
	        if (!device->verified && pcmk__result_ok(result)
	            && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST,
	                                    PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS,
	                                    NULL)) {

	            device->verified = TRUE;
	        }

	        mainloop_set_trigger(device->work);
	    }

	    if (pcmk__result_ok(result)) {
	        next_device = next_required_device(cmd);

	    } else if ((cmd->next_device_iter != NULL)
	               && !is_action_required(cmd->action, device)) {
	        /* if this device didn't work out, see if there are any others we can try.
	         * if the failed device was 'required', we can't pick another device. */
	        next_device = g_hash_table_lookup(device_list,
	                                          cmd->next_device_iter->data);
	        cmd->next_device_iter = cmd->next_device_iter->next;
	    }

	    if (next_device == NULL) {
	        send_async_reply(cmd, result, pid, false);
	        if (pcmk__result_ok(result)) {
	            reply_to_duplicates(cmd, result, pid);
	        }
	        free_async_command(cmd);

	    } else { // This operation requires more fencing
	        log_async_result(cmd, result, pid, next_device->id, false);
	        schedule_stonith_command(cmd, next_device);
	    }
	}

	/*!
	 * \internal
	 * \brief Compare two device list entries by descending device priority
	 *
	 * The list sorted with this function holds device IDs: its elements are used
	 * as keys into device_list and are released with free(). Each ID is therefore
	 * looked up in device_list before priorities are compared, rather than being
	 * treated as a device object itself. IDs with no registered device sort last.
	 *
	 * NOTE(review): confirm that no other caller passes device objects directly.
	 *
	 * \param[in] a  ID of first device
	 * \param[in] b  ID of second device
	 *
	 * \return Negative if \p a sorts first, positive if \p b does, otherwise 0
	 */
	static gint
	sort_device_priority(gconstpointer a, gconstpointer b)
	{
	    const stonith_device_t *dev_a = NULL;
	    const stonith_device_t *dev_b = NULL;

	    if (a != NULL) {
	        dev_a = g_hash_table_lookup(device_list, a);
	    }
	    if (b != NULL) {
	        dev_b = g_hash_table_lookup(device_list, b);
	    }

	    if (dev_a == NULL) {
	        return (dev_b == NULL)? 0 : 1;
	    }
	    if (dev_b == NULL) {
	        return -1;
	    }

	    if (dev_a->priority > dev_b->priority) {
	        return -1;
	    } else if (dev_a->priority < dev_b->priority) {
	        return 1;
	    }
	    return 0;
	}

	/*!
	 * \internal
	 * \brief Start fencing once the capable devices for a target are known
	 *
	 * If no usable device is found, an error reply is sent and both the command
	 * and the device list are freed. Otherwise the command takes the device list
	 * and is scheduled on its first entry.
	 *
	 * \param[in,out] devices    List of IDs of capable devices (may be NULL)
	 * \param[in,out] user_data  Fencing command (async_command_t *)
	 */
	static void
	stonith_fence_get_devices_cb(GList * devices, void *user_data)
	{
	    async_command_t *cmd = user_data;
	    stonith_device_t *device = NULL;
	    guint ndevices = g_list_length(devices);

	    // guint is unsigned, so it must be formatted with %u
	    crm_info("Found %u matching device%s for target '%s'",
	             ndevices, pcmk__plural_s(ndevices), cmd->target);

	    if (devices != NULL) {
	        /* Order based on priority */
	        devices = g_list_sort(devices, sort_device_priority);
	        device = g_hash_table_lookup(device_list, devices->data);
	    }

	    if (device == NULL) { // No device found
	        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;

	        pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
	                            "No device configured for target '%s'",
	                            cmd->target);
	        send_async_reply(cmd, &result, 0, false);
	        pcmk__reset_result(&result);
	        free_async_command(cmd);
	        g_list_free_full(devices, free);

	    } else { // Device found, schedule it for fencing
	        cmd->device_list = devices;
	        cmd->next_device_iter = devices->next;
	        schedule_stonith_command(cmd, device);
	    }
	}

	/*!
	 * \internal
	 * \brief Execute a fence action via the local node
	 *
	 * On success the result is set to pending. If the request names a device that
	 * is not registered, the result is set to an error and the command created
	 * for the request is released, since it will never be scheduled.
	 *
	 * \param[in]  msg     Fencing request
	 * \param[out] result  Where to store result of fence action
	 */
	static void
	fence_locally(xmlNode *msg, pcmk__action_result_t *result)
	{
	    const char *device_id = NULL;
	    stonith_device_t *device = NULL;
	    async_command_t *cmd = NULL;
	    xmlNode *dev = NULL;

	    CRM_CHECK((msg != NULL) && (result != NULL), return);

	    dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR);

	    cmd = create_async_command(msg);
	    if (cmd == NULL) {
	        crm_log_xml_warn(msg, "invalid");
	        fenced_set_protocol_error(result);
	        return;
	    }

	    device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
	    if (device_id != NULL) {
	        device = g_hash_table_lookup(device_list, device_id);
	        if (device == NULL) {
	            crm_err("Requested device '%s' is not available", device_id);
	            pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
	                                "Requested device '%s' not found", device_id);

	            // Nothing will ever run or free this command, so do it here
	            free_async_command(cmd);
	            return;
	        }
	        schedule_stonith_command(cmd, device);

	    } else {
	        const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET);

	        if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
	            int nodeid = 0;
	            crm_node_t *node = NULL;

	            // The target was given as a node ID; map it to a node name
	            pcmk__scan_min_int(host, &nodeid, 0);
	- node = pcmk__search_node_caches(nodeid, NULL,
	+ node = pcmk__search_node_caches(nodeid, NULL, NULL,
	                                           pcmk__node_search_any
	                                           |pcmk__node_search_cluster_cib);
	            if (node != NULL) {
	                host = node->uname;
	            }
	        }

	        /* If we get to here, then self-fencing is implicitly allowed */
	        get_capable_devices(host, cmd->action, cmd->default_timeout,
	                            TRUE, cmd, stonith_fence_get_devices_cb,
	                            fenced_support_flag(cmd->action));
	    }

	    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
	}

	/*!
	 * \internal
	 * \brief Build an XML reply for a fencing operation
	 *
	 * \param[in] request  Request that reply is for (may be NULL)
	 * \param[in] data     If not NULL, add to reply as call data
	 * \param[in] result   Full result of fencing operation
	 *
	 * \return Newly created XML reply
	 * \note The caller is responsible for freeing the result.
	 * \note This has some overlap with construct_async_reply(), but that copies
	 *       values from an async_command_t, whereas this one copies them from the
	 *       request.
	 */
	xmlNode *
	fenced_construct_reply(const xmlNode *request, xmlNode *data,
	                       const pcmk__action_result_t *result)
	{
	    xmlNode *reply = NULL;

	    reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);

	    crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__);
	    crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
	    stonith__xe_set_result(reply, result);

	    if (request == NULL) {
	        /* Most likely, this is the result of a stonith operation that was
	         * initiated before we came up. Unfortunately that means we lack enough
	         * information to provide clients with a full result.
	         *
	         * @TODO Maybe synchronize this information at start-up?
	         */
	        crm_warn("Missing request information for client notifications for "
	                 "operation with result '%s' (initiated before we came up?)",
	                 pcmk_exec_status_str(result->execution_status));

	    } else {
	        const char *name = NULL;
	        const char *value = NULL;

	        // Attributes to copy from request to reply
	        const char *names[] = {
	            PCMK__XA_ST_OP,
	            PCMK__XA_ST_CALLID,
	            PCMK__XA_ST_CLIENTID,
	            PCMK__XA_ST_CLIENTNAME,
	            PCMK__XA_ST_REMOTE_OP,
	            PCMK__XA_ST_CALLOPT,
	        };

	        for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
	            name = names[lpc];
	            value = crm_element_value(request, name);
	            crm_xml_add(reply, name, value);
	        }
	        if (data != NULL) {
	            // The call data is copied beneath a dedicated wrapper element
	            xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA);

	            pcmk__xml_copy(wrapper, data);
	        }
	    }
	    return reply;
	}

	/*!
	* \internal
	* \brief Build an XML reply to an asynchronous fencing command
	*
	* \param[in] cmd Fencing command that reply is for
	* \param[in] result Command result
	*
	* \return Newly created XML reply element
	*/
	static xmlNode *
	construct_async_reply(const async_command_t *cmd,
	const pcmk__action_result_t *result)
	{
	xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);

	// The origin attribute is first set to this function's name ...
	crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__);
	crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
	crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op);
	crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device);
	crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id);
	crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client);
	crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name);
	crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target);
	// NOTE(review): the device action attribute is filled from cmd->op, not
	// cmd->action -- confirm this is intended
	crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op);
	// ... and then set again from the command's origin, so the order of these
	// two calls matters (presumably a NULL origin leaves the first value in
	// place -- TODO confirm against crm_xml_add())
	crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin);
	crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id);
	crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options);

	stonith__xe_set_result(reply, result);
	return reply;
	}

	/*!
	 * \internal
	 * \brief Check whether a peer has a name and the cluster process flag set
	 *
	 * \param[in] peer  Node to check (may be NULL)
	 *
	 * \return true if \p peer is non-NULL, has a node name, and has the flag
	 *         returned by crm_get_cluster_proc() set in its processes, otherwise
	 *         false
	 */
	bool fencing_peer_active(crm_node_t *peer)
	{
	    if ((peer == NULL) || (peer->uname == NULL)) {
	        return FALSE;
	    }
	    return pcmk_is_set(peer->processes, crm_get_cluster_proc())? TRUE : FALSE;
	}

	/*!
	 * \internal
	 * \brief Stamp a fencing operation with the current time as completion time
	 *
	 * \param[in,out] op  Operation whose completed and completed_nsec fields are set
	 */
	void
	set_fencing_completed(remote_fencing_op_t *op)
	{
	    struct timespec now;

	    qb_util_timespec_from_epoch_get(&now);

	    // Seconds and nanoseconds are stored in separate fields
	    op->completed = now.tv_sec;
	    op->completed_nsec = now.tv_nsec;
	}

	/*!
	 * \internal
	 * \brief Pick another node to fence the target when the target is this node
	 *
	 * If \p target is not the local node, no alternate is needed. Otherwise the
	 * peer cache is searched for the first active peer other than the target.
	 *
	 * \param[in] target  Node that must be fenced
	 *
	 * \return Name of an alternate node that should fence \p target if any,
	 *         or NULL otherwise
	 */
	static const char *
	check_alternate_host(const char *target)
	{
	    GHashTableIter peers;
	    crm_node_t *peer = NULL;

	    if (!pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
	        return NULL;
	    }

	    g_hash_table_iter_init(&peers, crm_peer_cache);
	    while (g_hash_table_iter_next(&peers, NULL, (void **)&peer)) {
	        if (!fencing_peer_active(peer)
	            || pcmk__str_eq(peer->uname, target, pcmk__str_casei)) {
	            continue;
	        }
	        crm_notice("Forwarding self-fencing request to %s",
	                   peer->uname);
	        return peer->uname;
	    }

	    crm_warn("Will handle own fencing because no peer can");
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Delete the relay operation that a request replaces, if any
	 *
	 * Acts only if the request names a relay operation and targets the local
	 * node. Before the relay operation is removed from the operation table, it
	 * is unlinked from the duplicates list of every other operation, so that no
	 * list keeps a pointer to it.
	 *
	 * \param[in] request  Request XML that may reference a relay operation
	 */
	static void
	remove_relay_op(xmlNode * request)
	{
	    xmlNode *dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request,
	                                    LOG_TRACE);
	    const char *relay_op_id = NULL;
	    const char *op_id = NULL;
	    const char *client_name = NULL;
	    const char *target = NULL;
	    remote_fencing_op_t *relay_op = NULL;
	    remote_fencing_op_t *list_op = NULL;
	    GHashTableIter iter;

	    if (dev) {
	        target = crm_element_value(dev, PCMK__XA_ST_TARGET);
	    }

	    relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY);
	    op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP);
	    client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME);

	    /* Delete RELAY operation. */
	    if ((relay_op_id == NULL) || (target == NULL)
	        || !pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
	        return;
	    }

	    relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
	    if (relay_op == NULL) {
	        return;
	    }

	    /* If the operation to be deleted is registered as a duplicate of another
	     * operation, delete that registration. The entry must be removed from the
	     * other operation's list (not from the relay operation's own list).
	     * g_list_remove() does nothing if the relay operation is not in the list.
	     */
	    g_hash_table_iter_init(&iter, stonith_remote_op_list);
	    while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
	        if (list_op != relay_op) {
	            list_op->duplicates = g_list_remove(list_op->duplicates,
	                                                relay_op);
	        }
	    }

	    crm_debug("Deleting relay op %s ('%s'%s%s for %s), "
	              "replaced by op %s ('%s'%s%s for %s)",
	              relay_op->id, relay_op->action,
	              (relay_op->target == NULL)? "" : " targeting ",
	              pcmk__s(relay_op->target, ""),
	              relay_op->client_name, op_id, relay_op->action,
	              (target == NULL)? "" : " targeting ", pcmk__s(target, ""),
	              client_name);

	    g_hash_table_remove(stonith_remote_op_list, relay_op_id);
	}

	/*!
	* \internal
	* \brief Check whether an API request was sent by a privileged user
	*
	* API commands related to fencing configuration may be done only by privileged
	* IPC users (i.e. root or hacluster), because all other users should go through
	* the CIB to have ACLs applied. If no client was given, this is a peer request,
	* which is always allowed.
	*
	* \param[in] c IPC client that sent request (or NULL if sent by CPG peer)
	* \param[in] op Requested API operation (for logging only)
	*
	* \return true if sender is peer or privileged client, otherwise false
	*/
	static inline bool
	is_privileged(const pcmk__client_t c, const char op)
	{
	if ((c == NULL) \|\| pcmk_is_set(c->flags, pcmk__client_privileged)) {
	return true;
	} else {
	crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
	pcmk__s(op, ""), pcmk__client_name(c));
	return false;
	}
	}

	/*!
	 * \internal
	 * \brief Handle a CRM_OP_REGISTER request
	 *
	 * \param[in,out] request  Request to handle (must come from an IPC client)
	 *
	 * \return Reply XML carrying the operation name and the client's ID
	 */
	static xmlNode *
	handle_register_request(pcmk__request_t *request)
	{
	    xmlNode *registration = pcmk__xe_create(NULL, "reply");

	    pcmk__assert(request->ipc_client != NULL);

	    crm_xml_add(registration, PCMK__XA_ST_OP, CRM_OP_REGISTER);
	    crm_xml_add(registration, PCMK__XA_ST_CLIENTID, request->ipc_client->id);

	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    pcmk__set_request_flags(request, pcmk__request_reuse_options);

	    return registration;
	}

	/*!
	 * \internal
	 * \brief Handle a STONITH_OP_EXEC request
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return Reply XML, or NULL if the action's result is still pending
	 */
	static xmlNode *
	handle_agent_request(pcmk__request_t *request)
	{
	    execute_agent_action(request->xml, &request->result);

	    if (request->result.execution_status != PCMK_EXEC_PENDING) {
	        return fenced_construct_reply(request->xml, NULL, &request->result);
	    }
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Handle a STONITH_OP_TIMEOUT_UPDATE request
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return NULL (no reply is built here)
	 */
	static xmlNode *
	handle_update_timeout_request(pcmk__request_t *request)
	{
	    xmlNode *xml = request->xml;
	    const char *call = crm_element_value(xml, PCMK__XA_ST_CALLID);
	    const char *client = crm_element_value(xml, PCMK__XA_ST_CLIENTID);
	    int new_timeout = 0;

	    crm_element_value_int(xml, PCMK__XA_ST_TIMEOUT, &new_timeout);
	    do_stonith_async_timeout_update(client, call, new_timeout);

	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    return NULL;
	}

	// STONITH_OP_QUERY
	// Handle a device query: record the operation if it came from a peer, drop
	// any relay operation it replaces, then start an asynchronous search for
	// capable devices. Always returns NULL; the reply built here is stored in the
	// query data that is handed to the search callback.
	static xmlNode *
	handle_query_request(pcmk__request_t *request)
	{
	int timeout = 0;
	xmlNode *dev = NULL;
	const char *action = NULL;
	const char *target = NULL;
	const char *client_id = crm_element_value(request->xml,
	PCMK__XA_ST_CLIENTID);
	struct st_query_data *query = NULL;

	if (request->peer != NULL) {
	// Record it for the future notification
	create_remote_stonith_op(client_id, request->xml, TRUE);
	}

	/* Delete the DC node RELAY operation. */
	remove_relay_op(request->xml);

	// The result must be set before the reply below is constructed from it
	pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

	dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request->xml,
	LOG_NEVER);
	if (dev != NULL) {
	const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);

	if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
	return NULL; // No query or reply necessary
	}
	target = crm_element_value(dev, PCMK__XA_ST_TARGET);
	action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
	}

	crm_log_xml_trace(request->xml, "Query");

	query = pcmk__assert_alloc(1, sizeof(struct st_query_data));

	// The query data holds its own copies of the strings and the reply
	query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
	query->remote_peer = pcmk__str_copy(request->peer);
	query->client_id = pcmk__str_copy(client_id);
	query->target = pcmk__str_copy(target);
	query->action = pcmk__str_copy(action);
	query->call_options = request->call_options;

	crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout);
	// The query data is passed on as the callback's user data
	get_capable_devices(target, action, timeout,
	pcmk_is_set(query->call_options,
	st_opt_allow_self_fencing),
	query, stonith_query_capable_device_cb, st_device_supports_none);
	return NULL;
	}

	/*!
	 * \internal
	 * \brief Handle a STONITH_OP_NOTIFY request
	 *
	 * Sets and/or clears notification flags on the requesting IPC client,
	 * according to which attributes the request carries.
	 *
	 * \param[in,out] request  Request to handle (must come from an IPC client)
	 *
	 * \return Acknowledgement XML created by pcmk__ipc_create_ack()
	 */
	static xmlNode *
	handle_notify_request(pcmk__request_t *request)
	{
	    const char *notify_type = NULL;

	    pcmk__assert(request->ipc_client != NULL);

	    // Turn a notification type on, if requested
	    notify_type = crm_element_value(request->xml,
	                                    PCMK__XA_ST_NOTIFY_ACTIVATE);
	    if (notify_type != NULL) {
	        crm_debug("Enabling %s callbacks for client %s",
	                  notify_type, pcmk__request_origin(request));
	        pcmk__set_client_flags(request->ipc_client,
	                               get_stonith_flag(notify_type));
	    }

	    // Turn a notification type off, if requested
	    notify_type = crm_element_value(request->xml,
	                                    PCMK__XA_ST_NOTIFY_DEACTIVATE);
	    if (notify_type != NULL) {
	        crm_debug("Disabling %s callbacks for client %s",
	                  notify_type, pcmk__request_origin(request));
	        pcmk__clear_client_flags(request->ipc_client,
	                                 get_stonith_flag(notify_type));
	    }

	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    pcmk__set_request_flags(request, pcmk__request_reuse_options);

	    return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL,
	                                CRM_EX_OK);
	}

	/*!
	 * \internal
	 * \brief Handle a STONITH_OP_RELAY request
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return NULL if the operation was initiated (result is set to pending),
	 *         otherwise a reply carrying a protocol error
	 */
	static xmlNode *
	handle_relay_request(pcmk__request_t *request)
	{
	    xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml,
	                                    LOG_TRACE);

	    crm_notice("Received forwarded fencing request from "
	               "%s %s to fence (%s) peer %s",
	               pcmk__request_origin_type(request),
	               pcmk__request_origin(request),
	               crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION),
	               crm_element_value(dev, PCMK__XA_ST_TARGET));

	    if (initiate_remote_stonith_op(NULL, request->xml, FALSE) != NULL) {
	        pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
	                         NULL);
	        return NULL;
	    }

	    fenced_set_protocol_error(&request->result);
	    return fenced_construct_reply(request->xml, NULL, &request->result);
	}

	/*!
	 * \internal
	 * \brief Handle a STONITH_OP_FENCE request
	 *
	 * Requests from a peer (or any request in stand-alone mode) are executed
	 * locally. Manual confirmations are handed to
	 * fenced_handle_manual_confirmation(). Any other request either is relayed
	 * to an alternate host chosen by check_alternate_host() or starts a remote
	 * fencing operation.
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return Reply XML, or NULL if the result is still pending
	 */
	static xmlNode *
	handle_fence_request(pcmk__request_t *request)
	{
	    if ((request->peer != NULL) || stand_alone) {
	        fence_locally(request->xml, &request->result);

	    } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) {
	        switch (fenced_handle_manual_confirmation(request->ipc_client,
	                                                  request->xml)) {
	            case pcmk_rc_ok:
	                pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
	                                 NULL);
	                break;
	            case EINPROGRESS:
	                pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
	                                 NULL);
	                break;
	            default:
	                fenced_set_protocol_error(&request->result);
	                break;
	        }

	    } else {
	        const char *alternate_host = NULL;
	        xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml,
	                                        LOG_TRACE);
	        const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET);
	        const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
	        const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);

	        if (request->ipc_client != NULL) {
	            int tolerance = 0;

	            crm_notice("Client %s wants to fence (%s) %s using %s",
	                       pcmk__request_origin(request), action,
	                       target, (device? device : "any device"));
	            crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance);
	            if (stonith_check_fence_tolerance(tolerance, target, action)) {
	                // The tolerance check passed, so report success right away
	                pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
	                                 NULL);
	                return fenced_construct_reply(request->xml, NULL,
	                                              &request->result);
	            }
	            alternate_host = check_alternate_host(target);

	        } else {
	            crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
	                       request->peer, action, target,
	                       (device == NULL)? "(any)" : device);
	        }

	        if (alternate_host != NULL) {
	            /* alternate_host is only set in the IPC client branch above, so
	             * request->ipc_client is not NULL here.
	             */
	            const char *client_id = NULL;
	            remote_fencing_op_t *op = NULL;
	            crm_node_t *node = pcmk__get_node(0, alternate_host, NULL,
	                                              pcmk__node_search_cluster_member);

	            if (request->ipc_client->id == NULL) {
	                client_id = crm_element_value(request->xml,
	                                              PCMK__XA_ST_CLIENTID);
	            } else {
	                client_id = request->ipc_client->id;
	            }

	            /* Create a duplicate fencing operation to relay with the client ID.
	             * When a query response is received, this operation should be
	             * deleted to avoid keeping the duplicate around.
	             */
	            op = create_remote_stonith_op(client_id, request->xml, FALSE);

	            crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY);
	            crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID,
	                        request->ipc_client->id);
	            crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id);
	            pcmk__cluster_send_message(node, crm_msg_stonith_ng, request->xml);
	            pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
	                             NULL);

	        } else if (initiate_remote_stonith_op(request->ipc_client, request->xml,
	                                              FALSE) == NULL) {
	            fenced_set_protocol_error(&request->result);

	        } else {
	            pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
	                             NULL);
	        }
	    }

	    if (request->result.execution_status == PCMK_EXEC_PENDING) {
	        return NULL;
	    }
	    return fenced_construct_reply(request->xml, NULL, &request->result);
	}

	/*!
	 * \internal
	 * \brief Handle a STONITH_OP_FENCE_HISTORY request
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return Reply XML, or NULL if the request asked for the reply to be discarded
	 */
	static xmlNode *
	handle_history_request(pcmk__request_t *request)
	{
	    xmlNode *history = NULL;
	    xmlNode *response = NULL;
	    bool wants_reply = false;

	    stonith_fence_history(request->xml, &history, request->peer,
	                          request->call_options);
	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

	    /* When the local node broadcasts its history, it sets
	     * st_opt_discard_reply and doesn't need a reply.
	     */
	    wants_reply = !pcmk_is_set(request->call_options, st_opt_discard_reply);
	    if (wants_reply) {
	        response = fenced_construct_reply(request->xml, history,
	                                          &request->result);
	    }

	    free_xml(history);
	    return response;
	}

	/*!
	 * \internal
	 * \brief Handle a STONITH_OP_DEVICE_ADD request
	 *
	 * A configuration notification is sent whether or not the device was
	 * registered.
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return Reply XML describing the outcome
	 */
	static xmlNode *
	handle_device_add_request(pcmk__request_t *request)
	{
	    const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
	    xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml,
	                                    LOG_ERR);

	    if (!is_privileged(request->ipc_client, op)) {
	        pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
	                         PCMK_EXEC_INVALID,
	                         "Unprivileged users must register device via CIB");
	    } else {
	        int rc = stonith_device_register(dev, FALSE);

	        if (rc == pcmk_ok) {
	            pcmk__set_result(&request->result, CRM_EX_OK,
	                             stonith__legacy2status(rc), NULL);
	        } else {
	            pcmk__set_result(&request->result, CRM_EX_ERROR,
	                             stonith__legacy2status(rc), pcmk_strerror(rc));
	        }
	    }

	    fenced_send_config_notification(op, &request->result,
	                                    (dev == NULL)? NULL : pcmk__xe_id(dev));
	    return fenced_construct_reply(request->xml, NULL, &request->result);
	}

	/*!
	 * \internal
	 * \brief Handle a STONITH_OP_DEVICE_DEL request
	 *
	 * A configuration notification is sent whether or not the device was removed.
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return Reply XML describing the outcome
	 */
	static xmlNode *
	handle_device_delete_request(pcmk__request_t *request)
	{
	    xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml,
	                                    LOG_ERR);
	    const char *id = crm_element_value(dev, PCMK_XA_ID);
	    const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);

	    if (!is_privileged(request->ipc_client, op)) {
	        pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
	                         PCMK_EXEC_INVALID,
	                         "Unprivileged users must delete device via CIB");
	    } else {
	        stonith_device_remove(id, false);
	        pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    }

	    fenced_send_config_notification(op, &request->result, id);
	    return fenced_construct_reply(request->xml, NULL, &request->result);
	}

	/*!
	 * \internal
	 * \brief Handle a STONITH_OP_LEVEL_ADD request
	 *
	 * For unprivileged clients the request is only unpacked to obtain a
	 * description for the notification; no level is registered.
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return Reply XML describing the outcome
	 */
	static xmlNode *
	handle_level_add_request(pcmk__request_t *request)
	{
	    char *level_desc = NULL;
	    const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);

	    if (!is_privileged(request->ipc_client, op)) {
	        unpack_level_request(request->xml, NULL, NULL, NULL, &level_desc);
	        pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
	                         PCMK_EXEC_INVALID,
	                         "Unprivileged users must add level via CIB");
	    } else {
	        fenced_register_level(request->xml, &level_desc, &request->result);
	    }

	    fenced_send_config_notification(op, &request->result, level_desc);
	    free(level_desc);
	    return fenced_construct_reply(request->xml, NULL, &request->result);
	}

	/*!
	 * \internal
	 * \brief Handle a STONITH_OP_LEVEL_DEL request
	 *
	 * For unprivileged clients the request is only unpacked to obtain a
	 * description for the notification; no level is unregistered.
	 *
	 * \param[in,out] request  Request to handle
	 *
	 * \return Reply XML describing the outcome
	 */
	static xmlNode *
	handle_level_delete_request(pcmk__request_t *request)
	{
	    char *level_desc = NULL;
	    const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);

	    if (!is_privileged(request->ipc_client, op)) {
	        unpack_level_request(request->xml, NULL, NULL, NULL, &level_desc);
	        pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
	                         PCMK_EXEC_INVALID,
	                         "Unprivileged users must delete level via CIB");
	    } else {
	        fenced_unregister_level(request->xml, &level_desc, &request->result);
	    }

	    fenced_send_config_notification(op, &request->result, level_desc);
	    free(level_desc);
	    return fenced_construct_reply(request->xml, NULL, &request->result);
	}

	/*!
	 * \internal
	 * \brief Handle a CRM_OP_RM_NODE_CACHE request
	 *
	 * \param[in,out] request  Request carrying the node's ID and name
	 *
	 * \return NULL (no reply is built here)
	 */
	static xmlNode *
	handle_cache_request(pcmk__request_t *request)
	{
	    int id = 0;
	    const char *uname = NULL;

	    crm_element_value_int(request->xml, PCMK_XA_ID, &id);
	    uname = crm_element_value(request->xml, PCMK_XA_UNAME);

	    pcmk__cluster_forget_cluster_node(id, uname);

	    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Handle a request whose operation has no dedicated handler
	 *
	 * \param[in,out] request  Request to reject
	 *
	 * \return Reply XML carrying a protocol error
	 */
	static xmlNode *
	handle_unknown_request(pcmk__request_t *request)
	{
	    pcmk__action_result_t *outcome = &request->result;

	    crm_err("Unknown IPC request %s from %s %s",
	            request->op, pcmk__request_origin_type(request),
	            pcmk__request_origin(request));

	    pcmk__format_result(outcome, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
	                        "Unknown IPC request type '%s' (bug?)", request->op);

	    return fenced_construct_reply(request->xml, NULL, outcome);
	}

	// Build the table mapping each supported request operation to its handler
	// function and store the result of pcmk__register_handlers() in
	// fenced_handlers.
	static void
	fenced_register_handlers(void)
	{
	pcmk__server_command_t handlers[] = {
	{ CRM_OP_REGISTER, handle_register_request },
	{ STONITH_OP_EXEC, handle_agent_request },
	{ STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request },
	{ STONITH_OP_QUERY, handle_query_request },
	{ STONITH_OP_NOTIFY, handle_notify_request },
	{ STONITH_OP_RELAY, handle_relay_request },
	{ STONITH_OP_FENCE, handle_fence_request },
	{ STONITH_OP_FENCE_HISTORY, handle_history_request },
	{ STONITH_OP_DEVICE_ADD, handle_device_add_request },
	{ STONITH_OP_DEVICE_DEL, handle_device_delete_request },
	{ STONITH_OP_LEVEL_ADD, handle_level_add_request },
	{ STONITH_OP_LEVEL_DEL, handle_level_delete_request },
	{ CRM_OP_RM_NODE_CACHE, handle_cache_request },
	// NOTE(review): the NULL-named last entry presumably both terminates the
	// table and supplies the fallback handler -- confirm against
	// pcmk__register_handlers()
	{ NULL, handle_unknown_request },
	};

	fenced_handlers = pcmk__register_handlers(handlers);
	}

	/*!
	 * \internal
	 * \brief Destroy the request handler table, if one has been created
	 */
	void
	fenced_unregister_handlers(void)
	{
	    if (fenced_handlers == NULL) {
	        return;
	    }

	    g_hash_table_destroy(fenced_handlers);
	    fenced_handlers = NULL; // Lets the table be recreated on demand
	}

	/*!
	 * \internal
	 * \brief Dispatch a request to its handler and deliver any reply
	 *
	 * The handler table is created on first use. Any reply returned by the
	 * handler is sent and then freed, and the outcome is logged.
	 *
	 * \param[in,out] request  Request to process
	 */
	static void
	handle_request(pcmk__request_t *request)
	{
	    xmlNode *response = NULL;
	    const char *why = NULL;

	    if (fenced_handlers == NULL) {
	        fenced_register_handlers();
	    }

	    response = pcmk__process_request(request, fenced_handlers);
	    if (response != NULL) {
	        const bool reuse_options =
	            pcmk_is_set(request->flags, pcmk__request_reuse_options)
	            && (request->ipc_client != NULL);

	        if (!reuse_options) {
	            stonith_send_reply(response, request->call_options,
	                               request->peer, request->ipc_client);
	        } else {
	            /* Certain IPC-only commands must reuse the call options from the
	             * original request rather than the ones set by stonith_send_reply()
	             * -> do_local_reply().
	             */
	            pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, response,
	                               request->ipc_flags);
	            request->ipc_client->request_id = 0;
	        }
	        free_xml(response);
	    }

	    // The exit reason, if any, is appended in parentheses
	    why = request->result.exit_reason;
	    crm_debug("Processed %s request from %s %s: %s%s%s%s",
	              request->op, pcmk__request_origin_type(request),
	              pcmk__request_origin(request),
	              pcmk_exec_status_str(request->result.execution_status),
	              (why == NULL)? "" : " (",
	              pcmk__s(why, ""),
	              (why == NULL)? "" : ")");
	}

	/*!
	 * \internal
	 * \brief Process a reply message according to its operation
	 *
	 * Query replies and notify/fence replies are passed to their processing
	 * functions. Any other reply is logged as an error and ignored.
	 *
	 * \param[in]     client       If not NULL, IPC client that sent the reply
	 * \param[in,out] request      Reply XML
	 * \param[in]     remote_peer  Name of the peer that sent the reply (used for
	 *                             logging when \p client is NULL)
	 */
	static void
	handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
	{
	    // Copy, because request might be freed before we want to log this
	    char *op = crm_element_value_copy(request, PCMK__XA_ST_OP);

	    if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
	        process_remote_stonith_query(request);

	    } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE,
	                                NULL)) {
	        fenced_process_fencing_reply(request);

	    } else {
	        crm_err("Ignoring unknown %s reply from %s %s",
	                pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"),
	                ((client == NULL)? remote_peer : pcmk__client_name(client)));
	        crm_log_xml_warn(request, "UnknownOp");
	        free(op);
	        return;
	    }
	    crm_debug("Processed %s reply from %s %s",
	              op, ((client == NULL)? "peer" : "client"),
	              ((client == NULL)? remote_peer : pcmk__client_name(client)));
	    free(op);
	}

	/*!
	 * \internal
	 * \brief Handle a message from an IPC client or CPG peer
	 *
	 * The message is treated as a reply if it contains a reply element anywhere
	 * in its tree, and as a request otherwise.
	 *
	 * \param[in,out] client       If not NULL, IPC client that sent message
	 * \param[in]     id           If from IPC client, IPC message ID
	 * \param[in]     flags        Message flags
	 * \param[in,out] message      Message XML
	 * \param[in]     remote_peer  If not NULL, CPG peer that sent message
	 */
	void
	stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
	                xmlNode *message, const char *remote_peer)
	{
	    uint32_t call_options = st_opt_none;
	    int rc = pcmk_rc_ok;
	    bool is_reply = false;

	    CRM_CHECK(message != NULL, return);

	    if (get_xpath_object("//" PCMK__XE_ST_REPLY, message, LOG_NEVER) != NULL) {
	        is_reply = true;
	    }

	    // A parse failure is only warned about; call_options keeps its value
	    rc = pcmk__xe_get_flags(message, PCMK__XA_ST_CALLOPT, &call_options,
	                            st_opt_none);
	    if (rc != pcmk_rc_ok) {
	        crm_warn("Couldn't parse options from message: %s", pcmk_rc_str(rc));
	    }

	    crm_debug("Processing %ssynchronous %s %s %u from %s %s",
	              pcmk_is_set(call_options, st_opt_sync_call)? "" : "a",
	              crm_element_value(message, PCMK__XA_ST_OP),
	              (is_reply? "reply" : "request"), id,
	              ((client == NULL)? "peer" : "client"),
	              ((client == NULL)? remote_peer : pcmk__client_name(client)));

	    // A synchronous IPC message must match the client's recorded request ID
	    if (pcmk_is_set(call_options, st_opt_sync_call)) {
	        pcmk__assert((client == NULL) || (client->request_id == id));
	    }

	    if (is_reply) {
	        handle_reply(client, message, remote_peer);
	    } else {
	        pcmk__request_t request = {
	            .ipc_client     = client,
	            .ipc_id         = id,
	            .ipc_flags      = flags,
	            .peer           = remote_peer,
	            .xml            = message,
	            .call_options   = call_options,
	            .result         = PCMK__UNKNOWN_RESULT,
	        };

	        request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP);
	        CRM_CHECK(request.op != NULL, return);

	        if (pcmk_is_set(request.call_options, st_opt_sync_call)) {
	            pcmk__set_request_flags(&request, pcmk__request_sync);
	        }

	        handle_request(&request);
	        pcmk__reset_request(&request);
	    }
	}
	diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
	index 5fcdb1ff14..6d55e9c71f 100644
	--- a/daemons/fenced/fenced_history.c
	+++ b/daemons/fenced/fenced_history.c
	@@ -1,572 +1,572 @@
	/*
	* Copyright 2009-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <stdio.h>
	#include <unistd.h>
	#include <stdlib.h>

	#include <crm/crm.h>
	#include <crm/common/ipc.h>
	#include <crm/common/ipc_internal.h>
	#include <crm/cluster/internal.h>

	#include <crm/stonith-ng.h>
	#include <crm/fencing/internal.h>
	#include <crm/common/xml.h>
	#include <crm/common/xml_internal.h>

	#include <pacemaker-fenced.h>

	#define MAX_STONITH_HISTORY 500

	/*!
	 * \internal
	 * \brief Broadcast a fence-history message to all nodes, to trigger either
	 *        a cleanup or a history synchronisation
	 *
	 * \param[in] history   Optional history XML to attach to the broadcast
	 * \param[in] callopts  Call options; a flag here selects cleanup
	 * \param[in] target    If not NULL, fence target to limit a cleanup to
	 */
	static void
	stonith_send_broadcast_history(xmlNode *history,
	                               int callopts,
	                               const char *target)
	{
	    xmlNode *message = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND);
	    xmlNode *calldata = pcmk__xe_create(message, PCMK__XE_ST_CALLDATA);

	    /* The payload element is named after this function, so this call has to
	     * stay inside stonith_send_broadcast_history() itself.
	     */
	    xmlNode *payload = pcmk__xe_create(calldata, __func__);

	    // Message envelope
	    crm_xml_add(message, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
	    crm_xml_add(message, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
	    crm_xml_add(message, PCMK__XA_ST_OP, STONITH_OP_FENCE_HISTORY);
	    crm_xml_add_int(message, PCMK__XA_ST_CALLOPT, callopts);

	    // Payload: the history (if any) plus the optional target filter
	    pcmk__xml_copy(payload, history);
	    if (target != NULL) {
	        crm_xml_add(payload, PCMK__XA_ST_TARGET, target);
	    }

	    pcmk__cluster_send_message(NULL, crm_msg_stonith_ng, message);
	    free_xml(message);
	}

	/*!
	 * \internal
	 * \brief Decide whether a history entry should be removed during cleanup
	 *
	 * Written as a callback for g_hash_table_foreach_remove().
	 *
	 * \param[in] key        Ignored
	 * \param[in] value      \c remote_fencing_op_t being considered
	 * \param[in] user_data  If not NULL, name of the only fence target whose
	 *                       entries may be removed
	 *
	 * \return TRUE if the operation has failed or is done and matches the
	 *         target filter (if any), otherwise FALSE
	 */
	static gboolean
	stonith_remove_history_entry(gpointer key, gpointer value, gpointer user_data)
	{
	    remote_fencing_op_t *op = value;
	    const char *target = (const char *) user_data;

	    if ((op->state == st_failed) || (op->state == st_done)) {
	        /* An entry without a target can't match a target filter. Checking
	         * this first also keeps strcmp() from being handed a NULL pointer.
	         */
	        if ((target != NULL)
	            && ((op->target == NULL) || (strcmp(op->target, target) != 0))) {
	            return FALSE;
	        }
	        return TRUE;
	    }

	    return FALSE; /* don't clean pending operations */
	}

	/*!
	 * \internal
	 * \brief Send out a cleanup broadcast or do a local history-cleanup
	 *
	 * \param[in] target     If not NULL, limit cleanup to this fence target
	 * \param[in] broadcast  If true, only send a cleanup broadcast; otherwise
	 *                       remove matching entries from the local history
	 */
	static void
	stonith_fence_history_cleanup(const char *target,
	                              gboolean broadcast)
	{
	    if (broadcast) {
	        stonith_send_broadcast_history(NULL,
	                                       st_opt_cleanup | st_opt_discard_reply,
	                                       target);
	        /* we'll do the local clean when we receive back our own broadcast */
	    } else if (stonith_remote_op_list) {
	        g_hash_table_foreach_remove(stonith_remote_op_list,
	                                    stonith_remove_history_entry,
	                                    (gpointer) target);
	        fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
	    }
	}

	/* keeping the length of fence-history within bounds
	* =================================================
	*
	* If things are really running wild a lot of fencing-attempts
	* might fill up the hash-map, eventually using up a lot
	* of memory and creating huge history-sync messages.
	* Before the history being synced across nodes at least
	* the reboot of a cluster-node helped keeping the
	* history within bounds even though not in a reliable
	* manner.
	*
	* stonith_remote_op_list isn't sorted for time-stamps
	* thus it would be kind of expensive to delete e.g.
	* the oldest entry if it would grow past MAX_STONITH_HISTORY
	* entries.
	* It is more efficient to purge MAX_STONITH_HISTORY/2
	* entries whenever the list grows beyond MAX_STONITH_HISTORY.
	* (sort for age + purge the MAX_STONITH_HISTORY/2 oldest)
	* That done on a per-node-base might raise the
	* probability of large syncs to occur.
	* Things like introducing a broadcast to purge
	* MAX_STONITH_HISTORY/2 entries or not sync above a certain
	* threshold coming to mind ...
	* Simplest thing though is to purge the full history
	* throughout the cluster once MAX_STONITH_HISTORY is reached.
	* On the other hand this leads to purging the history in
	* situations where it would be handy to have it probably.
	*/

	/*!
	 * \internal
	 * \brief Order two remote fencing operations by status and completion time
	 *
	 * Pending operations sort ahead of completed ones, and two pending
	 * operations compare equal. Among completed operations, the one that
	 * completed more recently sorts first (seconds are compared before
	 * nanoseconds).
	 *
	 * \param[in] a  First \c remote_fencing_op_t to compare
	 * \param[in] b  Second \c remote_fencing_op_t to compare
	 *
	 * \return Standard comparison result (a negative integer if \p a is lesser,
	 *         0 if the values are equal, and a positive integer if \p a is greater)
	 */
	static gint
	cmp_op_by_completion(gconstpointer a, gconstpointer b)
	{
	    const remote_fencing_op_t *lhs = a;
	    const remote_fencing_op_t *rhs = b;
	    const bool lhs_pending = stonith__op_state_pending(lhs->state);
	    const bool rhs_pending = stonith__op_state_pending(rhs->state);

	    if (lhs_pending || rhs_pending) {
	        if (lhs_pending && rhs_pending) {
	            return 0;
	        }
	        return lhs_pending? -1 : 1;
	    }

	    // Both completed: later completion time sorts first
	    if (lhs->completed != rhs->completed) {
	        return (lhs->completed > rhs->completed)? -1 : 1;
	    }
	    if (lhs->completed_nsec != rhs->completed_nsec) {
	        return (lhs->completed_nsec > rhs->completed_nsec)? -1 : 1;
	    }
	    return 0;
	}

	/*!
	 * \internal
	 * \brief Drop an operation from \c stonith_remote_op_list unless it is pending
	 *
	 * Written as a callback for g_list_foreach().
	 *
	 * \param[in] data       \c remote_fencing_op_t to remove
	 * \param[in] user_data  Ignored
	 */
	static void
	remove_completed_remote_op(gpointer data, gpointer user_data)
	{
	    const remote_fencing_op_t *candidate = data;

	    if (stonith__op_state_pending(candidate->state)) {
	        return; // Pending operations are always kept
	    }
	    g_hash_table_remove(stonith_remote_op_list, candidate->id);
	}

	/*!
	 * \internal
	 * \brief Trim the local fencing history once it exceeds MAX_STONITH_HISTORY
	 *
	 * Entries are sorted with cmp_op_by_completion(); completed operations
	 * positioned at or beyond index MAX_STONITH_HISTORY / 2 in that order are
	 * removed. Pending operations are never removed.
	 */
	void
	stonith_fence_history_trim(void)
	{
	    GList *sorted_ops = NULL;

	    if (stonith_remote_op_list == NULL) {
	        return;
	    }
	    if (g_hash_table_size(stonith_remote_op_list) <= MAX_STONITH_HISTORY) {
	        return; // Still within bounds
	    }

	    sorted_ops = g_hash_table_get_values(stonith_remote_op_list);

	    crm_trace("More than %d entries in fencing history, purging oldest "
	              "completed operations", MAX_STONITH_HISTORY);

	    sorted_ops = g_list_sort(sorted_ops, cmp_op_by_completion);

	    // Always keep pending ops regardless of number of entries
	    g_list_foreach(g_list_nth(sorted_ops, MAX_STONITH_HISTORY / 2),
	                   remove_completed_remote_op, NULL);

	    // No need for a notification after purging old data
	    g_list_free(sorted_ops);
	}

	/*!
	* \internal
	* \brief Convert xml fence-history to a hash-table like stonith_remote_op_list
	*
	* \param[in] history Fence-history in xml
	*
	* \return Fence-history as hash-table
	*/
	static GHashTable *
	stonith_xml_history_to_list(const xmlNode *history)
	{
	xmlNode *xml_op = NULL;
	GHashTable *rv = NULL;

	init_stonith_remote_op_hash_table(&rv);

	CRM_LOG_ASSERT(rv != NULL);

	for (xml_op = pcmk__xe_first_child(history, NULL, NULL, NULL);
	xml_op != NULL; xml_op = pcmk__xe_next(xml_op)) {

	remote_fencing_op_t *op = NULL;
	char *id = crm_element_value_copy(xml_op, PCMK__XA_ST_REMOTE_OP);
	int state;
	int exit_status = CRM_EX_OK;
	int execution_status = PCMK_EXEC_DONE;
	long long completed;
	long long completed_nsec = 0L;

	if (!id) {
	crm_warn("Malformed fencing history received from peer");
	continue;
	}

	crm_trace("Attaching op %s to hashtable", id);

	op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t));

	op->id = id;
	op->target = crm_element_value_copy(xml_op, PCMK__XA_ST_TARGET);
	op->action = crm_element_value_copy(xml_op, PCMK__XA_ST_DEVICE_ACTION);
	op->originator = crm_element_value_copy(xml_op, PCMK__XA_ST_ORIGIN);
	op->delegate = crm_element_value_copy(xml_op, PCMK__XA_ST_DELEGATE);
	op->client_name = crm_element_value_copy(xml_op,
	PCMK__XA_ST_CLIENTNAME);
	crm_element_value_ll(xml_op, PCMK__XA_ST_DATE, &completed);
	op->completed = (time_t) completed;
	crm_element_value_ll(xml_op, PCMK__XA_ST_DATE_NSEC, &completed_nsec);
	op->completed_nsec = completed_nsec;
	crm_element_value_int(xml_op, PCMK__XA_ST_STATE, &state);
	op->state = (enum op_state) state;

	/* @COMPAT We can't use stonith__xe_get_result() here because
	* fencers <2.1.3 didn't include results, leading it to assume an error
	* status. Instead, set an unknown status in that case.
	*/
	if ((crm_element_value_int(xml_op, PCMK__XA_RC_CODE, &exit_status) < 0)
	\|\| (crm_element_value_int(xml_op, PCMK__XA_OP_STATUS,
	&execution_status) < 0)) {
	exit_status = CRM_EX_INDETERMINATE;
	execution_status = PCMK_EXEC_UNKNOWN;
	}
	pcmk__set_result(&op->result, exit_status, execution_status,
	crm_element_value(xml_op, PCMK_XA_EXIT_REASON));
	pcmk__set_result_output(&op->result,
	crm_element_value_copy(xml_op,
	PCMK__XA_ST_OUTPUT),
	NULL);


	g_hash_table_replace(rv, id, op);
	CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL);
	}

	return rv;
	}

	/*!
	 * \internal
	 * \brief Craft xml difference between local fence-history and a history
	 *        coming from remote, and merge the remote history into the local
	 *
	 * The returned XML lists the local operations the remote history either
	 * lacks or holds only as pending while they are no longer pending locally.
	 * Everything left in \p remote_history is then moved into the local history,
	 * and \p remote_history is destroyed.
	 *
	 * \param[in,out] remote_history  Fence-history as hash-table (may be NULL);
	 *                                consumed by this function
	 * \param[in]     add_id          If crafting the answer for an API
	 *                                history-request there is no need for the id
	 * \param[in]     target          Optionally limit to certain fence-target
	 *
	 * \return The fence-history as xml, or NULL if it would have no entries
	 */
	static xmlNode *
	stonith_local_history_diff_and_merge(GHashTable *remote_history,
	                                     gboolean add_id, const char *target)
	{
	    xmlNode *history = NULL;
	    GHashTableIter iter;
	    remote_fencing_op_t *op = NULL;
	    gboolean updated = FALSE;
	    int cnt = 0;

	    if (stonith_remote_op_list) {
	        char *id = NULL;

	        history = pcmk__xe_create(NULL, PCMK__XE_ST_HISTORY);

	        g_hash_table_iter_init(&iter, stonith_remote_op_list);
	        while (g_hash_table_iter_next(&iter, (void **)&id, (void **)&op)) {
	            xmlNode *entry = NULL;

	            if (remote_history) {
	                remote_fencing_op_t *remote_op =
	                    g_hash_table_lookup(remote_history, op->id);

	                if (remote_op) {
	                    if (stonith__op_state_pending(op->state)
	                        && !stonith__op_state_pending(remote_op->state)) {

	                        crm_debug("Updating outdated pending operation %.8s "
	                                  "(state=%s) according to the one (state=%s) from "
	                                  "remote peer history",
	                                  op->id, stonith_op_state_str(op->state),
	                                  stonith_op_state_str(remote_op->state));

	                        /* Swap the ID strings so that the key already in the
	                         * local table ends up belonging to remote_op, while
	                         * the outgoing local op takes the remote copy with it
	                         * when the replaced value is destroyed.
	                         */
	                        g_hash_table_steal(remote_history, op->id);
	                        op->id = remote_op->id;
	                        remote_op->id = id;
	                        g_hash_table_iter_replace(&iter, remote_op);

	                        updated = TRUE;
	                        continue; /* skip outdated entries */

	                    } else if (!stonith__op_state_pending(op->state)
	                               && stonith__op_state_pending(remote_op->state)) {

	                        crm_debug("Broadcasting operation %.8s (state=%s) to "
	                                  "update the outdated pending one "
	                                  "(state=%s) in remote peer history",
	                                  op->id, stonith_op_state_str(op->state),
	                                  stonith_op_state_str(remote_op->state));

	                        g_hash_table_remove(remote_history, op->id);

	                    } else {
	                        g_hash_table_remove(remote_history, op->id);
	                        continue; /* skip entries broadcasted already */
	                    }
	                }
	            }

	            if (!pcmk__str_eq(target, op->target, pcmk__str_null_matches)) {
	                continue;
	            }

	            cnt++;
	            crm_trace("Attaching op %s", op->id);
	            entry = pcmk__xe_create(history, STONITH_OP_EXEC);
	            if (add_id) {
	                crm_xml_add(entry, PCMK__XA_ST_REMOTE_OP, op->id);
	            }
	            crm_xml_add(entry, PCMK__XA_ST_TARGET, op->target);
	            crm_xml_add(entry, PCMK__XA_ST_DEVICE_ACTION, op->action);
	            crm_xml_add(entry, PCMK__XA_ST_ORIGIN, op->originator);
	            crm_xml_add(entry, PCMK__XA_ST_DELEGATE, op->delegate);
	            crm_xml_add(entry, PCMK__XA_ST_CLIENTNAME, op->client_name);
	            crm_xml_add_ll(entry, PCMK__XA_ST_DATE, op->completed);
	            crm_xml_add_ll(entry, PCMK__XA_ST_DATE_NSEC,
	                           op->completed_nsec);
	            crm_xml_add_int(entry, PCMK__XA_ST_STATE, op->state);
	            stonith__xe_set_result(entry, &op->result);
	        }
	    }

	    if (remote_history) {
	        init_stonith_remote_op_hash_table(&stonith_remote_op_list);

	        // Anything still in the remote history is new to us
	        if (g_hash_table_size(remote_history) > 0) {
	            updated = TRUE;
	        }

	        g_hash_table_iter_init(&iter, remote_history);
	        while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
	            if (stonith__op_state_pending(op->state) &&
	                pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {

	                crm_warn("Failing pending operation %.8s originated by us but "
	                         "known only from peer history", op->id);
	                op->state = st_failed;
	                set_fencing_completed(op);

	                /* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
	                 * from setting a delegate
	                 */
	                pcmk__set_result(&op->result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
	                                 "Initiated by earlier fencer "
	                                 "process and presumed failed");
	                fenced_broadcast_op_result(op, false);
	            }

	            g_hash_table_iter_steal(&iter);
	            g_hash_table_replace(stonith_remote_op_list, op->id, op);
	            /* we could trim the history here but if we bail
	             * out after trim we might miss more recent entries
	             * of those that might still be in the list
	             * if we don't bail out trimming once is more
	             * efficient and memory overhead is minimal as
	             * we are just moving pointers from one hash to
	             * another
	             */
	        }

	        g_hash_table_destroy(remote_history); /* remove what is left */
	    }

	    if (updated) {
	        stonith_fence_history_trim();
	        fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
	    }

	    if (cnt == 0) {
	        free_xml(history);
	        return NULL;
	    } else {
	        return history;
	    }
	}

	/*!
	 * \internal
	 * \brief Build fence-history xml from the local history alone
	 *
	 * This is stonith_local_history_diff_and_merge() called without a remote
	 * history.
	 *
	 * \param[in] add_id  If crafting the answer for an API
	 *                    history-request there is no need for the id
	 * \param[in] target  Optionally limit to certain fence-target
	 *
	 * \return The fence-history as xml
	 */
	static xmlNode *
	stonith_local_history(gboolean add_id, const char *target)
	{
	    xmlNode *local_only = stonith_local_history_diff_and_merge(NULL, add_id,
	                                                               target);

	    return local_only;
	}

	/*!
	 * \internal
	 * \brief Handle fence-history messages (from API or coming in as broadcasts)
	 *
	 * Depending on \p options this cleans up history, takes part in a history
	 * synchronisation, or answers a plain history request.
	 *
	 * \param[in,out] msg          Request XML
	 * \param[out]    output       Where to set local history, if requested
	 *                             (written only for a plain history request)
	 * \param[in]     remote_peer  If broadcast, peer that sent it
	 * \param[in]     options      Call options from the request
	 */
	void
	stonith_fence_history(xmlNode *msg, xmlNode **output,
	                      const char *remote_peer, int options)
	{
	    const char *target = NULL;
	    xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_NEVER);
	    xmlNode *out_history = NULL;

	    if (dev) {
	        target = crm_element_value(dev, PCMK__XA_ST_TARGET);
	        if (target && (options & st_opt_cs_nodeid)) {
	            int nodeid;
	            crm_node_t *node;

	            // The target was given as a node ID; map it to a name if known
	            pcmk__scan_min_int(target, &nodeid, 0);
	            node = pcmk__search_node_caches(nodeid, NULL, NULL,
	                                            pcmk__node_search_any
	                                            |pcmk__node_search_cluster_cib);
	            if (node) {
	                target = node->uname;
	            }
	        }
	    }

	    if (options & st_opt_cleanup) {
	        const char *call_id = crm_element_value(msg, PCMK__XA_ST_CALLID);

	        crm_trace("Cleaning up operations on %s in %p", target,
	                  stonith_remote_op_list);

	        // A call ID selects the broadcast path; without one, clean locally
	        stonith_fence_history_cleanup(target, (call_id != NULL));

	    } else if (options & st_opt_broadcast) {
	        /* there is no clear sign atm for when a history sync
	           is done so send a notification for anything
	           that smells like history-sync
	         */
	        fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, NULL,
	                                 NULL);
	        if (crm_element_value(msg, PCMK__XA_ST_CALLID) != NULL) {
	            /* this is coming from the stonith-API
	             *
	             * craft a broadcast with node's history
	             * so that every node can merge and broadcast
	             * what it has on top
	             */
	            out_history = stonith_local_history(TRUE, NULL);
	            crm_trace("Broadcasting history to peers");
	            stonith_send_broadcast_history(out_history,
	                                           st_opt_broadcast | st_opt_discard_reply,
	                                           NULL);
	        } else if (remote_peer &&
	                   !pcmk__str_eq(remote_peer, stonith_our_uname, pcmk__str_casei)) {
	            xmlNode *history = get_xpath_object("//" PCMK__XE_ST_HISTORY, msg,
	                                                LOG_NEVER);

	            /* either a broadcast created directly upon stonith-API request
	             * or a diff as response to such a thing
	             *
	             * in both cases it may have a history or not
	             * if we have differential data
	             * merge in what we've received and stop
	             * otherwise broadcast what we have on top
	             * marking as differential and merge in afterwards
	             */
	            if (!history
	                || !pcmk__xe_attr_is_true(history, PCMK__XA_ST_DIFFERENTIAL)) {

	                GHashTable *received_history = NULL;

	                if (history != NULL) {
	                    received_history = stonith_xml_history_to_list(history);
	                }
	                out_history =
	                    stonith_local_history_diff_and_merge(received_history, TRUE, NULL);
	                if (out_history) {
	                    crm_trace("Broadcasting history-diff to peers");
	                    pcmk__xe_set_bool_attr(out_history,
	                                           PCMK__XA_ST_DIFFERENTIAL, true);
	                    stonith_send_broadcast_history(out_history,
	                                                   st_opt_broadcast | st_opt_discard_reply,
	                                                   NULL);
	                } else {
	                    crm_trace("History-diff is empty - skip broadcast");
	                }
	            }
	        } else {
	            crm_trace("Skipping history-query-broadcast (%s%s)"
	                      " we sent ourselves",
	                      remote_peer?"remote-peer=":"local-ipc",
	                      remote_peer?remote_peer:"");
	        }
	    } else {
	        /* plain history request */
	        crm_trace("Looking for operations on %s in %p", target,
	                  stonith_remote_op_list);
	        *output = stonith_local_history(FALSE, target);
	    }
	    free_xml(out_history);
	}
	diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
	index 2935c307f5..d04ae85cf9 100644
	--- a/daemons/fenced/fenced_remote.c
	+++ b/daemons/fenced/fenced_remote.c
	@@ -1,2608 +1,2608 @@
	/*
	* Copyright 2009-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sys/param.h>
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <sys/stat.h>
	#include <unistd.h>
	#include <sys/utsname.h>

	#include <stdlib.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <ctype.h>
	#include <regex.h>

	#include <crm/crm.h>
	#include <crm/common/ipc.h>
	#include <crm/common/ipc_internal.h>
	#include <crm/cluster/internal.h>

	#include <crm/stonith-ng.h>
	#include <crm/fencing/internal.h>
	#include <crm/common/xml.h>
	#include <crm/common/xml_internal.h>

	#include <crm/common/util.h>
	#include <pacemaker-fenced.h>

	#define TIMEOUT_MULTIPLY_FACTOR 1.2

	/* When one fencer queries its peers for devices able to handle a fencing
	* request, each peer will reply with a list of such devices available to it.
	* Each reply will be parsed into a peer_device_info_t, with each device's
	* information kept in a device_properties_t.
	*/

	/* Properties of one fencing device, as kept for a peer's query reply */
	typedef struct device_properties_s {
	/* Whether access to this device has been verified */
	gboolean verified;

	/* The array members below each hold st_phase_max entries and are indexed by
	* the operation's "phase"
	*/

	/* Whether this device has been executed in each phase */
	gboolean executed[st_phase_max];
	/* Whether this device is disallowed from executing in each phase */
	gboolean disallowed[st_phase_max];
	/* Action-specific timeout for each phase */
	int custom_action_timeout[st_phase_max];
	/* Action-specific maximum random delay for each phase */
	int delay_max[st_phase_max];
	/* Action-specific base delay for each phase */
	int delay_base[st_phase_max];
	/* Group of enum st_device_flags (a single value, not indexed by phase) */
	uint32_t device_support_flags;
	} device_properties_t;

	/* One peer's reply to a device query (released by free_remote_query()) */
	typedef struct {
	/* Name of peer that sent this result */
	char *host;
	/* Only try peers for non-topology based operations once */
	gboolean tried;
	/* Number of entries in the devices table */
	int ndevices;
	/* Devices available to this host that are capable of fencing the target
	* (looked up by device ID; values are device_properties_t)
	*/
	GHashTable *devices;
	} peer_device_info_t;

	/* Table of remote fencing operations, keyed by operation ID; this doubles as
	 * the fencing history. Created on demand by
	 * init_stonith_remote_op_hash_table().
	 */
	GHashTable *stonith_remote_op_list = NULL;

	extern xmlNode *stonith_create_op(int call_id, const char *token,
	                                  const char *op, xmlNode *data,
	                                  int call_options);

	// Forward declarations for functions defined later in this file
	static void request_peer_fencing(remote_fencing_op_t *op,
	                                 peer_device_info_t *peer);
	static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
	static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
	static int get_op_total_timeout(const remote_fencing_op_t *op,
	                                const peer_device_info_t *chosen_peer);

	/*!
	 * \internal
	 * \brief Compare two C strings, for use as a GLib comparison callback
	 *
	 * \param[in] a  First string
	 * \param[in] b  Second string
	 *
	 * \return Result of strcmp() on \p a and \p b
	 */
	static gint
	sort_strings(gconstpointer a, gconstpointer b)
	{
	    const char *lhs = a;
	    const char *rhs = b;

	    return strcmp(lhs, rhs);
	}

	/*!
	 * \internal
	 * \brief Free one peer's query result
	 *
	 * \param[in,out] data  \c peer_device_info_t to free (may be NULL)
	 */
	static void
	free_remote_query(gpointer data)
	{
	    peer_device_info_t *query_result = data;

	    if (query_result == NULL) {
	        return;
	    }

	    g_hash_table_destroy(query_result->devices);
	    free(query_result->host);
	    free(query_result);
	}

	/*!
	 * \internal
	 * \brief Destroy the table of remote fencing operations, if it exists
	 *
	 * The global pointer is reset afterward, so repeated calls are harmless.
	 */
	void
	free_stonith_remote_op_list(void)
	{
	    if (stonith_remote_op_list == NULL) {
	        return;
	    }

	    g_hash_table_destroy(stonith_remote_op_list);
	    stonith_remote_op_list = NULL;
	}

	/* Accumulator passed as user data to count_peer_device() */
	struct peer_count_data {
	/* Operation whose current phase selects which "executed" flag is checked */
	const remote_fencing_op_t *op;
	/* If true, count only devices marked as verified */
	gboolean verified_only;
	/* Support flags a device must have set to be counted, or
	* st_device_supports_none to skip that check
	*/
	uint32_t support_action_only;
	/* Running count of matching devices */
	int count;
	};

	/*!
	 * \internal
	 * \brief Increment a counter if a device has not been executed yet
	 *
	 * A device is counted when it has not been executed in the operation's
	 * current phase, passes the verified-only filter (if requested), and has the
	 * requested support flags set (if any are requested).
	 *
	 * \param[in]     key        Device ID (ignored)
	 * \param[in]     value      Device properties
	 * \param[in,out] user_data  Peer count data
	 */
	static void
	count_peer_device(gpointer key, gpointer value, gpointer user_data)
	{
	    device_properties_t *props = (device_properties_t *) value;
	    struct peer_count_data *data = user_data;

	    if (!props->executed[data->op->phase]
	        && (!data->verified_only || props->verified)
	        && ((data->support_action_only == st_device_supports_none)
	            || pcmk_is_set(props->device_support_flags,
	                           data->support_action_only))) {
	        ++(data->count);
	    }
	}

	/*!
	 * \internal
	 * \brief Count the devices in a peer's query results that are still available
	 *
	 * \param[in] op                      Operation that results are for
	 * \param[in] peer                    Peer to count (may be NULL)
	 * \param[in] verified_only           Whether to count only verified devices
	 * \param[in] support_on_action_only  Support flags a device must have to be
	 *                                    counted, or \c st_device_supports_none
	 *                                    to count regardless of support flags
	 *
	 * \return Number of devices available to peer that were not already executed
	 *         (0 if \p peer is NULL)
	 */
	static int
	count_peer_devices(const remote_fencing_op_t *op,
	                   const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only)
	{
	    struct peer_count_data tally = {
	        .op = op,
	        .verified_only = verified_only,
	        .support_action_only = support_on_action_only,
	        .count = 0,
	    };

	    if (peer == NULL) {
	        return tally.count;
	    }

	    g_hash_table_foreach(peer->devices, count_peer_device, &tally);
	    return tally.count;
	}

	/*!
	* \internal
	* \brief Search for a device in a query result
	*
	* \param[in] op Operation that result is for
	* \param[in] peer Query result for a peer
	* \param[in] device Device ID to search for
	*
	* \return Device properties if found, NULL otherwise
	*/
	static device_properties_t *
	find_peer_device(const remote_fencing_op_t op, const peer_device_info_t peer,
	const char *device, uint32_t support_action_only)
	{
	device_properties_t *props = g_hash_table_lookup(peer->devices, device);

	if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) {
	return NULL;
	}
	return (props && !props->executed[op->phase]
	&& !props->disallowed[op->phase])? props : NULL;
	}

	/*!
	 * \internal
	 * \brief Find a device in a peer's device list and mark it as executed
	 *
	 * \param[in]     op                     Operation that peer result is for
	 * \param[in,out] peer                   Peer with results to search
	 * \param[in]     device                 ID of device to mark as done
	 * \param[in]     verified_devices_only  Only consider verified devices
	 *
	 * \return TRUE if device was found and marked, FALSE otherwise
	 */
	static gboolean
	grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
	                 const char *device, gboolean verified_devices_only)
	{
	    // Only a device that supports the operation's action can be grabbed
	    device_properties_t *props = find_peer_device(op, peer, device,
	                                                  fenced_support_flag(op->action));

	    if ((props == NULL) || (verified_devices_only && !props->verified)) {
	        return FALSE;
	    }

	    // The count is logged before the device is marked as executed
	    crm_trace("Removing %s from %s (%d remaining)",
	              device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none));
	    props->executed[op->phase] = TRUE;
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Cancel any query, total, and per-operation timers set on an operation
	 *
	 * Each timer that is set is removed from the main loop and its ID reset to 0.
	 *
	 * \param[in,out] op  Operation whose timers should be cleared
	 */
	static void
	clear_remote_op_timers(remote_fencing_op_t * op)
	{
	    if (op->query_timer != 0) {
	        g_source_remove(op->query_timer);
	        op->query_timer = 0;
	    }

	    if (op->op_timer_total != 0) {
	        g_source_remove(op->op_timer_total);
	        op->op_timer_total = 0;
	    }

	    if (op->op_timer_one != 0) {
	        g_source_remove(op->op_timer_one);
	        op->op_timer_one = 0;
	    }
	}

	/*!
	 * \internal
	 * \brief Free a remote fencing operation and everything it owns
	 *
	 * Used as the value destructor for operation tables created by
	 * init_stonith_remote_op_hash_table().
	 *
	 * \param[in,out] data  \c remote_fencing_op_t to free
	 */
	static void
	free_remote_op(gpointer data)
	{
	    remote_fencing_op_t *doomed = data;

	    crm_log_xml_debug(doomed->request, "Destroying");

	    clear_remote_op_timers(doomed);

	    // Owned strings
	    free(doomed->id);
	    free(doomed->action);
	    free(doomed->delegate);
	    free(doomed->target);
	    free(doomed->client_id);
	    free(doomed->client_name);
	    free(doomed->originator);

	    // Owned lists and XML
	    if (doomed->query_results != NULL) {
	        g_list_free_full(doomed->query_results, free_remote_query);
	    }
	    if (doomed->request != NULL) {
	        free_xml(doomed->request);
	        doomed->request = NULL;
	    }
	    if (doomed->devices_list != NULL) {
	        g_list_free_full(doomed->devices_list, free);
	        doomed->devices_list = NULL;
	    }
	    g_list_free_full(doomed->automatic_list, free);

	    // Only the list links are freed here, not the duplicate operations
	    g_list_free(doomed->duplicates);

	    pcmk__reset_result(&doomed->result);
	    free(doomed);
	}

	/*!
	 * \internal
	 * \brief Create a table of remote fencing operations if not already created
	 *
	 * Values in a table created here are freed with free_remote_op().
	 *
	 * \param[in,out] table  Where the table pointer is stored; left unchanged if
	 *                       it already points to a table
	 */
	void
	init_stonith_remote_op_hash_table(GHashTable **table)
	{
	    if (*table != NULL) {
	        return;
	    }
	    *table = pcmk__strkey_table(NULL, free_remote_op);
	}

	/*!
	 * \internal
	 * \brief Return an operation's originally requested action (before any remap)
	 *
	 * \param[in] op  Operation to check
	 *
	 * \return Reboot if the operation has moved past its requested phase,
	 *         otherwise the operation's current action
	 */
	static const char *
	op_requested_action(const remote_fencing_op_t *op)
	{
	    if (op->phase > st_phase_requested) {
	        return PCMK_ACTION_REBOOT;
	    }
	    return op->action;
	}

	/*!
	 * \internal
	 * \brief Switch a "reboot" operation to the "off" phase of a remap
	 *
	 * \param[in,out] op  Operation to remap
	 */
	static void
	op_phase_off(remote_fencing_op_t *op)
	{
	    crm_info("Remapping multiple-device reboot targeting %s to 'off' "
	             CRM_XS " id=%.8s", op->target, op->id);

	    /* The action string is overwritten in place rather than reallocated. That
	     * works because "off" and "on" are both shorter than "reboot", so the
	     * same allocation can be reused at each phase.
	     */
	    strcpy(op->action, PCMK_ACTION_OFF);
	    op->phase = st_phase_off;
	}

	/*!
	 * \internal
	 * \brief Advance a remapped reboot operation to the "on" phase
	 *
	 * Devices listed for automatic unfencing are dropped from the operation's
	 * device list, the automatic list is emptied, and the device cursor is
	 * rewound to the start of the remaining list.
	 *
	 * \param[in,out] op  Operation to remap
	 */
	static void
	op_phase_on(remote_fencing_op_t *op)
	{
	    GList *iter = NULL;

	    crm_info("Remapped 'off' targeting %s complete, "
	             "remapping to 'on' for %s " CRM_XS " id=%.8s",
	             op->target, op->client_name, op->id);
	    op->phase = st_phase_on;
	    strcpy(op->action, PCMK_ACTION_ON);

	    /* Skip devices with automatic unfencing, because the cluster will handle it
	     * when the node rejoins.
	     */
	    for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
	        GList *match = g_list_find_custom(op->devices_list, iter->data,
	                                          sort_strings);

	        if (match != NULL) {
	            /* The device list owns its strings (it is released elsewhere with
	             * g_list_free_full(..., free)), so free the string along with the
	             * link; removing only the link would leak it.
	             */
	            free(match->data);
	            op->devices_list = g_list_delete_link(op->devices_list, match);
	        }
	    }
	    g_list_free_full(op->automatic_list, free);
	    op->automatic_list = NULL;

	    /* Rewind device list pointer */
	    op->devices = op->devices_list;
	}

	/*!
	 * \internal
	 * \brief Return a remapped reboot operation to its requested phase and action
	 *
	 * Nothing is done unless the operation's phase is greater than 0.
	 *
	 * \param[in,out] op  Operation to reset
	 */
	static void
	undo_op_remap(remote_fencing_op_t *op)
	{
	    if (op->phase <= 0) {
	        return; // Not remapped
	    }

	    crm_info("Undoing remap of reboot targeting %s for %s "
	             CRM_XS " id=%.8s", op->target, op->client_name, op->id);
	    op->phase = st_phase_requested;
	    strcpy(op->action, PCMK_ACTION_REBOOT);
	}

	/*!
	 * \internal
	 * \brief Create notification data XML for a fencing operation result
	 *
	 * The new element records the operation's state, target, action, delegate,
	 * ID, originator, and client ID and name.
	 *
	 * \param[in,out] parent  Parent XML element for newly created element
	 *                        (may be NULL)
	 * \param[in]     op      Fencer operation that completed
	 *
	 * \return Newly created XML to add as notification data
	 * \note The caller is responsible for freeing the result.
	 */
	static xmlNode *
	fencing_result2xml(xmlNode *parent, const remote_fencing_op_t *op)
	{
	    xmlNode *notify_data = pcmk__xe_create(parent, PCMK__XE_ST_NOTIFY_FENCE);

	    crm_xml_add_int(notify_data, PCMK_XA_STATE, op->state);
	    crm_xml_add(notify_data, PCMK__XA_ST_TARGET, op->target);
	    crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ACTION, op->action);
	    crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, op->delegate);
	    crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, op->id);
	    crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, op->originator);
	    crm_xml_add(notify_data, PCMK__XA_ST_CLIENTID, op->client_id);
	    crm_xml_add(notify_data, PCMK__XA_ST_CLIENTNAME, op->client_name);

	    return notify_data;
	}

	/*!
	 * \internal
	 * \brief Broadcast a fence result notification to all CPG peers
	 *
	 * Each broadcast carries a counter that increases by one per call.
	 *
	 * \param[in] op         Fencer operation that completed
	 * \param[in] op_merged  Whether this operation is a duplicate of another
	 */
	void
	fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged)
	{
	    static int count = 0;
	    xmlNode *message = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
	    xmlNode *calldata = NULL;
	    xmlNode *result_xml = NULL;

	    count++;
	    crm_trace("Broadcasting result to peers");

	    // Message envelope
	    crm_xml_add(message, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
	    crm_xml_add(message, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
	    crm_xml_add(message, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
	    crm_xml_add_int(message, PCMK_XA_COUNT, count);
	    if (op_merged) {
	        pcmk__xe_set_bool_attr(message, PCMK__XA_ST_OP_MERGED, true);
	    }

	    // Payload: the operation's details and result
	    calldata = pcmk__xe_create(message, PCMK__XE_ST_CALLDATA);
	    result_xml = fencing_result2xml(calldata, op);
	    stonith__xe_set_result(result_xml, &op->result);

	    pcmk__cluster_send_message(NULL, crm_msg_stonith_ng, message);
	    free_xml(message);
	}

	/*!
	 * \internal
	 * \brief Reply to a local request originator and notify all subscribed clients
	 *
	 * Does nothing if notifications were already sent for this operation.
	 *
	 * \param[in,out] op    Fencer operation that completed
	 * \param[in,out] data  Top-level XML to add notification to
	 */
	static void
	handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
	{
	    xmlNode *notify_data = NULL;
	    xmlNode *reply = NULL;
	    pcmk__client_t *client = NULL;

	    if (op->notify_sent == TRUE) {
	        /* nothing to do */
	        return;
	    }

	    /* Do notification with a clean data object */
	    crm_xml_add_int(data, PCMK_XA_STATE, op->state);
	    crm_xml_add(data, PCMK__XA_ST_TARGET, op->target);
	    crm_xml_add(data, PCMK__XA_ST_OP, op->action);

	    reply = fenced_construct_reply(op->request, data, &op->result);
	    crm_xml_add(reply, PCMK__XA_ST_DELEGATE, op->delegate);

	    /* Send fencing OP reply to local client that initiated fencing */
	    client = pcmk__find_client_by_id(op->client_id);
	    if (client == NULL) {
	        crm_trace("Skipping reply to %s: no longer a client", op->client_id);
	    } else {
	        do_local_reply(reply, client, op->call_options);
	    }

	    /* bcast to all local clients that the fencing operation happened */
	    notify_data = fencing_result2xml(NULL, op);
	    fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, &op->result,
	                             notify_data);
	    free_xml(notify_data);
	    fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);

	    /* mark this op as having notify's already sent */
	    op->notify_sent = TRUE;
	    free_xml(reply);
	}

	/*!
	 * \internal
	 * \brief Finalize all duplicates of a given fencer operation
	 *
	 * Each entry of op->duplicates that is still in the st_duplicate state takes
	 * on op's state and result and is then finalized; entries in any other state
	 * are logged and skipped.
	 *
	 * \param[in,out] op    Fencer operation that completed
	 * \param[in,out] data  Top-level XML to add notification to
	 */
	static void
	finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
	{
	    for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
	        remote_fencing_op_t *other = iter->data;

	        if (other->state == st_duplicate) {
	            other->state = op->state;
	            crm_debug("Performing duplicate notification for %s@%s: %s "
	                      CRM_XS " id=%.8s",
	                      other->client_name, other->originator,
	                      pcmk_exec_status_str(op->result.execution_status),
	                      other->id);
	            pcmk__copy_result(&op->result, &other->result);
	            finalize_op(other, data, true);

	        } else {
	            // Possible if (for example) it timed out already
	            crm_err("Skipping duplicate notification for %s@%s "
	                    CRM_XS " state=%s id=%.8s",
	                    other->client_name, other->originator,
	                    stonith_op_state_str(other->state), other->id);
	        }
	    }
	}

	/*!
	 * \internal
	 * \brief Get a copy of the delegate named in fencing XML
	 *
	 * \param[in] xml  XML to search
	 *
	 * \return Copy of the PCMK__XA_ST_DELEGATE attribute found anywhere in
	 *         \p xml by XPath, or if there is none, a copy of the PCMK__XA_SRC
	 *         attribute of \p xml itself
	 */
	static char *
	delegate_from_xml(xmlNode *xml)
	{
	    xmlNode *holder = get_xpath_object("//@" PCMK__XA_ST_DELEGATE, xml,
	                                       LOG_NEVER);

	    if (holder != NULL) {
	        return crm_element_value_copy(holder, PCMK__XA_ST_DELEGATE);
	    }

	    // No explicit delegate anywhere, so fall back to the message's source
	    return crm_element_value_copy(xml, PCMK__XA_SRC);
	}

	/*!
	 * \internal
	 * \brief Finalize a peer fencing operation
	 *
	 * Clean up after a fencing operation completes. This function has two code
	 * paths: the executioner uses it to broadcast the result to CPG peers, and then
	 * each peer (including the executioner) uses it to process that broadcast and
	 * notify its IPC clients of the result.
	 *
	 * \param[in,out] op    Fencer operation that completed
	 * \param[in,out] data  If not NULL, XML reply of last delegated operation
	 * \param[in]     dup   Whether this operation is a duplicate of another
	 *                      (in which case, do not broadcast the result)
	 *
	 * \note The operation result should be set before calling this function.
	 */
	static void
	finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
	{
	    int level = LOG_ERR;
	    const char *subt = NULL;
	    xmlNode *local_data = NULL;
	    bool op_merged = false;

	    CRM_CHECK((op != NULL), return);

	    // This is a no-op if timers have already been cleared
	    clear_remote_op_timers(op);

	    /* Work out whether this is a merged result before anything is logged, so
	     * the late-arrival message below reports it too. A NULL data is replaced
	     * further down by a fresh element that cannot carry the merged flag, so
	     * it is safe to treat NULL as "not flagged" here.
	     */
	    op_merged = dup
	                || ((data != NULL)
	                    && (crm_element_value(data,
	                                          PCMK__XA_ST_OP_MERGED) != NULL));

	    if (op->notify_sent) {
	        // Most likely, this is a timed-out action that eventually completed
	        crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
	                   "Result arrived too late " CRM_XS " id=%.8s",
	                   op->action, (op->target? " targeting " : ""),
	                   (op->target? op->target : ""),
	                   (op->delegate? op->delegate : "unknown node"),
	                   op->client_name, op->originator,
	                   (op_merged? " (merged)" : ""),
	                   op->id);
	        return;
	    }

	    set_fencing_completed(op);
	    undo_op_remap(op);

	    if (data == NULL) {
	        // No reply XML was supplied, so use a scratch element freed on exit
	        data = pcmk__xe_create(NULL, "remote-op");
	        local_data = data;

	    } else if (op->delegate == NULL) {
	        // Take the delegate from the reply unless the result rules that out
	        switch (op->result.execution_status) {
	            case PCMK_EXEC_NO_FENCE_DEVICE:
	                break;

	            case PCMK_EXEC_INVALID:
	                if (op->result.exit_status != CRM_EX_EXPIRED) {
	                    op->delegate = delegate_from_xml(data);
	                }
	                break;

	            default:
	                op->delegate = delegate_from_xml(data);
	                break;
	        }
	    }

	    /* Tell everyone the operation is done, we will continue
	     * with doing the local notifications once we receive
	     * the broadcast back. */
	    subt = crm_element_value(data, PCMK__XA_SUBT);
	    if (!dup && !pcmk__str_eq(subt, PCMK__VALUE_BROADCAST, pcmk__str_none)) {
	        /* Defer notification until the bcast message arrives */
	        fenced_broadcast_op_result(op, op_merged);
	        free_xml(local_data);
	        return;
	    }

	    // Failures are logged as errors only for non-duplicates we originated
	    if (pcmk__result_ok(&op->result) || dup
	        || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
	        level = LOG_NOTICE;
	    }
	    do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
	               CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
	               (op->target? op->target : ""),
	               (op->delegate? op->delegate : "unknown node"),
	               op->client_name, op->originator,
	               (op_merged? " (merged)" : ""),
	               crm_exit_str(op->result.exit_status),
	               pcmk_exec_status_str(op->result.execution_status),
	               ((op->result.exit_reason == NULL)? "" : ": "),
	               ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
	               op->id);

	    handle_local_reply_and_notify(op, data);

	    if (!dup) {
	        finalize_op_duplicates(op, data);
	    }

	    /* Free non-essential parts of the record
	     * Keep the record around so we can query the history
	     */
	    if (op->query_results) {
	        g_list_free_full(op->query_results, free_remote_query);
	        op->query_results = NULL;
	    }
	    if (op->request) {
	        free_xml(op->request);
	        op->request = NULL;
	    }

	    free_xml(local_data);
	}

	/*!
	 * \internal
	 * \brief Timer callback: treat a watchdog fencing operation as successful
	 *
	 * Clears the operation's per-device timer ID, records a successful result,
	 * and finalizes the operation.
	 *
	 * \param[in,out] userdata  Fencer operation that completed
	 *
	 * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
	 */
	static gboolean
	remote_op_watchdog_done(gpointer userdata)
	{
	    remote_fencing_op_t *fence_op = userdata;

	    // The timer that invoked us is finished
	    fence_op->op_timer_one = 0;

	    crm_notice("Self-fencing (%s) by %s for %s assumed complete "
	               CRM_XS " id=%.8s",
	               fence_op->action, fence_op->target, fence_op->client_name,
	               fence_op->id);

	    fence_op->state = st_done;
	    pcmk__set_result(&fence_op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    finalize_op(fence_op, NULL, false);

	    return G_SOURCE_REMOVE;
	}

	/*!
	 * \internal
	 * \brief Timer callback: a peer did not return a fence result in time
	 *
	 * Records a timeout result on the operation, drops any remaining client
	 * delay, and asks for fencing to be requested again.
	 *
	 * \param[in,out] userdata  Fencer operation whose per-device timer expired
	 *
	 * \return G_SOURCE_REMOVE
	 */
	static gboolean
	remote_op_timeout_one(gpointer userdata)
	{
	    remote_fencing_op_t *fence_op = userdata;

	    fence_op->op_timer_one = 0;

	    crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
	               " id=%.8s", fence_op->action, fence_op->target,
	               fence_op->client_name, fence_op->id);
	    pcmk__set_result(&fence_op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
	                     "Peer did not return fence result within timeout");

	    if (fence_op->client_delay > 0) {
	        // The requested delay has been applied for the first device
	        fence_op->client_delay = 0;
	        crm_trace("Try another device for '%s' action targeting %s "
	                  "for client %s without delay " CRM_XS " id=%.8s",
	                  fence_op->action, fence_op->target, fence_op->client_name,
	                  fence_op->id);
	    }

	    // Try another device, if appropriate
	    request_peer_fencing(fence_op, NULL);

	    return G_SOURCE_REMOVE;
	}

	/*!
	 * \internal
	 * \brief Finalize a remote fencer operation that timed out
	 *
	 * \param[in,out] op      Fencer operation that timed out
	 * \param[in]     reason  Readable description of what step timed out
	 */
	static void
	finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
	{
	    crm_debug("Action '%s' targeting %s for client %s timed out "
	              CRM_XS " id=%.8s",
	              op->action, op->target, op->client_name, op->id);

	    if (op->phase == st_phase_on) {
	        /* A remapped reboot operation timed out in the "on" phase, but the
	         * "off" phase completed successfully, so quit trying any further
	         * devices, and return success.
	         */
	        op->state = st_done;
	        pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    } else {
	        op->state = st_failed;
	        pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
	    }
	    finalize_op(op, NULL, false);
	}

	/*!
	 * \internal
	 * \brief Timer callback: an operation's total timeout expired
	 *
	 * Clears the total-timeout timer ID. An operation already in the st_done
	 * state is only logged; any other operation is finalized as timed out.
	 *
	 * \param[in,out] userdata  Fencer operation that timed out
	 *
	 * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
	 */
	static gboolean
	remote_op_timeout(gpointer userdata)
	{
	    remote_fencing_op_t *fence_op = userdata;

	    fence_op->op_timer_total = 0;

	    if (fence_op->state != st_done) {
	        finalize_timed_out_op(fence_op,
	                              "Fencing did not complete within a "
	                              "total timeout based on the "
	                              "configured timeout and retries for "
	                              "any devices attempted");
	        return G_SOURCE_REMOVE;
	    }

	    crm_debug("Action '%s' targeting %s for client %s already completed "
	              CRM_XS " id=%.8s",
	              fence_op->action, fence_op->target, fence_op->client_name,
	              fence_op->id);
	    return G_SOURCE_REMOVE;
	}

	/*!
	 * \internal
	 * \brief Timer callback: the device query period for an operation ended
	 *
	 * Clears the query timer ID. Operations that are already done or executing
	 * are only logged. Otherwise, fencing is requested if any query results
	 * were collected, or the operation is finalized as timed out if none were.
	 *
	 * \param[in,out] data  Fencer operation whose query timer expired
	 *
	 * \return G_SOURCE_REMOVE
	 */
	static gboolean
	remote_op_query_timeout(gpointer data)
	{
	    remote_fencing_op_t *fence_op = data;

	    fence_op->query_timer = 0;

	    switch (fence_op->state) {
	        case st_done:
	            crm_debug("Operation %.8s targeting %s already completed",
	                      fence_op->id, fence_op->target);
	            break;

	        case st_exec:
	            crm_debug("Operation %.8s targeting %s already in progress",
	                      fence_op->id, fence_op->target);
	            break;

	        default:
	            if (fence_op->query_results != NULL) {
	                // Query succeeded, so attempt the actual fencing
	                crm_debug("Query %.8s targeting %s complete (state=%s)",
	                          fence_op->id, fence_op->target,
	                          stonith_op_state_str(fence_op->state));
	                request_peer_fencing(fence_op, NULL);
	            } else {
	                crm_debug("Query %.8s targeting %s timed out (state=%s)",
	                          fence_op->id, fence_op->target,
	                          stonith_op_state_str(fence_op->state));
	                finalize_timed_out_op(fence_op,
	                                      "No capable peers replied to device query "
	                                      "within timeout");
	            }
	            break;
	    }

	    return G_SOURCE_REMOVE;
	}

	/*!
	 * \internal
	 * \brief Check whether a topology entry has no configured levels
	 *
	 * \param[in] tp  Topology entry to check (may be NULL)
	 *
	 * \return TRUE if \p tp is NULL or every one of its levels is NULL,
	 *         otherwise FALSE
	 */
	static gboolean
	topology_is_empty(stonith_topology_t *tp)
	{
	    if (tp != NULL) {
	        for (int lvl = 0; lvl < ST__LEVEL_COUNT; lvl++) {
	            if (tp->levels[lvl] != NULL) {
	                return FALSE;
	            }
	        }
	    }
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Add a device to an operation's automatic unfencing list
	 *
	 * The list stores its own copy of the device ID. Nothing is added if an
	 * equal ID is already present.
	 *
	 * \param[in,out] op      Operation to modify
	 * \param[in]     device  Device ID to add
	 */
	static void
	add_required_device(remote_fencing_op_t *op, const char *device)
	{
	    GList *match = g_list_find_custom(op->automatic_list, device,
	                                      sort_strings);

	    if (match == NULL) {
	        op->automatic_list = g_list_prepend(op->automatic_list,
	                                            pcmk__str_copy(device));
	    }
	}

	/*!
	 * \internal
	 * \brief Remove a device from the automatic unfencing list
	 *
	 * \param[in,out] op      Operation to modify
	 * \param[in]     device  Device ID to remove
	 */
	static void
	remove_required_device(remote_fencing_op_t *op, const char *device)
	{
	    GList *match = g_list_find_custom(op->automatic_list, device,
	                                      sort_strings);

	    if (match != NULL) {
	        /* The list owns a private copy of each ID (made when the device was
	         * added), so release the string along with its list node rather than
	         * leaking it.
	         */
	        free(match->data);
	        op->automatic_list = g_list_delete_link(op->automatic_list, match);
	    }
	}

	/*!
	 * \internal
	 * \brief Replace an operation's device list with a deep copy of another list
	 *
	 * Any existing list (and its strings) is freed first. Afterward, the
	 * operation's current-device pointer refers to the head of the new list.
	 *
	 * \param[in,out] op       Operation to modify
	 * \param[in]     devices  List of device ID strings to copy
	 */
	static void
	set_op_device_list(remote_fencing_op_t * op, GList *devices)
	{
	    if (op->devices_list != NULL) {
	        g_list_free_full(op->devices_list, free);
	        op->devices_list = NULL;
	    }

	    for (GList *item = devices; item != NULL; item = item->next) {
	        const char *device_id = item->data;

	        op->devices_list = g_list_append(op->devices_list,
	                                         pcmk__str_copy(device_id));
	    }

	    op->devices = op->devices_list;
	}

	/*!
	* \internal
	* \brief Check whether a node matches a topology target
	*
	* \param[in] tp Topology table entry to check
	* \param[in] node Name of node to check
	*
	* \return TRUE if node matches topology target
	*/
	static gboolean
	topology_matches(const stonith_topology_t tp, const char node)
	{
	regex_t r_patt;

	CRM_CHECK(node && tp && tp->target, return FALSE);
	switch (tp->kind) {
	case fenced_target_by_attribute:
	/* This level targets by attribute, so tp->target is a NAME=VALUE pair
	* of a permanent attribute applied to targeted nodes. The test below
	* relies on the locally cached copy of the CIB, so if fencing needs to
	* be done before the initial CIB is received or after a malformed CIB
	* is received, then the topology will be unable to be used.
	*/
	if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
	crm_notice("Matched %s with %s by attribute", node, tp->target);
	return TRUE;
	}
	break;

	case fenced_target_by_pattern:
	/* This level targets node names matching a pattern, so tp->target
	* (and tp->target_pattern) is a regular expression.
	*/
	if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED\|REG_NOSUB)) {
	crm_info("Bad regex '%s' for fencing level", tp->target);
	} else {
	int status = regexec(&r_patt, node, 0, NULL, 0);

	regfree(&r_patt);
	if (status == 0) {
	crm_notice("Matched %s with %s by name", node, tp->target);
	return TRUE;
	}
	}
	break;

	case fenced_target_by_name:
	crm_trace("Testing %s against %s", node, tp->target);
	return pcmk__str_eq(tp->target, node, pcmk__str_casei);

	default:
	break;
	}
	crm_trace("No match for %s with %s", node, tp->target);
	return FALSE;
	}

	/*!
	 * \brief Find the topology entry that applies to a host
	 *
	 * The topology table is first checked for an entry keyed by \p host itself.
	 * If there is none, every entry is tested with topology_matches() and the
	 * first match is returned.
	 *
	 * \param[in] host  Name of host to look up
	 *
	 * \return Matching topology entry, or NULL if there is none
	 */
	stonith_topology_t *
	find_topology_for_host(const char *host)
	{
	    GHashTableIter iter;
	    stonith_topology_t *entry = g_hash_table_lookup(topology, host);

	    if (entry != NULL) {
	        crm_trace("Found %s for %s in %d entries", entry->target, host, g_hash_table_size(topology));
	        return entry;
	    }

	    g_hash_table_iter_init(&iter, topology);
	    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &entry)) {
	        if (!topology_matches(entry, host)) {
	            continue;
	        }
	        crm_trace("Found %s for %s in %d entries", entry->target, host, g_hash_table_size(topology));
	        return entry;
	    }

	    crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Move a fencing operation on to its target's next topology level
	 *
	 * The operation's level is advanced to the next one that has devices, and
	 * the operation's device list is replaced with a copy of that level's list.
	 *
	 * \param[in,out] op        Remote fencing operation to modify
	 * \param[in]     empty_ok  If true, an operation without a target (i.e.
	 *                          queries) or a target without a topology will get
	 *                          a pcmk_rc_ok return value instead of ENODEV
	 *
	 * \return Standard Pacemaker return value
	 */
	static int
	advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
	{
	    stonith_topology_t *topo = NULL;

	    if (op->target != NULL) {
	        topo = find_topology_for_host(op->target);
	    }
	    if (topology_is_empty(topo)) {
	        if (empty_ok) {
	            return pcmk_rc_ok;
	        }
	        return ENODEV;
	    }

	    pcmk__assert(topo->levels != NULL);

	    stonith__set_call_options(op->call_options, op->id, st_opt_topology);

	    /* This is a new level, so undo any remapping left over from previous */
	    undo_op_remap(op);

	    // Step to the next level, then keep stepping past levels with no devices
	    op->level++;
	    while ((op->level < ST__LEVEL_COUNT) && (topo->levels[op->level] == NULL)) {
	        op->level++;
	    }

	    if (op->level >= ST__LEVEL_COUNT) {
	        crm_info("All %sfencing options targeting %s for client %s@%s failed "
	                 CRM_XS " id=%.8s",
	                 (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
	                 op->target, op->client_name, op->originator, op->id);
	        return ENODEV;
	    }

	    crm_trace("Attempting fencing level %d targeting %s (%d devices) "
	              "for client %s@%s (id=%.8s)",
	              op->level, op->target, g_list_length(topo->levels[op->level]),
	              op->client_name, op->originator, op->id);
	    set_op_device_list(op, topo->levels[op->level]);

	    // The requested delay has been applied for the first fencing level
	    if ((op->level > 1) && (op->client_delay > 0)) {
	        op->client_delay = 0;
	    }

	    if ((g_list_next(op->devices_list) != NULL)
	        && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
	        /* A reboot has been requested for a topology level with multiple
	         * devices. Instead of rebooting the devices sequentially, we will
	         * turn them all off, then turn them all on again. (Think about
	         * switched power outlets for redundant power supplies.)
	         */
	        op_phase_off(op);
	    }
	    return pcmk_rc_ok;
	}

	/*!
	* \internal
	* \brief If fencing operation is a duplicate, merge it into the other one
	*
	* \param[in,out] op Fencing operation to check
	*/
	static void
	merge_duplicates(remote_fencing_op_t *op)
	{
	GHashTableIter iter;
	remote_fencing_op_t *other = NULL;

	time_t now = time(NULL);

	// Compare op against every operation in the remote operation table
	g_hash_table_iter_init(&iter, stonith_remote_op_list);
	while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
	const char *other_action = op_requested_action(other);
	crm_node_t *node = NULL;

	if (!strcmp(op->id, other->id)) {
	continue; // Don't compare against self
	}
	// Only operations whose state is at or before st_exec are candidates
	if (other->state > st_exec) {
	crm_trace("%.8s not duplicate of %.8s: not in progress",
	op->id, other->id);
	continue;
	}
	if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) {
	crm_trace("%.8s not duplicate of %.8s: node %s vs. %s",
	op->id, other->id, op->target, other->target);
	continue;
	}
	if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) {
	crm_trace("%.8s not duplicate of %.8s: action %s vs. %s",
	op->id, other->id, op->action, other_action);
	continue;
	}
	// Requests from the same client name are never merged with each other
	if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) {
	crm_trace("%.8s not duplicate of %.8s: same client %s",
	op->id, other->id, op->client_name);
	continue;
	}
	// Nor is anything merged into an operation whose originator is its target
	if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) {
	crm_trace("%.8s not duplicate of %.8s: self-fencing for %s",
	op->id, other->id, other->target);
	continue;
	}

	node = pcmk__get_node(0, other->originator, NULL,
	pcmk__node_search_cluster_member);

	if (!fencing_peer_active(node)) {
	crm_notice("Failing action '%s' targeting %s originating from "
	"client %s@%s: Originator is dead " CRM_XS " id=%.8s",
	other->action, other->target, other->client_name,
	other->originator, other->id);
	crm_trace("%.8s not duplicate of %.8s: originator dead",
	op->id, other->id);
	// Side effect of the scan: the other operation is marked failed here
	other->state = st_failed;
	continue;
	}
	// Skip operations that have outlived created + total_timeout
	if ((other->total_timeout > 0)
	&& (now > (other->total_timeout + other->created))) {
	crm_trace("%.8s not duplicate of %.8s: old (%lld vs. %lld + %ds)",
	op->id, other->id, (long long)now, (long long)other->created,
	other->total_timeout);
	continue;
	}

	/* There is another in-flight request to fence the same host
	* Piggyback on that instead. If it fails, so do we.
	*/
	other->duplicates = g_list_append(other->duplicates, op);
	// If the other operation has no total timeout yet, both get an estimate
	if (other->total_timeout == 0) {
	other->total_timeout = op->total_timeout =
	TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
	crm_trace("Best guess as to timeout used for %.8s: %ds",
	other->id, other->total_timeout);
	}
	/* NOTE(review): op is the request being merged and other is the
	* operation it is merged into, yet op->id is logged as "original=" and
	* other->id as "duplicate=" -- confirm whether the labels are swapped.
	*/
	crm_notice("Merging fencing action '%s' targeting %s originating from "
	"client %s with identical request from %s@%s "
	CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds",
	op->action, op->target, op->client_name,
	other->client_name, other->originator,
	op->id, other->id, other->total_timeout);
	report_timeout_period(op, other->total_timeout);
	op->state = st_duplicate;
	/* NOTE(review): there is no break here, so the scan continues and op
	* can be appended to the duplicates list of more than one operation --
	* confirm this is intended.
	*/
	}
	}

	/*!
	 * \internal
	 * \brief Count the peer cache entries that fencing_peer_active() accepts
	 *
	 * \return Number of active fencing peers
	 */
	static uint32_t
	fencing_active_peers(void)
	{
	    uint32_t active = 0;
	    crm_node_t *peer;
	    GHashTableIter iter;

	    g_hash_table_iter_init(&iter, crm_peer_cache);
	    while (g_hash_table_iter_next(&iter, NULL, (void **) &peer)) {
	        if (!fencing_peer_active(peer)) {
	            continue;
	        }
	        active++;
	    }
	    return active;
	}

	/*!
	 * \internal
	 * \brief Process a manual confirmation of a pending fence action
	 *
	 * \param[in]     client  IPC client that sent confirmation
	 * \param[in,out] msg     Request XML with manual confirmation
	 *
	 * \return Standard Pacemaker return code (EPROTO if the request names no
	 *         target or no operation could be created, otherwise EINPROGRESS)
	 */
	int
	fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg)
	{
	    remote_fencing_op_t *op = NULL;
	    xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR);

	    CRM_CHECK(dev != NULL, return EPROTO);

	    crm_notice("Received manual confirmation that %s has been fenced",
	               pcmk__s(crm_element_value(dev, PCMK__XA_ST_TARGET),
	                       "unknown target"));
	    op = initiate_remote_stonith_op(client, msg, TRUE);
	    if (op == NULL) {
	        return EPROTO;
	    }
	    op->state = st_done;
	    set_fencing_completed(op);
	    op->delegate = pcmk__str_copy("a human");

	    // For the fencer's purposes, the fencing operation is done
	    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	    finalize_op(op, msg, false);

	    /* For the requester's purposes, the operation is still pending. The
	     * actual result will be sent asynchronously via the operation's done_cb().
	     */
	    return EINPROGRESS;
	}

	/*!
	* \internal
	* \brief Create a new remote stonith operation
	*
	* \param[in] client ID of local stonith client that initiated the operation
	* \param[in] request The request from the client that started the operation
	* \param[in] peer TRUE if this operation is owned by another stonith peer
	* (an operation owned by one peer is stored on all peers,
	* but only the owner executes it; all nodes get the results
	* once the owner finishes execution)
	*/
	/* NOTE(review): "client" and "request" are declared by value here but are
	* used as pointers in the body (client is compared with NULL and both are
	* passed to string/XML helpers), so their '*' declarators appear to have been
	* lost from this copy of the code -- confirm against the real source.
	*/
	void *
	create_remote_stonith_op(const char client, xmlNode request, gboolean peer)
	{
	remote_fencing_op_t *op = NULL;
	// Node of the request that carries the target attribute (NULL if none)
	xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request,
	LOG_NEVER);
	int rc = pcmk_rc_ok;
	const char *operation = NULL;

	init_stonith_remote_op_hash_table(&stonith_remote_op_list);

	/* If this operation is owned by another node, check to make
	* sure we haven't already created this operation. */
	if (peer && dev) {
	const char *op_id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);

	CRM_CHECK(op_id != NULL, return NULL);

	op = g_hash_table_lookup(stonith_remote_op_list, op_id);
	if (op) {
	crm_debug("Reusing existing remote fencing op %.8s for %s",
	op_id, ((client == NULL)? "unknown client" : client));
	return op;
	}
	}

	op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t));

	crm_element_value_int(request, PCMK__XA_ST_TIMEOUT, &(op->base_timeout));
	// Value -1 means disable any static/random fencing delays
	crm_element_value_int(request, PCMK__XA_ST_DELAY, &(op->client_delay));

	// A peer-owned operation keeps the owner's ID; otherwise generate a new one
	if (peer && dev) {
	op->id = crm_element_value_copy(dev, PCMK__XA_ST_REMOTE_OP);
	} else {
	op->id = crm_generate_uuid();
	}

	// The table is keyed by the operation's own id string
	g_hash_table_replace(stonith_remote_op_list, op->id, op);

	op->state = st_query;
	op->replies_expected = fencing_active_peers();
	/* NOTE(review): dev is NULL-checked above ("peer && dev") but is passed
	* unguarded to crm_element_value_copy() here and below -- confirm that
	* helper tolerates a NULL element.
	*/
	op->action = crm_element_value_copy(dev, PCMK__XA_ST_DEVICE_ACTION);

	/* The node initiating the stonith operation. If an operation is relayed,
	* this is the last node the operation lands on. When in standalone mode,
	* origin is the ID of the client that originated the operation.
	*
	* Or may be the name of the function that created the operation.
	*/
	op->originator = crm_element_value_copy(dev, PCMK__XA_ST_ORIGIN);
	if (op->originator == NULL) {
	/* Local or relayed request */
	op->originator = pcmk__str_copy(stonith_our_uname);
	}

	// Delegate may not be set
	op->delegate = crm_element_value_copy(dev, PCMK__XA_ST_DELEGATE);
	op->created = time(NULL);

	CRM_LOG_ASSERT(client != NULL);
	op->client_id = pcmk__str_copy(client);

	/* For a RELAY operation, set fenced on the client. */
	operation = crm_element_value(request, PCMK__XA_ST_OP);

	if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
	// Client name becomes "<system name>.<pid>" of this process
	op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
	(unsigned long) getpid());
	} else {
	op->client_name = crm_element_value_copy(request,
	PCMK__XA_ST_CLIENTNAME);
	}

	op->target = crm_element_value_copy(dev, PCMK__XA_ST_TARGET);

	// @TODO Figure out how to avoid copying XML here
	op->request = pcmk__xml_copy(NULL, request);

	rc = pcmk__xe_get_flags(request, PCMK__XA_ST_CALLOPT, &(op->call_options),
	0U);
	if (rc != pcmk_rc_ok) {
	// Unparsable options are only warned about; creation continues
	crm_warn("Couldn't parse options from request %s: %s",
	op->id, pcmk_rc_str(rc));
	}

	crm_element_value_int(request, PCMK__XA_ST_CALLID, &(op->client_callid));

	crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
	"base timeout %ds, %u %s expected)",
	(peer && dev)? "Recorded" : "Generated", op->id, op->action,
	op->target, op->client_name, op->base_timeout,
	op->replies_expected,
	pcmk__plural_alt(op->replies_expected, "reply", "replies"));

	// With st_opt_cs_nodeid set, the target is a node ID to turn into a name
	if (op->call_options & st_opt_cs_nodeid) {
	int nodeid;
	crm_node_t *node;

	pcmk__scan_min_int(op->target, &nodeid, 0);
	/* NOTE(review): the next two lines are the removed ("-") and added ("+")
	* sides of a diff hunk, not two statements; the added side passes one
	* more NULL argument. The "\|" on the continuation line looks like an
	* escaped '|' operator -- confirm against the real source.
	*/
	- node = pcmk__search_node_caches(nodeid, NULL,
	+ node = pcmk__search_node_caches(nodeid, NULL, NULL,
	pcmk__node_search_any
	\|pcmk__node_search_cluster_cib);

	/* Ensure the conversion only happens once */
	stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);

	if (node && node->uname) {
	pcmk__str_update(&(op->target), node->uname);

	} else {
	crm_warn("Could not expand nodeid '%s' into a host name", op->target);
	}
	}

	/* check to see if this is a duplicate operation of another in-flight operation */
	merge_duplicates(op);

	if (op->state != st_duplicate) {
	/* kick history readers */
	fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
	}

	/* safe to trim as long as that doesn't touch pending ops */
	stonith_fence_history_trim();

	return op;
	}

	/*!
	 * \internal
	 * \brief Create a peer fencing operation from a request, and initiate it
	 *
	 * Unless this is a manual confirmation, a failed or duplicate operation is
	 * returned without further action; otherwise a device query is sent to the
	 * cluster and the operation's query timer is started.
	 *
	 * \param[in] client      IPC client that made request (NULL to get from
	 *                        request)
	 * \param[in] request     Request XML
	 * \param[in] manual_ack  Whether this is a manual action confirmation
	 *
	 * \return Newly created operation on success, otherwise NULL
	 */
	remote_fencing_op_t *
	initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request,
	                           gboolean manual_ack)
	{
	    int query_timeout = 0;
	    xmlNode *query = NULL;
	    const char *client_id = NULL;
	    remote_fencing_op_t *op = NULL;
	    const char *relay_op_id = NULL;
	    const char *operation = NULL;

	    if (client) {
	        client_id = client->id;
	    } else {
	        client_id = crm_element_value(request, PCMK__XA_ST_CLIENTID);
	    }

	    CRM_LOG_ASSERT(client_id != NULL);
	    op = create_remote_stonith_op(client_id, request, FALSE);
	    op->owner = TRUE;
	    if (manual_ack) {
	        // The caller completes a manually confirmed operation itself
	        return op;
	    }

	    CRM_CHECK(op->action, return NULL);

	    if (advance_topology_level(op, true) != pcmk_rc_ok) {
	        op->state = st_failed;
	    }

	    switch (op->state) {
	        case st_failed:
	            // advance_topology_level() exhausted levels
	            pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
	                             "All topology levels failed");
	            crm_warn("Could not request peer fencing (%s) targeting %s "
	                     CRM_XS " id=%.8s", op->action, op->target, op->id);
	            finalize_op(op, NULL, false);
	            return op;

	        case st_duplicate:
	            crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
	                     CRM_XS " id=%.8s", op->action, op->target, op->id);
	            return op;

	        default:
	            crm_notice("Requesting peer fencing (%s) targeting %s "
	                       CRM_XS " id=%.8s state=%s base_timeout=%ds",
	                       op->action, op->target, op->id,
	                       stonith_op_state_str(op->state), op->base_timeout);
	    }

	    query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
	                              NULL, op->call_options);

	    crm_xml_add(query, PCMK__XA_ST_REMOTE_OP, op->id);
	    crm_xml_add(query, PCMK__XA_ST_TARGET, op->target);
	    crm_xml_add(query, PCMK__XA_ST_DEVICE_ACTION, op_requested_action(op));
	    crm_xml_add(query, PCMK__XA_ST_ORIGIN, op->originator);
	    crm_xml_add(query, PCMK__XA_ST_CLIENTID, op->client_id);
	    crm_xml_add(query, PCMK__XA_ST_CLIENTNAME, op->client_name);
	    crm_xml_add_int(query, PCMK__XA_ST_TIMEOUT, op->base_timeout);

	    /* In case of RELAY operation, RELAY information is added to the query to
	     * delete the original operation of RELAY.
	     */
	    operation = crm_element_value(request, PCMK__XA_ST_OP);
	    if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
	        relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP);
	        if (relay_op_id) {
	            crm_xml_add(query, PCMK__XA_ST_REMOTE_OP_RELAY, relay_op_id);
	        }
	    }

	    pcmk__cluster_send_message(NULL, crm_msg_stonith_ng, query);
	    free_xml(query);

	    // The query timer is set in milliseconds from the scaled base timeout
	    query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
	    op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);

	    return op;
	}

	/*!
	 * \internal
	 * \brief Bit flags that restrict which peers find_best_peer() considers
	 */
	enum find_best_peer_options {
	    /*! Skip checking the target peer for capable fencing devices */
	    FIND_PEER_SKIP_TARGET = 0x0001,
	    /*! Only check the target peer for capable fencing devices */
	    FIND_PEER_TARGET_ONLY = 0x0002,
	    /*! Skip peers and devices that are not verified */
	    FIND_PEER_VERIFIED_ONLY = 0x0004,
	};

	static bool
	is_watchdog_fencing(const remote_fencing_op_t op, const char device)
	{
	return (stonith_watchdog_timeout_ms > 0
	// Only an explicit mismatch is considered not a watchdog fencing.
	&& pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_null_matches)
	&& pcmk__is_fencing_action(op->action)
	&& node_does_watchdog_fencing(op->target));
	}

	/*!
	 * \internal
	 * \brief Pick the first queried peer that can be used for an operation
	 *
	 * \param[in]     device   Device ID wanted (may be NULL when the operation
	 *                         does not use a topology)
	 * \param[in,out] op       Fencing operation to find a peer for
	 * \param[in]     options  Flags restricting which peers are considered
	 *
	 * \return First suitable entry of the operation's query results, or NULL if
	 *         there is none (always NULL for a topology operation when
	 *         \p device is NULL)
	 */
	static peer_device_info_t *
	find_best_peer(const char *device, remote_fencing_op_t *op,
	               enum find_best_peer_options options)
	{
	    GList *iter = NULL;
	    gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;

	    if (!device && pcmk_is_set(op->call_options, st_opt_topology)) {
	        return NULL;
	    }

	    for (iter = op->query_results; iter != NULL; iter = iter->next) {
	        peer_device_info_t *peer = iter->data;

	        crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
	                  peer->host, op->target, peer->ndevices,
	                  pcmk__plural_s(peer->ndevices), peer->tried, options);
	        if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
	            continue;
	        }
	        if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
	            continue;
	        }

	        if (pcmk_is_set(op->call_options, st_opt_topology)) {

	            // With a topology, the peer must be able to supply this device
	            if (grab_peer_device(op, peer, device, verified_devices_only)) {
	                return peer;
	            }

	        } else if (!peer->tried
	                   && count_peer_devices(op, peer, verified_devices_only,
	                                         fenced_support_flag(op->action))) {
	            /* No topology: Use the current best peer */
	            crm_trace("Simple fencing");
	            return peer;
	        }
	    }

	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Choose a peer to execute the next step of a fencing operation
	 *
	 * Candidates are tried in order of preference: a verified peer other than
	 * the target, an unverified peer other than the target, then the target
	 * itself. If none is found and the operation uses a topology, the search is
	 * repeated for each further topology level.
	 *
	 * \param[in,out] op  Fencing operation to choose a peer for
	 *
	 * \return Chosen peer, or NULL if none is available (or if replies are still
	 *         awaited before unverified devices are considered)
	 */
	static peer_device_info_t *
	stonith_choose_peer(remote_fencing_op_t * op)
	{
	    const char *device = NULL;
	    peer_device_info_t *peer = NULL;
	    uint32_t active = fencing_active_peers();

	    do {
	        if (op->devices) {
	            device = op->devices->data;
	            crm_trace("Checking for someone to fence (%s) %s using %s",
	                      op->action, op->target, device);
	        } else {
	            crm_trace("Checking for someone to fence (%s) %s",
	                      op->action, op->target);
	        }

	        /* Best choice is a peer other than the target with verified access */
	        peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
	        if (peer) {
	            crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
	            return peer;
	        }

	        // While the query timer runs and replies are outstanding, keep waiting
	        if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
	            crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
	            return NULL;
	        }

	        /* If no other peer has verified access, next best is unverified access */
	        peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
	        if (peer) {
	            crm_trace("Found best unverified peer %s", peer->host);
	            return peer;
	        }

	        /* If no other peer can do it, last option is self-fencing
	         * (which is never allowed for the "on" phase of a remapped reboot)
	         */
	        if (op->phase != st_phase_on) {
	            peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
	            if (peer) {
	                crm_trace("%s will fence itself", peer->host);
	                return peer;
	            }
	        }

	        /* Try the next fencing level if there is one (unless we're in the "on"
	         * phase of a remapped "reboot", because we ignore errors in that case)
	         */
	    } while ((op->phase != st_phase_on)
	             && pcmk_is_set(op->call_options, st_opt_topology)
	             && (advance_topology_level(op, false) == pcmk_rc_ok));

	    /* With a simple watchdog fencing configuration without a topology,
	     * "device" is NULL here. Consider it should be done with watchdog fencing.
	     */
	    if (is_watchdog_fencing(op, device)) {
	        crm_info("Couldn't contact watchdog-fencing target-node (%s)",
	                 op->target);
	        /* check_watchdog_fencing_and_wait will log additional info */
	    } else {
	        crm_notice("Couldn't find anyone to fence (%s) %s using %s",
	                   op->action, op->target, (device? device : "any device"));
	    }
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Raise a fencing timeout to the watchdog timeout when required
	 *
	 * For anything other than watchdog fencing, the specified timeout is
	 * returned unchanged. For watchdog fencing, a timeout shorter than the
	 * watchdog timeout (in seconds) is raised to it, with a warning.
	 *
	 * \param[in] specified_timeout  Timeout requested, in seconds
	 * \param[in] action_specific    Whether the timeout is a device's
	 *                               action-specific one (affects only the
	 *                               warning text)
	 * \param[in] op                 Fencing operation the timeout is for
	 * \param[in] device             Device ID the timeout is for (may be NULL)
	 *
	 * \return Timeout to use, in seconds
	 */
	static int
	valid_fencing_timeout(int specified_timeout, bool action_specific,
	                      const remote_fencing_op_t *op, const char *device)
	{
	    int timeout = specified_timeout;

	    if (!is_watchdog_fencing(op, device)) {
	        return timeout;
	    }

	    // At least the watchdog timeout in seconds, capped to what an int holds
	    timeout = (int) QB_MIN(QB_MAX(specified_timeout,
	                                  stonith_watchdog_timeout_ms / 1000), INT_MAX);

	    if (timeout > specified_timeout) {
	        if (action_specific) {
	            crm_warn("pcmk_%s_timeout %ds for %s is too short (must be >= "
	                     PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds "
	                     "instead",
	                     op->action, specified_timeout, device? device : "watchdog",
	                     timeout, timeout);

	        } else {
	            crm_warn("Fencing timeout %ds is too short (must be >= "
	                     PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds "
	                     "instead",
	                     specified_timeout, timeout, timeout);
	        }
	    }

	    return timeout;
	}

	static int
	get_device_timeout(const remote_fencing_op_t *op,
	const peer_device_info_t peer, const char device,
	bool with_delay)
	{
	int timeout = op->base_timeout;
	device_properties_t *props;

	timeout = valid_fencing_timeout(op->base_timeout, false, op, device);

	if (!peer \|\| !device) {
	return timeout;
	}

	props = g_hash_table_lookup(peer->devices, device);
	if (!props) {
	return timeout;
	}

	if (props->custom_action_timeout[op->phase]) {
	timeout = valid_fencing_timeout(props->custom_action_timeout[op->phase],
	true, op, device);
	}

	// op->client_delay < 0 means disable any static/random fencing delays
	if (with_delay && (op->client_delay >= 0)) {
	// delay_base is eventually limited by delay_max
	timeout += (props->delay_max[op->phase] > 0 ?
	props->delay_max[op->phase] : props->delay_base[op->phase]);
	}

	return timeout;
	}

	/* Accumulator handed to add_device_timeout() as the user data of
	 * g_hash_table_foreach() (see get_peer_timeout())
	 */
	struct timeout_data {
	const remote_fencing_op_t *op;      // Operation whose phase selects the per-device values
	const peer_device_info_t *peer;     // Peer whose device table is being iterated
	int total_timeout;                  // Running sum of the device timeouts added so far
	};

	/*!
	 * \internal
	 * \brief GHFunc that accumulates the timeout of a not-yet-run device
	 *
	 * Devices that have already been executed, or that are disallowed, for the
	 * operation's current phase contribute nothing to the total.
	 *
	 * \param[in]     key        Device ID (hash table key)
	 * \param[in]     value      Device properties (hash table value)
	 * \param[in,out] user_data  struct timeout_data holding the running total
	 */
	static void
	add_device_timeout(gpointer key, gpointer value, gpointer user_data)
	{
	    struct timeout_data *acc = user_data;
	    device_properties_t *dev_props = value;
	    const char *id = key;

	    // Nothing to add for devices that already ran or may not run
	    if (dev_props->executed[acc->op->phase]
	        || dev_props->disallowed[acc->op->phase]) {
	        return;
	    }

	    acc->total_timeout += get_device_timeout(acc->op, acc->peer, id, true);
	}

	static int
	get_peer_timeout(const remote_fencing_op_t op, const peer_device_info_t peer)
	{
	struct timeout_data timeout;

	timeout.op = op;
	timeout.peer = peer;
	timeout.total_timeout = 0;

	g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);

	return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
	}

	/*!
	 * \internal
	 * \brief Calculate the total timeout for a fencing operation
	 *
	 * With a topology configured for the target, sum the timeouts of every device
	 * at every level (plus any automatic-unfencing devices not already covered
	 * by the topology when the action is "on"). Without a topology, use the chosen
	 * peer's timeout if there is one, otherwise the validated base timeout. Any
	 * positive client-requested delay is added on top.
	 *
	 * \param[in] op           Fencing operation
	 * \param[in] chosen_peer  Peer selected to execute the operation (may be NULL)
	 *
	 * \return Total timeout in seconds, capped at INT_MAX
	 */
	static int
	get_op_total_timeout(const remote_fencing_op_t *op,
	                     const peer_device_info_t *chosen_peer)
	{
	    long long total_timeout = 0;
	    stonith_topology_t *tp = find_topology_for_host(op->target);

	    if (pcmk_is_set(op->call_options, st_opt_topology) && tp) {
	        int i;
	        GList *device_list = NULL;
	        GList *iter = NULL;
	        GList *auto_list = NULL;

	        if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)
	            && (op->automatic_list != NULL)) {
	            // Shallow copy: the list nodes are ours, the device IDs are not
	            auto_list = g_list_copy(op->automatic_list);
	        }

	        /* Yep, this looks scary, nested loops all over the place.
	         * Here is what is going on.
	         * Loop1: Iterate through fencing levels.
	         * Loop2: If a fencing level has devices, loop through each device
	         * Loop3: For each device in a fencing level, see what peer owns it
	         *        and what that peer has reported the timeout is for the device.
	         */
	        for (i = 0; i < ST__LEVEL_COUNT; i++) {
	            if (!tp->levels[i]) {
	                continue;
	            }
	            for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
	                bool found = false;

	                for (iter = op->query_results; iter != NULL; iter = iter->next) {
	                    const peer_device_info_t *peer = iter->data;

	                    /* A device that is part of the topology is accounted for
	                     * here, so drop it from the leftover automatic list
	                     */
	                    if (auto_list) {
	                        GList *match = g_list_find_custom(auto_list, device_list->data,
	                                                          sort_strings);
	                        if (match) {
	                            auto_list = g_list_remove(auto_list, match->data);
	                        }
	                    }

	                    if (find_peer_device(op, peer, device_list->data,
	                                         fenced_support_flag(op->action))) {
	                        total_timeout += get_device_timeout(op, peer,
	                                                            device_list->data,
	                                                            true);
	                        found = true;
	                        break;
	                    }
	                }               /* End Loop3: match device with peer that owns device, find device's timeout period */

	                /* in case of watchdog-device we add the timeout to the budget
	                   if didn't get a reply
	                 */
	                if (!found && is_watchdog_fencing(op, device_list->data)) {
	                    total_timeout += stonith_watchdog_timeout_ms / 1000;
	                }
	            }                   /* End Loop2: iterate through devices at a specific level */
	        }                       /* End Loop1: iterate through fencing levels */

	        // Add timeouts of automatic-list devices that are not in the topology
	        if (auto_list) {
	            for (iter = auto_list; iter != NULL; iter = iter->next) {
	                GList *iter2 = NULL;

	                /* Advance iter2 (the peer iterator) here. Advancing iter
	                 * instead would never terminate normally and would clobber
	                 * the device iterator with a peer list node.
	                 */
	                for (iter2 = op->query_results; iter2 != NULL; iter2 = iter2->next) {
	                    peer_device_info_t *peer = iter2->data;
	                    if (find_peer_device(op, peer, iter->data, st_device_supports_on)) {
	                        total_timeout += get_device_timeout(op, peer,
	                                                            iter->data, true);
	                        break;
	                    }
	                }
	            }
	        }

	        g_list_free(auto_list);

	    } else if (chosen_peer) {
	        total_timeout = get_peer_timeout(op, chosen_peer);

	    } else {
	        total_timeout = valid_fencing_timeout(op->base_timeout, false, op,
	                                              NULL);
	    }

	    if (total_timeout <= 0) {
	        total_timeout = op->base_timeout;
	    }

	    /* Take any requested fencing delay into account to prevent it from eating
	     * up the total timeout.
	     */
	    if (op->client_delay > 0) {
	        total_timeout += op->client_delay;
	    }
	    return (int) QB_MIN(total_timeout, INT_MAX);
	}

	/*!
	 * \internal
	 * \brief Report an operation's timeout to the client that requested it
	 *
	 * Nothing is reported for synchronous calls, for operations without a stored
	 * request, or when the request lacks the client node, call ID or client ID.
	 * If the client is connected to this node, it is updated directly; otherwise
	 * the update is relayed to the client's node and then reported for each
	 * duplicate of the operation.
	 *
	 * \param[in,out] op          Fencing operation to report on
	 * \param[in]     op_timeout  Timeout to report
	 */
	static void
	report_timeout_period(remote_fencing_op_t * op, int op_timeout)
	{
	    GList *iter = NULL;
	    xmlNode *update = NULL;
	    const char *client_node = NULL;
	    const char *client_id = NULL;
	    const char *call_id = NULL;

	    if (op->call_options & st_opt_sync_call) {
	        /* There is no reason to report the timeout for a synchronous call. It
	         * is impossible to use the reported timeout to do anything when the client
	         * is blocking for the response.  This update is only important for
	         * async calls that require a callback to report the results in. */
	        return;
	    } else if (!op->request) {
	        return;
	    }

	    crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id);
	    client_node = crm_element_value(op->request, PCMK__XA_ST_CLIENTNODE);
	    call_id = crm_element_value(op->request, PCMK__XA_ST_CALLID);
	    client_id = crm_element_value(op->request, PCMK__XA_ST_CLIENTID);
	    if (!client_node || !call_id || !client_id) {
	        return;
	    }

	    if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) {
	        // Client is connected to this node, so send update directly to them
	        do_stonith_async_timeout_update(client_id, call_id, op_timeout);
	        /* NOTE(review): returning here skips the duplicates loop below for
	         * locally connected clients -- confirm that is intended
	         */
	        return;
	    }

	    /* The client is connected to another node, relay this update to them */
	    update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
	    crm_xml_add(update, PCMK__XA_ST_REMOTE_OP, op->id);
	    crm_xml_add(update, PCMK__XA_ST_CLIENTID, client_id);
	    crm_xml_add(update, PCMK__XA_ST_CALLID, call_id);
	    crm_xml_add_int(update, PCMK__XA_ST_TIMEOUT, op_timeout);

	    pcmk__cluster_send_message(pcmk__get_node(0, client_node, NULL,
	                                              pcmk__node_search_cluster_member),
	                               crm_msg_stonith_ng, update);

	    free_xml(update);

	    // Each duplicate has its own client, so report to those as well
	    for (iter = op->duplicates; iter != NULL; iter = iter->next) {
	        remote_fencing_op_t *dup = iter->data;

	        crm_trace("Reporting timeout for duplicate %.8s to client %s",
	                  dup->id, dup->client_name);
	        report_timeout_period(iter->data, op_timeout);
	    }
	}

	/*!
	 * \internal
	 * \brief Advance an operation to the next device in its topology
	 *
	 * Moves to the next device of the current level (falling back to the
	 * automatic-unfencing list for a requested "on", or to the "on" phase of a
	 * remapped reboot), then either requests fencing with that device or, if no
	 * devices remain, marks the operation done and finalizes it.
	 *
	 * \param[in,out] op      Fencer operation to advance
	 * \param[in]     device  ID of device that just completed
	 * \param[in,out] msg     If not NULL, XML reply of last delegated operation
	 */
	static void
	advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
	                                 xmlNode *msg)
	{
	    /* Advance to the next device at this topology level, if any */
	    if (op->devices) {
	        op->devices = op->devices->next;
	    }

	    /* Handle automatic unfencing if an "on" action was requested */
	    if ((op->phase == st_phase_requested)
	        && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) {
	        /* If the device we just executed was required, it's not anymore */
	        remove_required_device(op, device);

	        /* If there are no more devices at this topology level, run through any
	         * remaining devices with automatic unfencing
	         */
	        if (op->devices == NULL) {
	            op->devices = op->automatic_list;
	        }
	    }

	    if ((op->devices == NULL) && (op->phase == st_phase_off)) {
	        /* We're done with this level and with required devices, but we had
	         * remapped "reboot" to "off", so start over with "on". If any devices
	         * need to be turned back on, op->devices will be non-NULL after this.
	         */
	        op_phase_on(op);
	    }

	    // This function is only called if the previous device succeeded
	    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

	    if (op->devices) {
	        /* Necessary devices remain, so execute the next one */
	        crm_trace("Next targeting %s on behalf of %s@%s",
	                  op->target, op->client_name, op->originator);

	        // The requested delay has been applied for the first device
	        if (op->client_delay > 0) {
	            op->client_delay = 0;
	        }

	        request_peer_fencing(op, NULL);
	    } else {
	        /* We're done with all devices and phases, so finalize operation */
	        crm_trace("Marking complex fencing op targeting %s as complete",
	                  op->target);
	        op->state = st_done;
	        finalize_op(op, msg, false);
	    }
	}

	/*!
	 * \internal
	 * \brief Start waiting for a target to self-fence via watchdog, if it can
	 *
	 * When the target does watchdog fencing, (re)arm the operation's single-op
	 * timer with the watchdog timeout so that remote_op_watchdog_done() runs
	 * when it expires.
	 *
	 * \param[in,out] op  Fencing operation
	 *
	 * \return TRUE if the watchdog wait was started, otherwise FALSE
	 */
	static gboolean
	check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
	{
	    guint wait_ms = 0;

	    if (!node_does_watchdog_fencing(op->target)) {
	        crm_debug("Skipping fallback to watchdog-fencing as %s is "
	                  "not in host-list", op->target);
	        return FALSE;
	    }

	    wait_ms = QB_MIN(stonith_watchdog_timeout_ms, UINT_MAX);

	    crm_notice("Waiting %s for %s to self-fence (%s) for "
	               "client %s " CRM_XS " id=%.8s",
	               pcmk__readable_interval(wait_ms), op->target, op->action,
	               op->client_name, op->id);

	    // Replace any per-device timer that is still pending
	    if (op->op_timer_one) {
	        g_source_remove(op->op_timer_one);
	    }
	    op->op_timer_one = g_timeout_add(wait_ms, remote_op_watchdog_done, op);

	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Ask a peer to execute a fencing operation
	 *
	 * Chooses a peer (and, with topology, a device), arms the total and
	 * per-request timers, and sends the fencing request to the peer. If no peer
	 * is available, the operation is advanced, finalized as failed or timed out,
	 * handed to watchdog self-fencing, or left waiting for more query replies,
	 * depending on its state.
	 *
	 * \param[in,out] op    Fencing operation to be executed
	 * \param[in,out] peer  If NULL or topology is in use, choose best peer to
	 *                      execute the fencing, otherwise use this peer
	 */
	static void
	request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
	{
	    const char *device = NULL;
	    int timeout;

	    CRM_CHECK(op != NULL, return);

	    crm_trace("Action %.8s targeting %s for %s is %s",
	              op->id, op->target, op->client_name,
	              stonith_op_state_str(op->state));

	    if ((op->phase == st_phase_on) && (op->devices != NULL)) {
	        /* We are in the "on" phase of a remapped topology reboot. If this
	         * device has pcmk_reboot_action="off", or doesn't support the "on"
	         * action, skip it.
	         *
	         * We can't check device properties at this point because we haven't
	         * chosen a peer for this stage yet. Instead, we check the local node's
	         * knowledge about the device. If different versions of the fence agent
	         * are installed on different nodes, there's a chance this could be
	         * mistaken, but the worst that could happen is we don't try turning the
	         * node back on when we should.
	         */
	        device = op->devices->data;
	        if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF,
	                         pcmk__str_none)) {
	            crm_info("Not turning %s back on using %s because the device is "
	                     "configured to stay off (pcmk_reboot_action='off')",
	                     op->target, device);
	            advance_topology_device_in_level(op, device, NULL);
	            return;
	        }
	        if (!fenced_device_supports_on(device)) {
	            crm_info("Not turning %s back on using %s because the agent "
	                     "doesn't support 'on'", op->target, device);
	            advance_topology_device_in_level(op, device, NULL);
	            return;
	        }
	    }

	    timeout = op->base_timeout;
	    if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
	        peer = stonith_choose_peer(op);
	    }

	    // The total timer is armed only once per operation
	    if (!op->op_timer_total) {
	        op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer);
	        op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
	        report_timeout_period(op, op->total_timeout);
	        crm_info("Total timeout set to %ds for peer's fencing targeting %s for %s "
	                 CRM_XS " id=%.8s",
	                 op->total_timeout, op->target, op->client_name, op->id);
	    }

	    if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
	        /* Ignore the caller's peer preference if topology is in use, because
	         * that peer might not have access to the required device. With
	         * topology, stonith_choose_peer() removes the device from further
	         * consideration, so the timeout must be calculated beforehand.
	         *
	         * @TODO Basing the total timeout on the caller's preferred peer (above)
	         *       is less than ideal.
	         */
	        peer = stonith_choose_peer(op);

	        device = op->devices->data;
	        /* Fencing timeout sent to peer takes no delay into account.
	         * The peer will add a dedicated timer for any delay upon
	         * schedule_stonith_command().
	         */
	        timeout = get_device_timeout(op, peer, device, false);
	    }

	    if (peer) {
	        int timeout_one = 0;
	        xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
	        const crm_node_t *peer_node =
	            pcmk__get_node(0, peer->host, NULL,
	                           pcmk__node_search_cluster_member);

	        if (op->client_delay > 0) {
	           /* Take requested fencing delay into account to prevent it from
	            * eating up the timeout.
	            */
	            timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay;
	        }

	        crm_xml_add(remote_op, PCMK__XA_ST_REMOTE_OP, op->id);
	        crm_xml_add(remote_op, PCMK__XA_ST_TARGET, op->target);
	        crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ACTION, op->action);
	        crm_xml_add(remote_op, PCMK__XA_ST_ORIGIN, op->originator);
	        crm_xml_add(remote_op, PCMK__XA_ST_CLIENTID, op->client_id);
	        crm_xml_add(remote_op, PCMK__XA_ST_CLIENTNAME, op->client_name);
	        crm_xml_add_int(remote_op, PCMK__XA_ST_TIMEOUT, timeout);
	        crm_xml_add_int(remote_op, PCMK__XA_ST_CALLOPT, op->call_options);
	        crm_xml_add_int(remote_op, PCMK__XA_ST_DELAY, op->client_delay);

	        if (device) {
	            timeout_one += TIMEOUT_MULTIPLY_FACTOR *
	                           get_device_timeout(op, peer, device, true);
	            crm_notice("Requesting that %s perform '%s' action targeting %s "
	                       "using %s " CRM_XS " for client %s (%ds)",
	                       peer->host, op->action, op->target, device,
	                       op->client_name, timeout_one);
	            crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ID, device);

	        } else {
	            timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
	            crm_notice("Requesting that %s perform '%s' action targeting %s "
	                       CRM_XS " for client %s (%ds, %s)",
	                       peer->host, op->action, op->target, op->client_name,
	                       timeout_one,
	                       pcmk__readable_interval(stonith_watchdog_timeout_ms));
	        }

	        op->state = st_exec;
	        if (op->op_timer_one) {
	            g_source_remove(op->op_timer_one);
	            op->op_timer_one = 0;
	        }

	        if (!is_watchdog_fencing(op, device)
	            || !check_watchdog_fencing_and_wait(op)) {

	            /* Some thoughts about self-fencing cases reaching this point:
	               - Actually check in check_watchdog_fencing_and_wait
	                 shouldn't fail if STONITH_WATCHDOG_ID is
	                 chosen as fencing-device and it being present implies
	                 watchdog-fencing is enabled anyway
	               - If watchdog-fencing is disabled either in general or for
	                 a specific target - detected in check_watchdog_fencing_and_wait -
	                 for some other kind of self-fencing we can't expect
	                 a success answer but timeout is fine if the node doesn't
	                 come back in between
	               - Delicate might be the case where we have watchdog-fencing
	                 enabled for a node but the watchdog-fencing-device isn't
	                 explicitly chosen for self-fencing. Local scheduler execution
	                 in sbd might detect the node as unclean and lead to timely
	                 self-fencing. Otherwise the selection of
	                 PCMK_OPT_STONITH_WATCHDOG_TIMEOUT at least is questionable.
	             */

	            /* coming here we're not waiting for watchdog timeout -
	               thus engage timer with timeout evaluated before */
	            op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
	        }

	        pcmk__cluster_send_message(peer_node, crm_msg_stonith_ng, remote_op);
	        peer->tried = TRUE;
	        free_xml(remote_op);
	        return;

	    } else if (op->phase == st_phase_on) {
	        /* A remapped "on" cannot be executed, but the node was already
	         * turned off successfully, so ignore the error and continue.
	         */
	        crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
	                 "after successful 'off'", device, op->target);
	        advance_topology_device_in_level(op, device, NULL);
	        return;

	    } else if (op->owner == FALSE) {
	        crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
	                op->action, op->target, op->client_name);

	    } else if (op->query_timer == 0) {
	        /* We've exhausted all available peers */
	        crm_info("No remaining peers capable of fencing (%s) %s for client %s "
	                 CRM_XS " state=%s", op->action, op->target, op->client_name,
	                 stonith_op_state_str(op->state));
	        CRM_CHECK(op->state < st_done, return);
	        finalize_timed_out_op(op, "All nodes failed, or are unable, to "
	                                  "fence target");

	    } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
	        /* if the operation never left the query state,
	         * but we have all the expected replies, then no devices
	         * are available to execute the fencing operation. */

	        if (is_watchdog_fencing(op, device)
	            && check_watchdog_fencing_and_wait(op)) {
	            /* Consider a watchdog fencing targeting an offline node executing
	             * once it starts waiting for the target to self-fence. So that when
	             * the query timer pops, remote_op_query_timeout() considers the
	             * fencing already in progress.
	             */
	            op->state = st_exec;
	            return;
	        }

	        if (op->state == st_query) {
	            crm_info("No peers (out of %d) have devices capable of fencing "
	                     "(%s) %s for client %s " CRM_XS " state=%s",
	                     op->replies, op->action, op->target, op->client_name,
	                     stonith_op_state_str(op->state));

	            pcmk__reset_result(&op->result);
	            pcmk__set_result(&op->result, CRM_EX_ERROR,
	                             PCMK_EXEC_NO_FENCE_DEVICE, NULL);
	        } else {
	            if (pcmk_is_set(op->call_options, st_opt_topology)) {
	                pcmk__reset_result(&op->result);
	                pcmk__set_result(&op->result, CRM_EX_ERROR,
	                                 PCMK_EXEC_NO_FENCE_DEVICE, NULL);
	            }
	            /* ... else use existing result from previous failed attempt
	             * (topology is not in use, and no devices remain to be attempted).
	             * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
	             * prevent finalize_op() from setting the correct delegate if
	             * needed.
	             */

	            crm_info("No peers (out of %d) are capable of fencing (%s) %s "
	                     "for client %s " CRM_XS " state=%s",
	                     op->replies, op->action, op->target, op->client_name,
	                     stonith_op_state_str(op->state));
	        }

	        op->state = st_failed;
	        finalize_op(op, NULL, false);

	    } else {
	        crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
	                 "for client %s " CRM_XS " id=%.8s",
	                 op->action, op->target, (device? " using " : ""),
	                 (device? device : ""), op->client_name, op->id);
	    }
	}

	/*!
	 * \internal
	 * \brief GCompareFunc ordering query results by descending device count
	 *
	 * \param[in] a  First peer_device_info_t to compare
	 * \param[in] b  Second peer_device_info_t to compare
	 *
	 * \return Negative if \p a has more devices than \p b (so sorts first), 0 if
	 *         both have the same number, positive if \p a has fewer
	 */
	static gint
	sort_peers(gconstpointer a, gconstpointer b)
	{
	    const peer_device_info_t *lhs = a;
	    const peer_device_info_t *rhs = b;
	    const int lhs_count = lhs->ndevices;
	    const int rhs_count = rhs->ndevices;

	    // Reversed operands: the peer with more devices comes first
	    return rhs_count - lhs_count;
	}

	/*!
	 * \internal
	 * \brief Check whether every device in the target's topology has been reported
	 *
	 * A device counts as found when at least one peer in the operation's query
	 * results has it. For fencing actions, devices reported by the target itself
	 * are not counted.
	 *
	 * \param[in] op  Fencing operation with topology to check
	 *
	 * \return TRUE if the target has a topology and all of its devices were
	 *         found, otherwise FALSE
	 */
	static gboolean
	all_topology_devices_found(const remote_fencing_op_t *op)
	{
	    stonith_topology_t *topology = find_topology_for_host(op->target);
	    gboolean ignore_target = FALSE;
	    int level = 0;

	    if (topology == NULL) {
	        return FALSE;
	    }

	    /* Don't count the devices on the target node if we are killing
	     * the target node. */
	    if (pcmk__is_fencing_action(op->action)) {
	        ignore_target = TRUE;
	    }

	    for (level = 0; level < ST__LEVEL_COUNT; level++) {
	        GList *dev_iter = NULL;

	        for (dev_iter = topology->levels[level]; dev_iter != NULL;
	             dev_iter = dev_iter->next) {

	            device_properties_t *found = NULL;
	            GList *peer_iter = op->query_results;

	            // Stop scanning peers as soon as one of them has the device
	            while ((peer_iter != NULL) && (found == NULL)) {
	                peer_device_info_t *candidate = peer_iter->data;

	                peer_iter = peer_iter->next;
	                if (ignore_target
	                    && pcmk__str_eq(candidate->host, op->target, pcmk__str_casei)) {
	                    continue;
	                }
	                found = find_peer_device(op, candidate, dev_iter->data,
	                                         st_device_supports_none);
	            }

	            if (found == NULL) {
	                return FALSE;
	            }
	        }
	    }

	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Parse action-specific device properties from XML
	 *
	 * Reads the action timeout, maximum and base delay, the "required" flag
	 * (for "on" only) and the "disallowed" flag from the XML into the
	 * per-phase slots of the device properties.
	 *
	 * \param[in]     xml     XML element containing the properties
	 * \param[in]     peer    Name of peer that sent XML (for logs)
	 * \param[in]     device  Device ID (for logs)
	 * \param[in]     action  Action the properties relate to (for logs)
	 * \param[in,out] op      Fencing operation that properties are being parsed for
	 * \param[in]     phase   Phase the properties relate to
	 * \param[in,out] props   Device properties to update
	 */
	static void
	parse_action_specific(const xmlNode *xml, const char *peer, const char *device,
	                      const char *action, remote_fencing_op_t *op,
	                      enum st_remap_phase phase, device_properties_t *props)
	{
	    // Each value defaults to 0 when the attribute is absent
	    props->custom_action_timeout[phase] = 0;
	    crm_element_value_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
	                          &props->custom_action_timeout[phase]);
	    if (props->custom_action_timeout[phase]) {
	        crm_trace("Peer %s with device %s returned %s action timeout %ds",
	                  peer, device, action, props->custom_action_timeout[phase]);
	    }

	    props->delay_max[phase] = 0;
	    crm_element_value_int(xml, PCMK__XA_ST_DELAY_MAX, &props->delay_max[phase]);
	    if (props->delay_max[phase]) {
	        crm_trace("Peer %s with device %s returned maximum of random delay %ds for %s",
	                  peer, device, props->delay_max[phase], action);
	    }

	    props->delay_base[phase] = 0;
	    crm_element_value_int(xml, PCMK__XA_ST_DELAY_BASE,
	                          &props->delay_base[phase]);
	    if (props->delay_base[phase]) {
	        crm_trace("Peer %s with device %s returned base delay %ds for %s",
	                  peer, device, props->delay_base[phase], action);
	    }

	    /* Handle devices with automatic unfencing */
	    if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
	        int required = 0;

	        crm_element_value_int(xml, PCMK__XA_ST_REQUIRED, &required);
	        if (required) {
	            crm_trace("Peer %s requires device %s to execute for action %s",
	                      peer, device, action);
	            add_required_device(op, device);
	        }
	    }

	    /* If a reboot is remapped to off+on, it's possible that a node is allowed
	     * to perform one action but not another.
	     */
	    if (pcmk__xe_attr_is_true(xml, PCMK__XA_ST_ACTION_DISALLOWED)) {
	        props->disallowed[phase] = TRUE;
	        crm_trace("Peer %s is disallowed from executing %s for device %s",
	                  peer, action, device);
	    }
	}

	/*!
	 * \internal
	 * \brief Parse one device's properties from peer's XML query reply
	 *
	 * Allocates a new properties object, stores it in the peer's device table
	 * under a copy of the device ID, and fills it from the XML: the verified
	 * flag, the device support flags, and the action-specific values for the
	 * requested action and for any "off"/"on" child elements.
	 *
	 * \param[in]     xml     XML node containing device properties
	 * \param[in,out] op      Operation that query and reply relate to
	 * \param[in,out] peer    Peer's device information
	 * \param[in]     device  ID of device being parsed
	 */
	static void
	add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
	                      peer_device_info_t *peer, const char *device)
	{
	    xmlNode *child;
	    int verified = 0;
	    device_properties_t *props =
	        pcmk__assert_alloc(1, sizeof(device_properties_t));
	    int rc = pcmk_rc_ok;

	    /* Add a new entry to this peer's devices list */
	    g_hash_table_insert(peer->devices, pcmk__str_copy(device), props);

	    /* Peers with verified (monitored) access will be preferred */
	    crm_element_value_int(xml, PCMK__XA_ST_MONITOR_VERIFIED, &verified);
	    if (verified) {
	        crm_trace("Peer %s has confirmed a verified device %s",
	                  peer->host, device);
	        props->verified = TRUE;
	    }

	    // Nodes <2.1.5 won't set this, so assume unfencing in that case
	    rc = pcmk__xe_get_flags(xml, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS,
	                            &(props->device_support_flags),
	                            st_device_supports_on);
	    if (rc != pcmk_rc_ok) {
	        crm_warn("Couldn't determine device support for %s "
	                 "(assuming unfencing): %s", device, pcmk_rc_str(rc));
	    }

	    /* Parse action-specific device properties */
	    parse_action_specific(xml, peer->host, device, op_requested_action(op),
	                          op, st_phase_requested, props);
	    for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL;
	         child = pcmk__xe_next(child)) {
	        /* Replies for "reboot" operations will include the action-specific
	         * values for "off" and "on" in child elements, just in case the reboot
	         * winds up getting remapped.
	         */
	        if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_OFF, pcmk__str_none)) {
	            parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF,
	                                  op, st_phase_off, props);

	        } else if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_ON,
	                                pcmk__str_none)) {
	            parse_action_specific(child, peer->host, device, PCMK_ACTION_ON,
	                                  op, st_phase_on, props);
	        }
	    }
	}

	/*!
	 * \internal
	 * \brief Parse a peer's XML query reply and add it to operation's results
	 *
	 * The new result is inserted into the operation's query results in
	 * sort_peers() order.
	 *
	 * \param[in,out] op        Operation that query and reply relate to
	 * \param[in]     host      Name of peer that sent this reply
	 * \param[in]     ndevices  Number of devices expected in reply
	 * \param[in]     xml       XML node containing device list
	 *
	 * \return Newly allocated result structure with parsed reply
	 */
	static peer_device_info_t *
	add_result(remote_fencing_op_t *op, const char *host, int ndevices,
	           const xmlNode *xml)
	{
	    peer_device_info_t *peer = pcmk__assert_alloc(1,
	                                                  sizeof(peer_device_info_t));
	    xmlNode *child;

	    peer->host = pcmk__str_copy(host);
	    peer->devices = pcmk__strkey_table(free, free);

	    /* Each child element describes one capable device available to the peer */
	    for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL;
	         child = pcmk__xe_next(child)) {
	        const char *device = pcmk__xe_id(child);

	        if (device) {
	            add_device_properties(child, op, peer, device);
	        }
	    }

	    // A count mismatch is logged but the reply is still used
	    peer->ndevices = g_hash_table_size(peer->devices);
	    CRM_CHECK(ndevices == peer->ndevices,
	              crm_err("Query claimed to have %d device%s but %d found",
	                      ndevices, pcmk__plural_s(ndevices), peer->ndevices));

	    op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
	    return peer;
	}

	/*!
	 * \internal
	 * \brief Process a peer's reply to a fencing query we sent
	 *
	 * The reply is parsed and recorded against the matching remote operation.
	 * Once the replies received so far are sufficient (all topology devices
	 * found, a verified device on a non-target peer, or every expected reply
	 * in), a fencing request is issued.
	 *
	 * \param[in] msg  XML reply received
	 *
	 * \return pcmk_ok on success, -errno on error
	 *
	 * \note See initiate_remote_stonith_op() for how the XML query was initially
	 *       formed, and stonith_query() for how the peer formed its XML reply.
	 */
	int
	process_remote_stonith_query(xmlNode *msg)
	{
	    xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR);
	    remote_fencing_op_t *op = NULL;
	    peer_device_info_t *peer = NULL;
	    const char *id = NULL;
	    const char *host = NULL;
	    uint32_t replies_expected;
	    int ndevices = 0;
	    gboolean have_all_replies = FALSE;
	    gboolean host_is_target = FALSE;

	    CRM_CHECK(dev != NULL, return -EPROTO);

	    id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
	    CRM_CHECK(id != NULL, return -EPROTO);

	    dev = get_xpath_object("//@" PCMK__XA_ST_AVAILABLE_DEVICES, msg, LOG_ERR);
	    CRM_CHECK(dev != NULL, return -EPROTO);
	    crm_element_value_int(dev, PCMK__XA_ST_AVAILABLE_DEVICES, &ndevices);

	    op = g_hash_table_lookup(stonith_remote_op_list, id);
	    if (op == NULL) {
	        crm_debug("Received query reply for unknown or expired operation %s",
	                  id);
	        return -EOPNOTSUPP;
	    }

	    // Expect the smaller of the active peer count and the op's own count
	    replies_expected = fencing_active_peers();
	    if (op->replies_expected < replies_expected) {
	        replies_expected = op->replies_expected;
	    }

	    // Count this reply before testing whether the set is complete
	    ++op->replies;
	    if ((op->replies >= replies_expected) && (op->state == st_query)) {
	        have_all_replies = TRUE;
	    }

	    host = crm_element_value(msg, PCMK__XA_SRC);
	    host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);

	    crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
	             op->replies, replies_expected, host,
	             op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
	    if (ndevices > 0) {
	        peer = add_result(op, host, ndevices, dev);
	    }

	    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

	    if (pcmk_is_set(op->call_options, st_opt_topology)) {
	        /* If we start the fencing before all the topology results are in,
	         * it is possible fencing levels will be skipped because of the missing
	         * query results. */
	        if ((op->state == st_query) && all_topology_devices_found(op)) {
	            /* All the query results are in for the topology, start the fencing ops. */
	            crm_trace("All topology devices found");
	            request_peer_fencing(op, peer);
	            return pcmk_ok;
	        }

	        if (have_all_replies) {
	            crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
	                     replies_expected, op->replies);
	            request_peer_fencing(op, NULL);
	        }
	        return pcmk_ok;
	    }

	    if (op->state == st_query) {
	        int nverified = count_peer_devices(op, peer, TRUE,
	                                           fenced_support_flag(op->action));

	        /* We have a result for a non-topology fencing op that looks promising,
	         * go ahead and start fencing before query timeout */
	        if ((peer != NULL) && !host_is_target && nverified) {
	            /* we have a verified device living on a peer that is not the target */
	            crm_trace("Found %d verified device%s",
	                      nverified, pcmk__plural_s(nverified));
	            request_peer_fencing(op, peer);

	        } else if (have_all_replies) {
	            crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
	                     replies_expected, op->replies);
	            request_peer_fencing(op, NULL);

	        } else {
	            crm_trace("Waiting for more peer results before launching fencing operation");
	        }
	        return pcmk_ok;
	    }

	    // The operation has moved past the query state; a late reply is dropped
	    if ((peer != NULL) && (op->state == st_done)) {
	        crm_info("Discarding query result from %s (%d device%s): "
	                 "Operation is %s", peer->host,
	                 peer->ndevices, pcmk__plural_s(peer->ndevices),
	                 stonith_op_state_str(op->state));
	    }

	    return pcmk_ok;
	}

	/*!
	 * \internal
	 * \brief Handle a peer's reply to a fencing request
	 *
	 * Parse a fencing reply from XML, and either finalize the operation
	 * or attempt another device as appropriate.
	 *
	 * The operation takes over the parsed result. Broadcast replies finalize the
	 * operation directly; non-broadcast replies are only acted on if this node
	 * originated the operation. With topology, a success advances to the next
	 * device and a failure to the next level; without topology, the operation
	 * is finalized on success or timeout and otherwise retried with another peer.
	 *
	 * \param[in] msg  XML reply received
	 */
	void
	fenced_process_fencing_reply(xmlNode *msg)
	{
	    const char *id = NULL;
	    const char *device = NULL;
	    remote_fencing_op_t *op = NULL;
	    xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR);
	    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;

	    CRM_CHECK(dev != NULL, return);

	    id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
	    CRM_CHECK(id != NULL, return);

	    dev = stonith__find_xe_with_result(msg);
	    CRM_CHECK(dev != NULL, return);

	    stonith__xe_get_result(dev, &result);

	    device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);

	    if (stonith_remote_op_list) {
	        op = g_hash_table_lookup(stonith_remote_op_list, id);
	    }

	    if ((op == NULL) && pcmk__result_ok(&result)) {
	        /* Record successful fencing operations */
	        const char *client_id = crm_element_value(dev, PCMK__XA_ST_CLIENTID);

	        op = create_remote_stonith_op(client_id, dev, TRUE);
	    }

	    if (op == NULL) {
	        /* Could be for an event that began before we started */
	        /* TODO: Record the op for later querying */
	        crm_info("Received peer result of unknown or expired operation %s", id);
	        pcmk__reset_result(&result);
	        return;
	    }

	    pcmk__reset_result(&op->result);
	    op->result = result; // The operation takes ownership of the result

	    if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
	        crm_err("Received outdated reply for device %s (instead of %s) to "
	                "fence (%s) %s. Operation already timed out at peer level.",
	                device, (const char *) op->devices->data, op->action, op->target);
	        return;
	    }

	    if (pcmk__str_eq(crm_element_value(msg, PCMK__XA_SUBT),
	                     PCMK__VALUE_BROADCAST, pcmk__str_none)) {

	        if (pcmk__result_ok(&op->result)) {
	            op->state = st_done;
	        } else {
	            op->state = st_failed;
	        }
	        finalize_op(op, msg, false);
	        return;

	    } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
	        /* If this isn't a remote level broadcast, and we are not the
	         * originator of the operation, we should not be receiving this msg. */
	        crm_err("Received non-broadcast fencing result for operation %.8s "
	                "we do not own (device %s targeting %s)",
	                op->id, device, op->target);
	        return;
	    }

	    if (pcmk_is_set(op->call_options, st_opt_topology)) {
	        /* Distinct name so the device ID read from the message does not
	         * shadow the one read from the result element above
	         */
	        const char *reply_device = NULL;
	        const char *reason = op->result.exit_reason;

	        /* We own the op, and it is complete. broadcast the result to all nodes
	         * and notify our local clients. */
	        if (op->state == st_done) {
	            finalize_op(op, msg, false);
	            return;
	        }

	        reply_device = crm_element_value(msg, PCMK__XA_ST_DEVICE_ID);

	        if ((op->phase == st_phase_on) && !pcmk__result_ok(&op->result)) {
	            /* A remapped "on" failed, but the node was already turned off
	             * successfully, so ignore the error and continue.
	             */
	            crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
	                     "after successful 'off'",
	                     reply_device,
	                     pcmk_exec_status_str(op->result.execution_status),
	                     (reason == NULL)? "" : ": ",
	                     (reason == NULL)? "" : reason,
	                     op->target);
	            pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
	        } else {
	            crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
	                       "%s%s%s%s",
	                       op->action, op->target,
	                       ((reply_device == NULL)? "" : " using "),
	                       ((reply_device == NULL)? "" : reply_device),
	                       op->client_name,
	                       op->originator,
	                       pcmk_exec_status_str(op->result.execution_status),
	                       (reason == NULL)? "" : " (",
	                       (reason == NULL)? "" : reason,
	                       (reason == NULL)? "" : ")");
	        }

	        if (pcmk__result_ok(&op->result)) {
	            /* An operation completed successfully. Try another device if
	             * necessary, otherwise mark the operation as done. */
	            advance_topology_device_in_level(op, reply_device, msg);
	            return;
	        } else {
	            /* This device failed, time to try another topology level. If no other
	             * levels are available, mark this operation as failed and report results. */
	            if (advance_topology_level(op, false) != pcmk_rc_ok) {
	                op->state = st_failed;
	                finalize_op(op, msg, false);
	                return;
	            }
	        }

	    } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
	        op->state = st_done;
	        finalize_op(op, msg, false);
	        return;

	    } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
	               && (op->devices == NULL)) {
	        /* If the operation timed out don't bother retrying other peers. */
	        op->state = st_failed;
	        finalize_op(op, msg, false);
	        return;

	    } else {
	        /* fall-through and attempt other fencing action using another peer */
	    }

	    /* Retry on failure */
	    crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
	              op->target, op->originator, op->client_name,
	              pcmk_exec_status_str(op->result.execution_status));
	    request_peer_fencing(op, NULL);
	}

	/*!
	 * \brief Check whether a target was already fenced recently enough
	 *
	 * Scan the remote fencing operation table for an operation against \p target
	 * that is in state \c st_done, used the same \p action, and completed no more
	 * than \p tolerance seconds ago.
	 *
	 * \param[in] tolerance  Number of seconds within which a prior result counts
	 * \param[in] target     Name of fencing target
	 * \param[in] action     Fencing action to look for
	 *
	 * \return TRUE if a matching recent operation exists, otherwise FALSE (also
	 *         when \p tolerance is not positive, the operation table does not
	 *         exist, or \p target or \p action is NULL)
	 */
	gboolean
	stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
	{
	    GHashTableIter iter;
	    time_t now = time(NULL);
	    remote_fencing_op_t *rop = NULL;

	    if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
	        action == NULL) {
	        return FALSE;
	    }

	    g_hash_table_iter_init(&iter, stonith_remote_op_list);
	    while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
	        if (strcmp(rop->target, target) != 0) {
	            continue;
	        } else if (rop->state != st_done) {
	            continue;
	        /* We don't have to worry about remapped reboots here
	         * because if state is done, any remapping has been undone
	         */
	        } else if (strcmp(rop->action, action) != 0) {
	            continue;
	        } else if ((rop->completed + tolerance) < now) {
	            // Completed too long ago to count
	            continue;
	        }

	        crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
	                   target, action, tolerance, rop->delegate, rop->originator);
	        return TRUE;
	    }
	    return FALSE;
	}
	diff --git a/include/crm/cluster/compat.h b/include/crm/cluster/compat.h
	index 6802edfb5c..b197849495 100644
	--- a/include/crm/cluster/compat.h
	+++ b/include/crm/cluster/compat.h
	@@ -1,194 +1,190 @@
	/*
	- * Copyright 2004-2024 the Pacemaker project contributors
	+ * Copyright 2004-2025 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU Lesser General Public License
	* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
	*/

	#ifndef PCMK__CRM_CLUSTER_COMPAT__H
	# define PCMK__CRM_CLUSTER_COMPAT__H

	#include <stdint.h> // uint32_t
	#include <sys/types.h> // size_t

	#include <glib.h> // gboolean, guint
	#include <libxml/tree.h> // xmlNode

	#if SUPPORT_COROSYNC
	#include <corosync/cpg.h> // cpg_handle_t
	#endif // SUPPORT_COROSYNC

	#include <crm/cluster.h> // crm_node_t

	#ifdef __cplusplus
	extern "C" {
	#endif

	/**
	* \file
	* \brief Deprecated Pacemaker cluster API
	* \ingroup cluster
	* \deprecated Do not include this header directly. The cluster APIs in this
	* header, and the header itself, will be removed in a future
	* release.
	*/

	//! \deprecated Do not use
	enum crm_get_peer_flags {
	CRM_GET_PEER_CLUSTER = 0x0001,
	CRM_GET_PEER_REMOTE = 0x0002,
	CRM_GET_PEER_ANY = CRM_GET_PEER_CLUSTER\|CRM_GET_PEER_REMOTE,
	};

	// NOTE: sbd (as of at least 1.5.2) uses this
	//! \deprecated Use \c pcmk_cluster_t instead
	typedef pcmk_cluster_t crm_cluster_t;

	//! \deprecated Do not use Pacemaker for cluster node caching
	crm_node_t *crm_get_peer(unsigned int id, const char *uname);

	//! \deprecated Do not use Pacemaker for cluster node caching
	crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags);

	//! \deprecated Use \c stonith_api_kick() from libstonithd instead
	int crm_terminate_member(int nodeid, const char *uname, void *unused);

	//! \deprecated Use \c stonith_api_kick() from libstonithd instead
	int crm_terminate_member_no_mainloop(int nodeid, const char *uname,
	int *connection);

	-/*!
	- * \deprecated Use
	- * <tt>crm_xml_add(xml, attr, pcmk__cluster_node_uuid(node))</tt>
	- * instead
	- */
	+//! \deprecated Do not use
	void set_uuid(xmlNode xml, const char attr, crm_node_t *node);

	#if SUPPORT_COROSYNC

	//! \deprecated Do not use
	gboolean cluster_connect_cpg(pcmk_cluster_t *cluster);

	//! \deprecated Do not use
	void cluster_disconnect_cpg(pcmk_cluster_t *cluster);

	//! \deprecated Do not use
	uint32_t get_local_nodeid(cpg_handle_t handle);

	//! \deprecated Do not use
	void pcmk_cpg_membership(cpg_handle_t handle,
	const struct cpg_name *group_name,
	const struct cpg_address *member_list,
	size_t member_list_entries,
	const struct cpg_address *left_list,
	size_t left_list_entries,
	const struct cpg_address *joined_list,
	size_t joined_list_entries);

	//! \deprecated Do not use
	gboolean crm_is_corosync_peer_active(const crm_node_t * node);

	//! \deprecated Do not use
	gboolean send_cluster_text(enum crm_ais_msg_class msg_class, const char *data,
	gboolean local, const crm_node_t *node,
	enum crm_ais_msg_types dest);

	//! \deprecated Do not use
	char *pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid,
	void msg, uint32_t kind, const char **from);

	#endif // SUPPORT_COROSYNC

	// NOTE: sbd (as of at least 1.5.2) uses this
	//! \deprecated Use \c pcmk_cluster_connect() instead
	gboolean crm_cluster_connect(pcmk_cluster_t *cluster);

	//! \deprecated Use \c pcmk_cluster_disconnect() instead
	void crm_cluster_disconnect(pcmk_cluster_t *cluster);

	//! \deprecated Do not use
	int crm_remote_peer_cache_size(void);

	//! \deprecated Do not use
	void crm_remote_peer_cache_refresh(xmlNode *cib);

	//! \deprecated Do not use
	crm_node_t crm_remote_peer_get(const char node_name);

	//! \deprecated Do not use
	void crm_remote_peer_cache_remove(const char *node_name);

	//! \deprecated Do not use
	gboolean crm_is_peer_active(const crm_node_t *node);

	//! \deprecated Do not use
	guint crm_active_peers(void);

	//! \deprecated Do not use
	guint reap_crm_member(uint32_t id, const char *name);

	// NOTE: sbd (as of at least 1.5.2) uses this enum
	//!@{
	//! \deprecated Use <tt>enum pcmk_cluster_layer</tt> instead
	enum cluster_type_e {
	// NOTE: sbd (as of at least 1.5.2) uses this value
	pcmk_cluster_unknown = pcmk_cluster_layer_unknown,

	pcmk_cluster_invalid = pcmk_cluster_layer_invalid,

	// NOTE: sbd (as of at least 1.5.2) uses this value
	pcmk_cluster_corosync = pcmk_cluster_layer_corosync,
	};
	//!@}

	// NOTE: sbd (as of at least 1.5.2) uses this
	//! \deprecated Use \c pcmk_cluster_layer_text() instead
	const char *name_for_cluster_type(enum cluster_type_e type);

	// NOTE: sbd (as of at least 1.5.2) uses this
	//! \deprecated Use \c pcmk_get_cluster_layer() instead
	enum cluster_type_e get_cluster_type(void);

	//! \deprecated Use \c pcmk_get_cluster_layer() instead
	gboolean is_corosync_cluster(void);

	//! \deprecated Do not use
	void crm_peer_init(void);

	//! \deprecated Do not use
	void crm_peer_destroy(void);

	//! \deprecated Do not use
	gboolean send_cluster_message(const crm_node_t *node,
	enum crm_ais_msg_types service,
	const xmlNode *data, gboolean ordered);

	//! \deprecated Do not use
	const char *crm_peer_uuid(crm_node_t *node);

	//! \deprecated Do not use
	enum crm_ais_msg_types text2msg_type(const char *text);

	//! \deprecated Do not use
	char *get_node_name(uint32_t nodeid);

	//! \deprecated Do not use
	const char *get_local_node_name(void);

	//! \deprecated Do not use
	void crm_set_autoreap(gboolean enable);

	//! \deprecated Do not use
	void crm_set_status_callback(void (*dispatch)(enum crm_status_type,
	crm_node_t , const void ));

	//! \deprecated Do not use
	const char *crm_peer_uname(const char *uuid);

	#ifdef __cplusplus
	}
	#endif

	#endif // PCMK__CRM_CLUSTER_COMPAT__H
	diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
	index fc24c77310..48760de3fb 100644
	--- a/include/crm/cluster/internal.h
	+++ b/include/crm/cluster/internal.h
	@@ -1,197 +1,197 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU Lesser General Public License
	* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
	*/

	#ifndef PCMK__CRM_CLUSTER_INTERNAL__H
	# define PCMK__CRM_CLUSTER_INTERNAL__H

	# include <stdbool.h>
	# include <stdint.h> // uint32_t, uint64_t

	# include <glib.h> // gboolean

	# include <crm/cluster.h>

	enum crm_proc_flag {
	/* @COMPAT When crm_node_t:processes is made internal, we can merge this
	* into node flags or turn it into a boolean. Until then, in theory
	* something could depend on these particular numeric values.
	*/
	crm_proc_none = 0x00000001,

	// Cluster layers
	crm_proc_cpg = 0x04000000,
	};

	// Used with node cache search functions
	enum pcmk__node_search_flags {
	//! Does not affect search
	pcmk__node_search_none = 0,

	//! Search for cluster nodes from membership cache
	pcmk__node_search_cluster_member = (1 << 0),

	//! Search for remote nodes
	pcmk__node_search_remote = (1 << 1),

	//! Search for cluster member nodes and remote nodes
	pcmk__node_search_any = pcmk__node_search_cluster_member
	\|pcmk__node_search_remote,

	/* @COMPAT The values before this must stay the same until we can drop
	* support for enum crm_get_peer_flags
	*/

	//! Search for cluster nodes from CIB (as of last cache refresh)
	pcmk__node_search_cluster_cib = (1 << 2),
	};

	/*!
	 * \internal
	 * \brief Map the current cluster layer to its process flag
	 *
	 * \return \c crm_proc_cpg if the current cluster layer is Corosync, otherwise
	 *         \c crm_proc_none
	 */
	static inline uint32_t
	crm_get_cluster_proc(void)
	{
	    if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
	        return crm_proc_cpg;
	    }
	    return crm_proc_none;
	}

	/*!
	* \internal
	* \brief Get log-friendly string description of a Corosync return code
	*
	* \param[in] error Corosync return code
	*
	* \return Log-friendly string description corresponding to \p error
	*/
	static inline const char *
	pcmk__cs_err_str(int error)
	{
	# if SUPPORT_COROSYNC
	switch (error) {
	case CS_OK: return "OK";
	case CS_ERR_LIBRARY: return "Library error";
	case CS_ERR_VERSION: return "Version error";
	case CS_ERR_INIT: return "Initialization error";
	case CS_ERR_TIMEOUT: return "Timeout";
	case CS_ERR_TRY_AGAIN: return "Try again";
	case CS_ERR_INVALID_PARAM: return "Invalid parameter";
	case CS_ERR_NO_MEMORY: return "No memory";
	case CS_ERR_BAD_HANDLE: return "Bad handle";
	case CS_ERR_BUSY: return "Busy";
	case CS_ERR_ACCESS: return "Access error";
	case CS_ERR_NOT_EXIST: return "Doesn't exist";
	case CS_ERR_NAME_TOO_LONG: return "Name too long";
	case CS_ERR_EXIST: return "Exists";
	case CS_ERR_NO_SPACE: return "No space";
	case CS_ERR_INTERRUPT: return "Interrupt";
	case CS_ERR_NAME_NOT_FOUND: return "Name not found";
	case CS_ERR_NO_RESOURCES: return "No resources";
	case CS_ERR_NOT_SUPPORTED: return "Not supported";
	case CS_ERR_BAD_OPERATION: return "Bad operation";
	case CS_ERR_FAILED_OPERATION: return "Failed operation";
	case CS_ERR_MESSAGE_ERROR: return "Message error";
	case CS_ERR_QUEUE_FULL: return "Queue full";
	case CS_ERR_QUEUE_NOT_AVAILABLE: return "Queue not available";
	case CS_ERR_BAD_FLAGS: return "Bad flags";
	case CS_ERR_TOO_BIG: return "Too big";
	case CS_ERR_NO_SECTIONS: return "No sections";
	}
	# endif
	return "Corosync error";
	}

	# if SUPPORT_COROSYNC

	#if 0
	/* This is the new way to do it, but we still support all Corosync 2 versions,
	* and this isn't always available. A better alternative here would be to check
	* for support in the configure script and enable this conditionally.
	*/
	#define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP)
	#else
	#define pcmk__init_cmap(handle) cmap_initialize(handle)
	#endif

	char *pcmk__corosync_cluster_name(void);
	bool pcmk__corosync_add_nodes(xmlNode *xml_parent);

	void pcmk__cpg_confchg_cb(cpg_handle_t handle,
	const struct cpg_name *group_name,
	const struct cpg_address *member_list,
	size_t member_list_entries,
	const struct cpg_address *left_list,
	size_t left_list_entries,
	const struct cpg_address *joined_list,
	size_t joined_list_entries);

	char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id,
	uint32_t pid, void content, uint32_t kind,
	const char **from);

	# endif

	-const char pcmk__cluster_node_uuid(crm_node_t node);
	+const char pcmk__cluster_get_xml_id(crm_node_t node);
	char *pcmk__cluster_node_name(uint32_t nodeid);
	const char *pcmk__cluster_local_node_name(void);
	const char *pcmk__node_name_from_uuid(const char *uuid);

	crm_node_t crm_update_peer_proc(const char source, crm_node_t * peer,
	uint32_t flag, const char *status);
	crm_node_t pcmk__update_peer_state(const char source, crm_node_t *node,
	const char *state, uint64_t membership);

	void pcmk__update_peer_expected(const char source, crm_node_t node,
	const char *expected);
	void pcmk__reap_unseen_nodes(uint64_t ring_id);

	void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
	gboolean),
	void (*destroy) (gpointer));

	enum crm_ais_msg_types pcmk__cluster_parse_msg_type(const char *text);
	bool pcmk__cluster_send_message(const crm_node_t *node,
	enum crm_ais_msg_types service,
	const xmlNode *data);

	// Membership

	void pcmk__cluster_init_node_caches(void);
	void pcmk__cluster_destroy_node_caches(void);

	void pcmk__cluster_set_autoreap(bool enable);
	void pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type,
	crm_node_t *,
	const void *));

	bool pcmk__cluster_is_node_active(const crm_node_t *node);
	unsigned int pcmk__cluster_num_active_nodes(void);
	unsigned int pcmk__cluster_num_remote_nodes(void);

	crm_node_t pcmk__cluster_lookup_remote_node(const char node_name);
	void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name);
	void pcmk__cluster_forget_remote_node(const char *node_name);
	crm_node_t pcmk__search_node_caches(unsigned int id, const char uname,
	- uint32_t flags);
	+ const char *xml_id, uint32_t flags);
	void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id);

	void pcmk__refresh_node_caches_from_cib(xmlNode *cib);

	crm_node_t pcmk__get_node(unsigned int id, const char uname,
	const char *uuid, uint32_t flags);

	#endif // PCMK__CRM_CLUSTER_INTERNAL__H
	diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
	index 0447031dc4..7a6577f074 100644
	--- a/lib/cluster/cluster.c
	+++ b/lib/cluster/cluster.c
	@@ -1,547 +1,551 @@
	/*
	- * Copyright 2004-2024 the Pacemaker project contributors
	+ * Copyright 2004-2025 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU Lesser General Public License
	* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>
	#include <dlfcn.h>

	#include <inttypes.h> // PRIu32
	#include <stdbool.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <string.h>
	#include <stdlib.h>
	#include <time.h>
	#include <sys/param.h>
	#include <sys/types.h>
	#include <sys/utsname.h> // uname()

	#include <glib.h> // gboolean

	#include <crm/crm.h>

	#include <crm/common/ipc.h>
	#include <crm/common/xml.h>
	#include <crm/cluster/internal.h>
	#include "crmcluster_private.h"

	CRM_TRACE_INIT_DATA(cluster);

	/*!
	* \internal
	* \brief Get the message type equivalent of a string
	*
	* \param[in] text String of message type
	*
	* \return Message type equivalent of \p text
	*/
	enum crm_ais_msg_types
	pcmk__cluster_parse_msg_type(const char *text)
	{
	CRM_CHECK(text != NULL, return crm_msg_none);

	text = pcmk__message_name(text);

	if (pcmk__str_eq(text, "ais", pcmk__str_none)) {
	return crm_msg_ais;
	}
	if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_none)) {
	return crm_msg_cib;
	}
	if (pcmk__str_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) {
	return crm_msg_crmd;
	}
	if (pcmk__str_eq(text, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
	return crm_msg_te;
	}
	if (pcmk__str_eq(text, CRM_SYSTEM_PENGINE, pcmk__str_none)) {
	return crm_msg_pe;
	}
	if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_none)) {
	return crm_msg_lrmd;
	}
	if (pcmk__str_eq(text, CRM_SYSTEM_STONITHD, pcmk__str_none)) {
	return crm_msg_stonithd;
	}
	if (pcmk__str_eq(text, "stonith-ng", pcmk__str_none)) {
	return crm_msg_stonith_ng;
	}
	if (pcmk__str_eq(text, "attrd", pcmk__str_none)) {
	return crm_msg_attrd;
	}
	return crm_msg_none;
	}

	/*!
	* \internal
	- * \brief Get a node's cluster-layer UUID, setting it if not already set
	+ * \brief Get a node's XML ID in the CIB, setting it if not already set
	*
	* \param[in,out] node Node to check
	*
	- * \return Cluster-layer node UUID of \p node, or \c NULL if unknown
	+ * \return CIB XML ID of \p node if known, otherwise \c NULL
	*/
	const char *
	-pcmk__cluster_node_uuid(crm_node_t *node)
	+pcmk__cluster_get_xml_id(crm_node_t *node)
	{
	const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();

	if (node == NULL) {
	return NULL;
	}
	if (node->uuid != NULL) {
	return node->uuid;
	}

	+ // xml_id is always set when a Pacemaker Remote node entry is created
	+ CRM_CHECK(!pcmk_is_set(node->flags, crm_remote_node), return NULL);
	+
	switch (cluster_layer) {
	#if SUPPORT_COROSYNC
	case pcmk_cluster_layer_corosync:
	node->uuid = pcmk__corosync_uuid(node);
	return node->uuid;
	#endif // SUPPORT_COROSYNC

	default:
	crm_err("Unsupported cluster layer %s",
	pcmk_cluster_layer_text(cluster_layer));
	return NULL;
	}
	}

	/*!
	* \internal
	* \brief Connect to the cluster layer
	*
	* \param[in,out] cluster Initialized cluster object to connect
	*
	* \return Standard Pacemaker return code
	*/
	int
	pcmk_cluster_connect(pcmk_cluster_t *cluster)
	{
	const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
	const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);

	// cts-lab looks for this message
	crm_notice("Connecting to %s cluster layer", cluster_layer_s);

	switch (cluster_layer) {
	#if SUPPORT_COROSYNC
	case pcmk_cluster_layer_corosync:
	return pcmk__corosync_connect(cluster);
	#endif // SUPPORT_COROSYNC

	default:
	break;
	}

	crm_err("Failed to connect to unsupported cluster layer %s",
	cluster_layer_s);
	return EPROTONOSUPPORT;
	}

	/*!
	* \brief Disconnect from the cluster layer
	*
	* \param[in,out] cluster Cluster object to disconnect
	*
	* \return Standard Pacemaker return code
	*/
	int
	pcmk_cluster_disconnect(pcmk_cluster_t *cluster)
	{
	const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
	const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);

	crm_info("Disconnecting from %s cluster layer", cluster_layer_s);

	switch (cluster_layer) {
	#if SUPPORT_COROSYNC
	case pcmk_cluster_layer_corosync:
	pcmk__corosync_disconnect(cluster);
	pcmk__cluster_destroy_node_caches();
	return pcmk_rc_ok;
	#endif // SUPPORT_COROSYNC

	default:
	break;
	}

	crm_err("Failed to disconnect from unsupported cluster layer %s",
	cluster_layer_s);
	return EPROTONOSUPPORT;
	}

	/*!
	 * \brief Create a new, zero-initialized cluster object
	 *
	 * \return Newly allocated \p pcmk_cluster_t object (guaranteed not \c NULL)
	 *
	 * \note The caller owns the result and must release it with
	 *       \p pcmk_cluster_free().
	 */
	pcmk_cluster_t *
	pcmk_cluster_new(void)
	{
	    pcmk_cluster_t *cluster = pcmk__assert_alloc(1, sizeof(pcmk_cluster_t));

	    return cluster;
	}

	/*!
	 * \brief Release a \p pcmk_cluster_t object along with its owned strings
	 *
	 * Frees the object's \c uuid and \c uname members and then the object itself.
	 *
	 * \param[in,out] cluster  Cluster object to free (\c NULL is a no-op)
	 */
	void
	pcmk_cluster_free(pcmk_cluster_t *cluster)
	{
	    if (cluster != NULL) {
	        free(cluster->uuid);
	        free(cluster->uname);
	        free(cluster);
	    }
	}

	/*!
	 * \brief Set the destroy function for a cluster object
	 *
	 * \param[in,out] cluster  Cluster object
	 * \param[in]     fn       Destroy function to set
	 *
	 * \return Standard Pacemaker return code (\c EINVAL if \p cluster is \c NULL,
	 *         otherwise \c pcmk_rc_ok)
	 */
	int
	pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer))
	{
	    if (cluster == NULL) {
	        return EINVAL;
	    }
	    cluster->destroy = fn;
	    return pcmk_rc_ok;
	}

	/*!
	* \internal
	* \brief Send an XML message via the cluster messaging layer
	*
	* \param[in] node Cluster node to send message to
	* \param[in] service Message type to use in message host info
	* \param[in] data XML message to send
	*
	* \return \c true on success, or \c false otherwise
	*/
	bool
	pcmk__cluster_send_message(const crm_node_t *node,
	enum crm_ais_msg_types service, const xmlNode *data)
	{
	// @TODO Return standard Pacemaker return code
	switch (pcmk_get_cluster_layer()) {
	#if SUPPORT_COROSYNC
	case pcmk_cluster_layer_corosync:
	return pcmk__cpg_send_xml(data, node, service);
	#endif // SUPPORT_COROSYNC

	default:
	break;
	}
	return false;
	}

	/*!
	 * \internal
	 * \brief Get the node name corresponding to a cluster-layer node ID
	 *
	 * Get the node name from the cluster layer if possible. Otherwise, if for the
	 * local node, call \c uname() and get the \c nodename member from the
	 * <tt>struct utsname</tt> object.
	 *
	 * \param[in] nodeid  Node ID to check (or 0 for the local node)
	 *
	 * \return Node name corresponding to \p nodeid, or \c NULL if \p nodeid is
	 *         nonzero and the cluster layer could not supply a name
	 *
	 * \note This will fatally exit if \c uname() fails to get the local node name
	 *       or we run out of memory.
	 * \note The caller is responsible for freeing the return value using \c free().
	 */
	char *
	pcmk__cluster_node_name(uint32_t nodeid)
	{
	    char *name = NULL;
	    const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
	    const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);

	    switch (cluster_layer) {
	#if SUPPORT_COROSYNC
	        case pcmk_cluster_layer_corosync:
	            name = pcmk__corosync_name(0, nodeid);
	            if (name != NULL) {
	                return name;
	            }
	            break;
	#endif // SUPPORT_COROSYNC

	        default:
	            crm_err("Unsupported cluster layer: %s", cluster_layer_s);
	            break;
	    }

	    // The cluster layer gave no name; only the local node has a fallback
	    if (nodeid == 0) {
	        struct utsname hostinfo;

	        crm_notice("Could not get local node name from %s cluster layer, "
	                   "defaulting to local hostname",
	                   cluster_layer_s);

	        if (uname(&hostinfo) < 0) {
	            // @TODO Maybe let the caller decide what to do
	            crm_err("Failed to get the local hostname");
	            crm_exit(CRM_EX_FATAL);
	        }
	        return pcmk__str_copy(hostinfo.nodename);
	    }

	    // PRIu32 supplies only the conversion specifier, so "%" must precede it
	    crm_notice("Could not obtain a node name for node with "
	               PCMK_XA_ID "=%" PRIu32,
	               nodeid);
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Get the local node's cluster-layer node name
	 *
	 * The name is looked up once via \c pcmk__cluster_node_name(0) and then
	 * remembered in a function-level static for later calls. That lookup falls
	 * back to \c uname() when the cluster layer cannot supply a name.
	 *
	 * \return Local node's name
	 *
	 * \note This will fatally exit if \c uname() fails to get the local node name
	 *       or we run out of memory.
	 */
	const char *
	pcmk__cluster_local_node_name(void)
	{
	    // @TODO Refactor to avoid trivially leaking name at exit
	    static char *cached_name = NULL;

	    if (cached_name != NULL) {
	        return cached_name;
	    }
	    cached_name = pcmk__cluster_node_name(0);
	    return cached_name;
	}

	/*!
	* \internal
	* \brief Get the node name corresponding to a node UUID
	*
	* Look for the UUID in both the remote node cache and the cluster member cache.
	*
	* \param[in] uuid UUID to search for
	*
	* \return Node name corresponding to \p uuid if found, or \c NULL otherwise
	*/
	const char *
	pcmk__node_name_from_uuid(const char *uuid)
	{
	/* @TODO There are too many functions in libcrmcluster that look up a node
	* from the node caches (possibly creating a cache entry if none exists).
	* There are at least the following:
	* * pcmk__cluster_lookup_remote_node()
	* * pcmk__get_node()
	* * pcmk__node_name_from_uuid()
	* * pcmk__search_node_caches()
	*
	* There's a lot of duplication among them, but they all do slightly
	* different things. We should try to clean them up and consolidate them to
	* the extent possible, likely with new helper functions.
	*/
	GHashTableIter iter;
	crm_node_t *node = NULL;

	CRM_CHECK(uuid != NULL, return NULL);

	// Remote nodes have the same uname and uuid
	if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
	return uuid;
	}

	g_hash_table_iter_init(&iter, crm_peer_cache);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	- if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
	+ if (pcmk__str_eq(uuid, pcmk__cluster_get_xml_id(node),
	+ pcmk__str_none)) {
	return node->uname;
	}
	}
	return NULL;
	}

	/*!
	 * \brief Describe a cluster layer as a log-friendly string
	 *
	 * \param[in] layer  Cluster layer
	 *
	 * \return "corosync", "unknown", or "invalid" according to \p layer; any
	 *         unrecognized value is logged as an error and reported as "invalid"
	 */
	const char *
	pcmk_cluster_layer_text(enum pcmk_cluster_layer layer)
	{
	    const char *text = "invalid";

	    switch (layer) {
	        case pcmk_cluster_layer_corosync:
	            text = "corosync";
	            break;

	        case pcmk_cluster_layer_unknown:
	            text = "unknown";
	            break;

	        case pcmk_cluster_layer_invalid:
	            break;

	        default:
	            crm_err("Invalid cluster layer: %d", layer);
	            break;
	    }
	    return text;
	}

	/*!
	* \brief Get and validate the local cluster layer
	*
	* If a cluster layer is not configured via the \c PCMK__ENV_CLUSTER_TYPE local
	* option, this will try to detect an active cluster from among the supported
	* cluster layers.
	*
	* \return Local cluster layer
	*
	* \note This will fatally exit if the configured cluster layer is invalid.
	*/
	enum pcmk_cluster_layer
	pcmk_get_cluster_layer(void)
	{
	static enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown;
	const char *cluster = NULL;

	// Cluster layer is stable once set
	if (cluster_layer != pcmk_cluster_layer_unknown) {
	return cluster_layer;
	}

	cluster = pcmk__env_option(PCMK__ENV_CLUSTER_TYPE);

	if (cluster != NULL) {
	crm_info("Verifying configured cluster layer '%s'", cluster);
	cluster_layer = pcmk_cluster_layer_invalid;

	#if SUPPORT_COROSYNC
	if (pcmk__str_eq(cluster, PCMK_VALUE_COROSYNC, pcmk__str_casei)) {
	cluster_layer = pcmk_cluster_layer_corosync;
	}
	#endif // SUPPORT_COROSYNC

	if (cluster_layer == pcmk_cluster_layer_invalid) {
	crm_notice("This installation does not support the '%s' cluster "
	"infrastructure: terminating",
	cluster);
	crm_exit(CRM_EX_FATAL);
	}
	crm_info("Assuming an active '%s' cluster", cluster);

	} else {
	// Nothing configured, so test supported cluster layers
	#if SUPPORT_COROSYNC
	crm_debug("Testing with Corosync");
	if (pcmk__corosync_is_active()) {
	cluster_layer = pcmk_cluster_layer_corosync;
	}
	#endif // SUPPORT_COROSYNC

	if (cluster_layer == pcmk_cluster_layer_unknown) {
	crm_notice("Could not determine the current cluster layer");
	} else {
	crm_info("Detected an active '%s' cluster",
	pcmk_cluster_layer_text(cluster_layer));
	}
	}

	return cluster_layer;
	}

	// Deprecated functions kept only for backward API compatibility
	// LCOV_EXCL_START

	#include <crm/cluster/compat.h>

	void
	set_uuid(xmlNode xml, const char attr, crm_node_t *node)
	{
	- crm_xml_add(xml, attr, pcmk__cluster_node_uuid(node));
	+ crm_xml_add(xml, attr, pcmk__cluster_get_xml_id(node));
	}

	gboolean
	crm_cluster_connect(pcmk_cluster_t *cluster)
	{
	return pcmk_cluster_connect(cluster) == pcmk_rc_ok;
	}

	void
	crm_cluster_disconnect(pcmk_cluster_t *cluster)
	{
	pcmk_cluster_disconnect(cluster);
	}

	const char *
	name_for_cluster_type(enum cluster_type_e type)
	{
	switch (type) {
	case pcmk_cluster_corosync:
	return "corosync";
	case pcmk_cluster_unknown:
	return "unknown";
	case pcmk_cluster_invalid:
	return "invalid";
	}
	crm_err("Invalid cluster type: %d", type);
	return "invalid";
	}

	enum cluster_type_e
	get_cluster_type(void)
	{
	return (enum cluster_type_e) pcmk_get_cluster_layer();
	}

	gboolean
	is_corosync_cluster(void)
	{
	return pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync;
	}

	gboolean
	send_cluster_message(const crm_node_t *node, enum crm_ais_msg_types service,
	const xmlNode *data, gboolean ordered)
	{
	return pcmk__cluster_send_message(node, service, data);
	}

	const char *
	crm_peer_uuid(crm_node_t *peer)
	{
	- return pcmk__cluster_node_uuid(peer);
	+ return pcmk__cluster_get_xml_id(peer);
	}

	char *
	get_node_name(uint32_t nodeid)
	{
	return pcmk__cluster_node_name(nodeid);
	}

	const char *
	get_local_node_name(void)
	{
	return pcmk__cluster_local_node_name();
	}

	const char *
	crm_peer_uname(const char *uuid)
	{
	return pcmk__node_name_from_uuid(uuid);
	}

	// LCOV_EXCL_STOP
	// End deprecated API
	diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
	index 812311b75d..e9efcb795d 100644
	--- a/lib/cluster/cpg.c
	+++ b/lib/cluster/cpg.c
	@@ -1,1204 +1,1204 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU Lesser General Public License
	* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <arpa/inet.h>
	#include <inttypes.h> // PRIu32
	#include <netdb.h>
	#include <netinet/in.h>
	#include <stdbool.h>
	#include <stdint.h> // uint32_t
	#include <sys/socket.h>
	#include <sys/types.h> // size_t
	#include <sys/utsname.h>

	#include <bzlib.h>
	#include <corosync/corodefs.h>
	#include <corosync/corotypes.h>
	#include <corosync/hdb.h>
	#include <corosync/cpg.h>
	#include <qb/qbipc_common.h>
	#include <qb/qbipcc.h>
	#include <qb/qbutil.h>

	#include <crm/cluster/internal.h>
	#include <crm/common/ipc.h>
	#include <crm/common/ipc_internal.h> // PCMK__SPECIAL_PID
	#include <crm/common/mainloop.h>
	#include <crm/common/xml.h>

	#include "crmcluster_private.h"

	/* @TODO Once we can update the public API to require pcmk_cluster_t* in more
	* functions, we can ditch this in favor of cluster->cpg_handle.
	*/
	static cpg_handle_t pcmk_cpg_handle = 0;

	// @TODO These could be moved to pcmk_cluster_t* at that time as well
	static bool cpg_evicted = false;
	static GList *cs_message_queue = NULL;
	static int cs_message_timer = 0;

	struct pcmk__cpg_host_s {
	uint32_t id;
	uint32_t pid;
	gboolean local;
	enum crm_ais_msg_types type;
	uint32_t size;
	char uname[MAX_NAME];
	} __attribute__ ((packed));

	typedef struct pcmk__cpg_host_s pcmk__cpg_host_t;

	struct pcmk__cpg_msg_s {
	struct qb_ipc_response_header header __attribute__ ((aligned(8)));
	uint32_t id;
	gboolean is_compressed;

	pcmk__cpg_host_t host;
	pcmk__cpg_host_t sender;

	uint32_t size;
	uint32_t compressed_size;
	/* 584 bytes */
	char data[0];

	} __attribute__ ((packed));

	typedef struct pcmk__cpg_msg_s pcmk__cpg_msg_t;

	static void crm_cs_flush(gpointer data);

	// Length of a message's payload as stored: compressed size if compressed
	#define msg_data_len(msg) ((msg)->is_compressed? (msg)->compressed_size : (msg)->size)

	/* Evaluate code and store its result in rc. If the result is
	 * CS_ERR_TRY_AGAIN or CS_ERR_QUEUE_FULL, increment counter, sleep that many
	 * seconds, and evaluate code again while counter is below max. Any other
	 * result ends the loop immediately. The code argument is intentionally
	 * re-evaluated on every attempt.
	 */
	#define cs_repeat(rc, counter, max, code) do {                              \
	        (rc) = (code);                                                      \
	        if (((rc) == CS_ERR_TRY_AGAIN) || ((rc) == CS_ERR_QUEUE_FULL)) {    \
	            (counter)++;                                                    \
	            crm_debug("Retrying operation after %ds", (counter));           \
	            sleep(counter);                                                 \
	        } else {                                                            \
	            break;                                                          \
	        }                                                                   \
	    } while ((counter) < (max))

	/*!
	* \internal
	* \brief Get the local Corosync node ID (via CPG)
	*
	* A nonzero ID obtained by an earlier call is remembered in a function-level
	* static and returned directly without contacting CPG again.
	*
	* \param[in] handle CPG connection to use (or 0 to use new connection)
	*
	* \return Corosync ID of local node (or 0 if not known)
	*/
	uint32_t
	pcmk__cpg_local_nodeid(cpg_handle_t handle)
	{
	cs_error_t rc = CS_OK;
	int retries = 0;
	// Remembered across calls once a nonzero ID has been obtained
	static uint32_t local_nodeid = 0;
	cpg_handle_t local_handle = handle;
	cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0};
	int fd = -1;
	uid_t found_uid = 0;
	gid_t found_gid = 0;
	pid_t found_pid = 0;
	int rv = 0;

	if (local_nodeid != 0) {
	return local_nodeid;
	}

	// No connection supplied by the caller, so open a temporary one
	if (handle == 0) {
	crm_trace("Creating connection");
	cs_repeat(rc, retries, 5,
	cpg_model_initialize(&local_handle, CPG_MODEL_V1,
	(cpg_model_data_t *) &cpg_model_info,
	NULL));
	if (rc != CS_OK) {
	crm_err("Could not connect to the CPG API: %s (%d)",
	cs_strerror(rc), rc);
	return 0;
	}

	rc = cpg_fd_get(local_handle, &fd);
	if (rc != CS_OK) {
	crm_err("Could not obtain the CPG API connection: %s (%d)",
	cs_strerror(rc), rc);
	goto bail;
	}

	// CPG provider run as root (at least in given user namespace)?
	rv = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid,
	&found_uid, &found_gid);
	if (rv == 0) {
	crm_err("CPG provider is not authentic:"
	" process %lld (uid: %lld, gid: %lld)",
	(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
	(long long) found_uid, (long long) found_gid);
	goto bail;

	} else if (rv < 0) {
	crm_err("Could not verify authenticity of CPG provider: %s (%d)",
	strerror(-rv), -rv);
	goto bail;
	}
	}

	/* rc is always CS_OK here: every failure above either returned or jumped
	* to bail
	*/
	if (rc == CS_OK) {
	retries = 0;
	crm_trace("Performing lookup");
	cs_repeat(rc, retries, 5, cpg_local_get(local_handle, &local_nodeid));
	}

	if (rc != CS_OK) {
	crm_err("Could not get local node id from the CPG API: %s (%d)",
	pcmk__cs_err_str(rc), rc);
	}

	bail:
	// Only finalize a connection that was created by this function
	if (handle == 0) {
	crm_trace("Closing connection");
	cpg_finalize(local_handle);
	}
	crm_debug("Local nodeid is %u", local_nodeid);
	return local_nodeid;
	}

	/*!
	* \internal
	* \brief Callback function for Corosync message queue timer
	*
	* The recorded timer ID is cleared before flushing, because crm_cs_flush()
	* returns without sending anything while a timer ID is still set.
	*
	* \param[in] data Pointer to CPG handle (passed through to crm_cs_flush())
	*
	* \return FALSE (so that GLib removes this timeout source; crm_cs_flush()
	* registers a new timer itself if messages remain queued)
	*/
	static gboolean
	crm_cs_flush_cb(gpointer data)
	{
	cs_message_timer = 0;
	crm_cs_flush(data);
	return FALSE;
	}

	// Send no more than this many CPG messages in one flush
	#define CS_SEND_MAX 200

	/*!
	 * \internal
	 * \brief Send messages in Corosync CPG message queue
	 *
	 * At most \c CS_SEND_MAX messages are sent per call. If messages remain
	 * queued afterward (because of the limit or a send failure), a GLib timer is
	 * scheduled to call this function again.
	 *
	 * \param[in] data  Pointer to the CPG handle to send with
	 */
	static void
	crm_cs_flush(gpointer data)
	{
	    unsigned int sent = 0;
	    guint queue_len = 0;
	    cs_error_t rc = 0;
	    cpg_handle_t *handle = (cpg_handle_t *) data;

	    if ((handle == NULL) || (*handle == 0)) {
	        crm_trace("Connection is dead");
	        return;
	    }

	    queue_len = g_list_length(cs_message_queue);
	    if (((queue_len % 1000) == 0) && (queue_len > 1)) {
	        crm_err("CPG queue has grown to %u", queue_len);

	    } else if (queue_len == CS_SEND_MAX) {
	        crm_warn("CPG queue has grown to %u", queue_len);
	    }

	    if (cs_message_timer != 0) {
	        /* There is already a timer, wait until it goes off */
	        crm_trace("Timer active %d", cs_message_timer);
	        return;
	    }

	    while ((cs_message_queue != NULL) && (sent < CS_SEND_MAX)) {
	        struct iovec *iov = cs_message_queue->data;

	        rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1);
	        if (rc != CS_OK) {
	            // Leave the message at the head of the queue for the next attempt
	            break;
	        }

	        sent++;
	        crm_trace("CPG message sent, size=%llu",
	                  (unsigned long long) iov->iov_len);

	        cs_message_queue = g_list_remove(cs_message_queue, iov);
	        free(iov->iov_base);
	        free(iov);
	    }

	    queue_len -= sent;
	    do_crm_log((queue_len > 5)? LOG_INFO : LOG_TRACE,
	               "Sent %u CPG message%s (%u still queued): %s (rc=%d)",
	               sent, pcmk__plural_s(sent), queue_len, pcmk__cs_err_str(rc),
	               (int) rc);

	    if (cs_message_queue) {
	        uint32_t delay_ms = 100;

	        if (rc != CS_OK) {
	            /* Proportionally more if sending failed but cap at 1s */
	            delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len));
	        }
	        cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data);
	    }
	}

	/*!
	 * \internal
	 * \brief Dispatch function for CPG handle
	 *
	 * Processes one pending CPG event. If dispatching fails, the CPG connection
	 * is finalized and the cluster object's handle is reset to 0.
	 *
	 * \param[in,out] user_data  Cluster object
	 *
	 * \return 0 on success, -1 on error (per mainloop_io_t interface)
	 */
	static int
	pcmk_cpg_dispatch(gpointer user_data)
	{
	    cs_error_t rc = CS_OK;
	    pcmk_cluster_t *cluster = (pcmk_cluster_t *) user_data;

	    rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE);
	    if (rc != CS_OK) {
	        crm_err("Connection to the CPG API failed: %s (%d)",
	                pcmk__cs_err_str(rc), rc);
	        cpg_finalize(cluster->cpg_handle);
	        cluster->cpg_handle = 0;
	        return -1;

	    } else if (cpg_evicted) {
	        // Set by the configuration change callback when we are not a member
	        crm_err("Evicted from CPG membership");
	        return -1;
	    }
	    return 0;
	}

	/*!
	 * \internal
	 * \brief Get a printable description of a CPG message host entry
	 *
	 * \param[in] host  Host entry to describe
	 *
	 * \return "local" if the entry is flagged local, otherwise its node name if
	 *         one is recorded, otherwise "<all>"
	 */
	static inline const char *
	ais_dest(const pcmk__cpg_host_t *host)
	{
	    if (host->local) {
	        return "local";
	    }
	    return (host->size > 0)? host->uname : "<all>";
	}

	/*!
	 * \internal
	 * \brief Get a printable name for a CPG message type
	 *
	 * \param[in] type  Message type
	 *
	 * \return Name corresponding to \p type, or "unknown" if there is none
	 */
	static inline const char *
	msg_type2text(enum crm_ais_msg_types type)
	{
	    switch (type) {
	        case crm_msg_ais:           return "ais";
	        case crm_msg_cib:           return "cib";
	        case crm_msg_crmd:          return "crmd";
	        case crm_msg_pe:            return "pengine";
	        case crm_msg_te:            return "tengine";
	        case crm_msg_lrmd:          return "lrmd";
	        case crm_msg_attrd:         return "attrd";
	        case crm_msg_stonithd:      return "stonithd";
	        case crm_msg_stonith_ng:    return "stonith-ng";
	        case crm_msg_none:          break;
	    }
	    return "unknown";
	}

	/*!
	 * \internal
	 * \brief Check whether a Corosync CPG message is valid
	 *
	 * A message is rejected if its claimed total size leaves no room for a
	 * payload, if its header carries an error, if the total size disagrees with
	 * the payload size, or (for uncompressed messages) if the payload's null
	 * terminator is not exactly at its last byte.
	 *
	 * \param[in] msg  Corosync CPG message to check
	 *
	 * \return true if \p msg is valid, otherwise false
	 */
	static bool
	check_message_sanity(const pcmk__cpg_msg_t *msg)
	{
	    int32_t payload_size = msg->header.size - sizeof(pcmk__cpg_msg_t);

	    if (payload_size < 1) {
	        crm_err("%sCPG message %d from %s invalid: "
	                "Claimed size of %d bytes is too small "
	                CRM_XS " from %s[%u] to %s@%s",
	                (msg->is_compressed? "Compressed " : ""),
	                msg->id, ais_dest(&(msg->sender)),
	                (int) msg->header.size,
	                msg_type2text(msg->sender.type), msg->sender.pid,
	                msg_type2text(msg->host.type), ais_dest(&(msg->host)));
	        return false;
	    }

	    if (msg->header.error != CS_OK) {
	        crm_err("%sCPG message %d from %s invalid: "
	                "Sender indicated error %d "
	                CRM_XS " from %s[%u] to %s@%s",
	                (msg->is_compressed? "Compressed " : ""),
	                msg->id, ais_dest(&(msg->sender)),
	                msg->header.error,
	                msg_type2text(msg->sender.type), msg->sender.pid,
	                msg_type2text(msg->host.type), ais_dest(&(msg->host)));
	        return false;
	    }

	    if (msg_data_len(msg) != payload_size) {
	        crm_err("%sCPG message %d from %s invalid: "
	                "Total size %d inconsistent with payload size %d "
	                CRM_XS " from %s[%u] to %s@%s",
	                (msg->is_compressed? "Compressed " : ""),
	                msg->id, ais_dest(&(msg->sender)),
	                (int) msg->header.size, (int) msg_data_len(msg),
	                msg_type2text(msg->sender.type), msg->sender.pid,
	                msg_type2text(msg->host.type), ais_dest(&(msg->host)));
	        return false;
	    }

	    /* msg->size != (strlen(msg->data) + 1) would be a stronger check,
	     * but checking the last byte or two should be quick. The checks above
	     * guarantee msg->size >= 1 for an uncompressed message at this point.
	     */
	    if (!msg->is_compressed
	        && (((msg->size > 1) && (msg->data[msg->size - 2] == '\0'))
	            || (msg->data[msg->size - 1] != '\0'))) {
	        crm_err("CPG message %d from %s invalid: "
	                "Payload does not end at byte %llu "
	                CRM_XS " from %s[%u] to %s@%s",
	                msg->id, ais_dest(&(msg->sender)),
	                (unsigned long long) msg->size,
	                msg_type2text(msg->sender.type), msg->sender.pid,
	                msg_type2text(msg->host.type), ais_dest(&(msg->host)));
	        return false;
	    }

	    crm_trace("Verified %d-byte %sCPG message %d from %s[%u]@%s to %s@%s",
	              (int) msg->header.size, (msg->is_compressed? "compressed " : ""),
	              msg->id, msg_type2text(msg->sender.type), msg->sender.pid,
	              ais_dest(&(msg->sender)),
	              msg_type2text(msg->host.type), ais_dest(&(msg->host)));
	    return true;
	}

	/*!
	 * \internal
	 * \brief Extract text data from a Corosync CPG message
	 *
	 * When \p handle is nonzero, messages with a mismatched claimed sender ID or
	 * addressed to another node are dropped, and the sender fields of \p content
	 * are filled in (node ID always; node name if the sender left it empty).
	 *
	 * \param[in]     handle     CPG connection (to get local node ID if not known)
	 * \param[in]     sender_id  Corosync ID of node that sent message
	 * \param[in]     pid        Process ID of message sender (for logging only)
	 * \param[in,out] content    CPG message
	 * \param[out]    kind       If not \c NULL, will be set to CPG header ID
	 *                           (which should be an <tt>enum crm_ais_msg_class</tt>
	 *                           value, currently always \c crm_class_cluster)
	 * \param[out]    from       If not \c NULL, will be set to sender uname
	 *                           (valid for the lifetime of \p content)
	 *
	 * \return Newly allocated string with message data, or \c NULL if the message
	 *         is invalid or not meant for the local node
	 *
	 * \note The caller is responsible for freeing the return value using \c free().
	 */
	char *
	pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id, uint32_t pid,
	                       void *content, uint32_t *kind, const char **from)
	{
	    char *data = NULL;
	    pcmk__cpg_msg_t *msg = content;

	    if (handle != 0) {
	        // Do filtering and field massaging
	        uint32_t local_nodeid = pcmk__cpg_local_nodeid(handle);
	        const char *local_name = pcmk__cluster_local_node_name();

	        if ((msg->sender.id != 0) && (msg->sender.id != sender_id)) {
	            crm_err("Nodeid mismatch from %" PRIu32 ".%" PRIu32
	                    ": claimed nodeid=%" PRIu32,
	                    sender_id, pid, msg->sender.id);
	            return NULL;
	        }
	        if ((msg->host.id != 0) && (local_nodeid != msg->host.id)) {
	            crm_trace("Not for us: %" PRIu32" != %" PRIu32,
	                      msg->host.id, local_nodeid);
	            return NULL;
	        }
	        if ((msg->host.size > 0)
	            && !pcmk__str_eq(msg->host.uname, local_name, pcmk__str_casei)) {

	            crm_trace("Not for us: %s != %s", msg->host.uname, local_name);
	            return NULL;
	        }

	        msg->sender.id = sender_id;
	        if (msg->sender.size == 0) {
	            const crm_node_t *peer =
	                pcmk__get_node(sender_id, NULL, NULL,
	                               pcmk__node_search_cluster_member);

	            if ((peer == NULL) || (peer->uname == NULL)) {
	                crm_err("No uname for peer with nodeid=%u", sender_id);

	            } else {
	                size_t name_len = strlen(peer->uname);

	                // Never write past the fixed-size name buffer
	                if (name_len >= sizeof(msg->sender.uname)) {
	                    name_len = sizeof(msg->sender.uname) - 1;
	                }
	                crm_notice("Fixing uname for peer with nodeid=%u", sender_id);
	                msg->sender.size = name_len;
	                memset(msg->sender.uname, 0, MAX_NAME);
	                memcpy(msg->sender.uname, peer->uname, name_len);
	            }
	        }
	    }

	    crm_trace("Got new%s message (size=%d, %d, %d)",
	              msg->is_compressed ? " compressed" : "",
	              msg_data_len(msg), msg->size, msg->compressed_size);

	    if (kind != NULL) {
	        *kind = msg->header.id;
	    }
	    if (from != NULL) {
	        *from = msg->sender.uname;
	    }

	    if (msg->is_compressed && (msg->size > 0)) {
	        int rc = BZ_OK;
	        char *uncompressed = NULL;
	        unsigned int new_size = msg->size + 1;

	        if (!check_message_sanity(msg)) {
	            goto badmsg;
	        }

	        crm_trace("Decompressing message data");
	        // One extra zeroed byte keeps the result null-terminated
	        uncompressed = pcmk__assert_alloc(1, new_size);
	        rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data,
	                                        msg->compressed_size, 1, 0);

	        rc = pcmk__bzlib2rc(rc);

	        if (rc != pcmk_rc_ok) {
	            crm_err("Decompression failed: %s " CRM_XS " rc=%d",
	                    pcmk_rc_str(rc), rc);
	            free(uncompressed);
	            goto badmsg;
	        }

	        pcmk__assert(new_size == msg->size);

	        data = uncompressed;

	    } else if (!check_message_sanity(msg)) {
	        goto badmsg;

	    } else {
	        data = strdup(msg->data);
	    }

	    // Is this necessary?
	    pcmk__get_node(msg->sender.id, msg->sender.uname, NULL,
	                   pcmk__node_search_cluster_member);

	    crm_trace("Payload: %.200s", data);
	    return data;

	badmsg:
	    crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):"
	            " min=%d, total=%d, size=%d, bz2_size=%d",
	            msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type),
	            ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
	            msg->sender.pid, (int)sizeof(pcmk__cpg_msg_t),
	            msg->header.size, msg->size, msg->compressed_size);

	    free(data);
	    return NULL;
	}

	/*!
	* \internal
	* \brief Compare cpg_address objects by node ID
	*
	* \param[in] first First cpg_address structure to compare
	* \param[in] second Second cpg_address structure to compare
	*
	* \return Negative number if first's node ID is lower,
	* positive number if first's node ID is greater,
	* or 0 if both node IDs are equal
	*/
	static int
	cmp_member_list_nodeid(const void first, const void second)
	{
	const struct cpg_address const a = ((const struct cpg_address **) first),
	const b = ((const struct cpg_address **) second);
	if (a->nodeid < b->nodeid) {
	return -1;
	} else if (a->nodeid > b->nodeid) {
	return 1;
	}
	/* don't bother with "reason" nor "pid" */
	return 0;
	}

	/*!
	 * \internal
	 * \brief Describe a cpg_reason_t value for log messages
	 *
	 * \param[in] reason  CPG reason value
	 *
	 * \return Phrase (with a leading space) suitable for appending to a log
	 *         message, or an empty string for unrecognized values
	 */
	static const char *
	cpgreason2str(cpg_reason_t reason)
	{
	    const char *text = "";

	    switch (reason) {
	        case CPG_REASON_JOIN:
	            text = " via cpg_join";
	            break;
	        case CPG_REASON_LEAVE:
	            text = " via cpg_leave";
	            break;
	        case CPG_REASON_NODEDOWN:
	            text = " via cluster exit";
	            break;
	        case CPG_REASON_NODEUP:
	            text = " via cluster join";
	            break;
	        case CPG_REASON_PROCDOWN:
	            text = " for unknown reason";
	            break;
	        default:
	            break;
	    }
	    return text;
	}

	/*!
	 * \internal
	 * \brief Get a node name suitable for log messages
	 *
	 * \param[in] peer  Node to name (may be \c NULL)
	 *
	 * \return "unknown node" if \p peer is \c NULL, "peer node" if it has no
	 *         name, otherwise the node's name
	 */
	static inline const char *
	peer_name(const crm_node_t *peer)
	{
	    if (peer == NULL) {
	        return "unknown node";
	    }
	    return (peer->uname != NULL)? peer->uname : "peer node";
	}

	/*!
	* \internal
	* \brief Process a CPG peer's leaving the cluster
	*
	* If the departed process's node still has another process in the member
	* list, the departure is only logged as a duplicate leaving; otherwise the
	* peer's CPG process state is updated to offline (when the peer is known).
	*
	* \param[in] cpg_group_name CPG group name (for logging)
	* \param[in] event_counter Event number (for logging)
	* \param[in] local_nodeid Node ID of local node
	* \param[in] cpg_peer CPG peer that left
	* \param[in] sorted_member_list List of remaining members, qsort()-ed by ID
	* \param[in] member_list_entries Number of entries in \p sorted_member_list
	*/
	static void
	node_left(const char *cpg_group_name, int event_counter,
	uint32_t local_nodeid, const struct cpg_address *cpg_peer,
	const struct cpg_address **sorted_member_list,
	size_t member_list_entries)
	{
	crm_node_t *peer =
	- pcmk__search_node_caches(cpg_peer->nodeid, NULL,
	+ pcmk__search_node_caches(cpg_peer->nodeid, NULL, NULL,
	pcmk__node_search_cluster_member);
	const struct cpg_address **rival = NULL;

	/* Most CPG-related Pacemaker code assumes that only one process on a node
	* can be in the process group, but Corosync does not impose this
	* limitation, and more than one can be a member in practice due to a
	* daemon attempting to start while another instance is already running.
	*
	* Check for any such duplicate instances, because we don't want to process
	* their leaving as if our actual peer left. If the peer that left still has
	* an entry in sorted_member_list (with a different PID), we will ignore the
	* leaving.
	*
	* @TODO Track CPG members' PIDs so we can tell exactly who left.
	*/
	if (peer != NULL) {
	// The search key is the address of a pointer, matching the array elements
	rival = bsearch(&cpg_peer, sorted_member_list, member_list_entries,
	sizeof(const struct cpg_address *),
	cmp_member_list_nodeid);
	}

	if (rival == NULL) {
	// No other process from that node remains (or the node is not cached)
	crm_info("Group %s event %d: %s (node %u pid %u) left%s",
	cpg_group_name, event_counter, peer_name(peer),
	cpg_peer->nodeid, cpg_peer->pid,
	cpgreason2str(cpg_peer->reason));
	if (peer != NULL) {
	crm_update_peer_proc(__func__, peer, crm_proc_cpg,
	PCMK_VALUE_OFFLINE);
	}
	} else if (cpg_peer->nodeid == local_nodeid) {
	// A duplicate process on the local node left; only log it
	crm_warn("Group %s event %d: duplicate local pid %u left%s",
	cpg_group_name, event_counter,
	cpg_peer->pid, cpgreason2str(cpg_peer->reason));
	} else {
	// A duplicate process on another node left; only log it
	crm_warn("Group %s event %d: "
	"%s (node %u) duplicate pid %u left%s (%u remains)",
	cpg_group_name, event_counter, peer_name(peer),
	cpg_peer->nodeid, cpg_peer->pid,
	cpgreason2str(cpg_peer->reason), (*rival)->pid);
	}
	}

	/*!
	 * \internal
	 * \brief Handle a CPG configuration change event
	 *
	 * Processes departed members first, logs joined members, then refreshes the
	 * CPG process state of every current member. If the local node is not among
	 * the current members, the connection is flagged as evicted.
	 *
	 * \param[in] handle               CPG connection
	 * \param[in] group_name           CPG group name
	 * \param[in] member_list          List of current CPG members
	 * \param[in] member_list_entries  Number of entries in \p member_list
	 * \param[in] left_list            List of CPG members that left
	 * \param[in] left_list_entries    Number of entries in \p left_list
	 * \param[in] joined_list          List of CPG members that joined
	 * \param[in] joined_list_entries  Number of entries in \p joined_list
	 *
	 * \note This is of type \c cpg_confchg_fn_t, intended to be used in a
	 *       \c cpg_callbacks_t object.
	 */
	void
	pcmk__cpg_confchg_cb(cpg_handle_t handle,
	                     const struct cpg_name *group_name,
	                     const struct cpg_address *member_list,
	                     size_t member_list_entries,
	                     const struct cpg_address *left_list,
	                     size_t left_list_entries,
	                     const struct cpg_address *joined_list,
	                     size_t joined_list_entries)
	{
	    static int counter = 0;

	    bool found = false;
	    uint32_t local_nodeid = pcmk__cpg_local_nodeid(handle);
	    const struct cpg_address **sorted = NULL;

	    sorted = pcmk__assert_alloc(member_list_entries,
	                                sizeof(const struct cpg_address *));

	    for (size_t iter = 0; iter < member_list_entries; iter++) {
	        sorted[iter] = member_list + iter;
	    }

	    // So that the cross-matching of multiply-subscribed nodes is then cheap
	    qsort(sorted, member_list_entries, sizeof(const struct cpg_address *),
	          cmp_member_list_nodeid);

	    for (size_t i = 0; i < left_list_entries; i++) {
	        node_left(group_name->value, counter, local_nodeid, &left_list[i],
	                  sorted, member_list_entries);
	    }
	    free(sorted);
	    sorted = NULL;

	    for (size_t i = 0; i < joined_list_entries; i++) {
	        crm_info("Group %s event %d: node %u pid %u joined%s",
	                 group_name->value, counter, joined_list[i].nodeid,
	                 joined_list[i].pid, cpgreason2str(joined_list[i].reason));
	    }

	    for (size_t i = 0; i < member_list_entries; i++) {
	        crm_node_t *peer = pcmk__get_node(member_list[i].nodeid, NULL, NULL,
	                                          pcmk__node_search_cluster_member);

	        if (member_list[i].nodeid == local_nodeid
	                && member_list[i].pid != getpid()) {
	            // See the note in node_left()
	            crm_warn("Group %s event %d: detected duplicate local pid %u",
	                     group_name->value, counter, member_list[i].pid);
	            continue;
	        }
	        crm_info("Group %s event %d: %s (node %u pid %u) is member",
	                 group_name->value, counter, peer_name(peer),
	                 member_list[i].nodeid, member_list[i].pid);

	        /* If the caller left auto-reaping enabled, this will also update the
	         * state to member.
	         */
	        peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg,
	                                    PCMK_VALUE_ONLINE);

	        if (peer && peer->state && strcmp(peer->state, CRM_NODE_MEMBER)) {
	            /* The node is a CPG member, but we currently think it's not a
	             * cluster member. This is possible only if auto-reaping was
	             * disabled. The node may be joining, and we happened to get the CPG
	             * notification before the quorum notification; or the node may have
	             * just died, and we are processing its final messages; or a bug
	             * has affected the peer cache.
	             */
	            time_t now = time(NULL);

	            if (peer->when_lost == 0) {
	                // Track when we first got into this contradictory state
	                peer->when_lost = now;

	            } else if (now > (peer->when_lost + 60)) {
	                // If it persists for more than a minute, update the state
	                crm_warn("Node %u is member of group %s but was believed "
	                         "offline",
	                         member_list[i].nodeid, group_name->value);
	                pcmk__update_peer_state(__func__, peer, CRM_NODE_MEMBER, 0);
	            }
	        }

	        if (local_nodeid == member_list[i].nodeid) {
	            found = true;
	        }
	    }

	    if (!found) {
	        crm_err("Local node was evicted from group %s", group_name->value);
	        cpg_evicted = true;
	    }

	    counter++;
	}

	/*!
	 * \brief Register the CPG message delivery callback of a cluster object
	 *
	 * \param[in,out] cluster  Cluster object
	 * \param[in]     fn       Deliver callback function to set
	 *
	 * \return Standard Pacemaker return code (\c EINVAL if \p cluster is \c NULL)
	 */
	int
	pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn)
	{
	    if (cluster != NULL) {
	        cluster->cpg.cpg_deliver_fn = fn;
	        return pcmk_rc_ok;
	    }
	    return EINVAL;
	}

	/*!
	 * \brief Register the CPG configuration change callback of a cluster object
	 *
	 * \param[in,out] cluster  Cluster object
	 * \param[in]     fn       Configuration change callback function to set
	 *
	 * \return Standard Pacemaker return code (\c EINVAL if \p cluster is \c NULL)
	 */
	int
	pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn)
	{
	    if (cluster != NULL) {
	        cluster->cpg.cpg_confchg_fn = fn;
	        return pcmk_rc_ok;
	    }
	    return EINVAL;
	}

	/*!
	 * \brief Connect to Corosync CPG
	 *
	 * Opens a CPG connection, verifies that the CPG provider is authentic, looks
	 * up the local node ID, joins the process group named after the local
	 * daemon, and registers the connection with the main loop. On any failure
	 * the connection is finalized again.
	 *
	 * \param[in,out] cluster  Initialized cluster object to connect
	 *
	 * \return Standard Pacemaker return code (\c ENOTCONN on any failure)
	 */
	int
	pcmk__cpg_connect(pcmk_cluster_t *cluster)
	{
	    cs_error_t rc;
	    int fd = -1;
	    int retries = 0;
	    uint32_t id = 0;
	    crm_node_t *peer = NULL;
	    cpg_handle_t handle = 0;
	    const char *message_name = pcmk__message_name(crm_system_name);
	    uid_t found_uid = 0;
	    gid_t found_gid = 0;
	    pid_t found_pid = 0;
	    int rv;

	    struct mainloop_fd_callbacks cpg_fd_callbacks = {
	        .dispatch = pcmk_cpg_dispatch,
	        .destroy = cluster->destroy,
	    };

	    cpg_model_v1_data_t cpg_model_info = {
	        .model = CPG_MODEL_V1,
	        .cpg_deliver_fn = cluster->cpg.cpg_deliver_fn,
	        .cpg_confchg_fn = cluster->cpg.cpg_confchg_fn,
	        .cpg_totem_confchg_fn = NULL,
	        .flags = 0,
	    };

	    cpg_evicted = false;
	    cluster->group.length = 0;
	    cluster->group.value[0] = 0;

	    /* group.value is char[128] */
	    strncpy(cluster->group.value, message_name, 127);
	    cluster->group.value[127] = 0;
	    cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value));

	    cs_repeat(rc, retries, 30,
	              cpg_model_initialize(&handle, CPG_MODEL_V1,
	                                   (cpg_model_data_t *) &cpg_model_info,
	                                   NULL));
	    if (rc != CS_OK) {
	        crm_err("Could not connect to the CPG API: %s (%d)",
	                cs_strerror(rc), rc);
	        goto bail;
	    }

	    rc = cpg_fd_get(handle, &fd);
	    if (rc != CS_OK) {
	        crm_err("Could not obtain the CPG API connection: %s (%d)",
	                cs_strerror(rc), rc);
	        goto bail;
	    }

	    /* CPG provider run as root (in given user namespace, anyway)? */
	    rv = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid,
	                                      &found_uid, &found_gid);
	    if (rv == 0) {
	        crm_err("CPG provider is not authentic:"
	                " process %lld (uid: %lld, gid: %lld)",
	                (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
	                (long long) found_uid, (long long) found_gid);
	        rc = CS_ERR_ACCESS;
	        goto bail;
	    } else if (rv < 0) {
	        crm_err("Could not verify authenticity of CPG provider: %s (%d)",
	                strerror(-rv), -rv);
	        rc = CS_ERR_ACCESS;
	        goto bail;
	    }

	    id = pcmk__cpg_local_nodeid(handle);
	    if (id == 0) {
	        crm_err("Could not get local node id from the CPG API");
	        /* rc is still CS_OK from cpg_fd_get() here; it must indicate failure,
	         * or the handle would be leaked and success reported to the caller
	         */
	        rc = CS_ERR_LIBRARY;
	        goto bail;
	    }
	    cluster->nodeid = id;

	    retries = 0;
	    cs_repeat(rc, retries, 30, cpg_join(handle, &cluster->group));
	    if (rc != CS_OK) {
	        crm_err("Could not join the CPG group '%s': %d", message_name, rc);
	        goto bail;
	    }

	    pcmk_cpg_handle = handle;
	    cluster->cpg_handle = handle;
	    mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster,
	                    &cpg_fd_callbacks);

	bail:
	    if (rc != CS_OK) {
	        cpg_finalize(handle);
	        // @TODO Map rc to more specific Pacemaker return code
	        return ENOTCONN;
	    }

	    peer = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member);
	    crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE);
	    return pcmk_rc_ok;
	}

	/*!
	 * \internal
	 * \brief Leave the CPG group and close the Corosync CPG connection
	 *
	 * The file-level CPG handle is always reset; the cluster object's handle is
	 * reset once its connection has been finalized.
	 *
	 * \param[in,out] cluster  Cluster object to disconnect
	 */
	void
	pcmk__cpg_disconnect(pcmk_cluster_t *cluster)
	{
	    pcmk_cpg_handle = 0;

	    if (cluster->cpg_handle == 0) {
	        crm_info("No CPG connection");
	        return;
	    }

	    crm_trace("Disconnecting CPG");
	    cpg_leave(cluster->cpg_handle, &cluster->group);
	    cpg_finalize(cluster->cpg_handle);
	    cluster->cpg_handle = 0;
	}

	/*!
	* \internal
	* \brief Send string data via Corosync CPG
	*
	* \param[in] data Data to send
	* \param[in] local What to set as host "local" value (which is never used)
	* \param[in] node Cluster node to send message to
	* \param[in] dest Type of message to send
	*
	* \return \c true on success, or \c false otherwise
	*/
	static bool
	send_cpg_text(const char data, bool local, const crm_node_t node,
	enum crm_ais_msg_types dest)
	{
	// @COMPAT Drop local argument when send_cluster_text is dropped
	static int msg_id = 0;
	static int local_pid = 0;
	static int local_name_len = 0;
	static const char *local_name = NULL;

	char *target = NULL;
	struct iovec *iov;
	pcmk__cpg_msg_t *msg = NULL;

	CRM_CHECK(dest != crm_msg_ais, return false);

	if (local_name == NULL) {
	local_name = pcmk__cluster_local_node_name();
	}
	if ((local_name_len == 0) && (local_name != NULL)) {
	local_name_len = strlen(local_name);
	}

	if (data == NULL) {
	data = "";
	}

	if (local_pid == 0) {
	local_pid = getpid();
	}

	msg = pcmk__assert_alloc(1, sizeof(pcmk__cpg_msg_t));

	msg_id++;
	msg->id = msg_id;
	msg->header.id = crm_class_cluster;
	msg->header.error = CS_OK;

	msg->host.type = dest;
	msg->host.local = local;

	if (node != NULL) {
	if (node->uname != NULL) {
	target = pcmk__str_copy(node->uname);
	msg->host.size = strlen(node->uname);
	memset(msg->host.uname, 0, MAX_NAME);
	memcpy(msg->host.uname, node->uname, msg->host.size);

	} else {
	target = crm_strdup_printf("%u", node->id);
	}
	msg->host.id = node->id;

	} else {
	target = pcmk__str_copy("all");
	}

	msg->sender.id = 0;
	msg->sender.type = pcmk__cluster_parse_msg_type(crm_system_name);
	msg->sender.pid = local_pid;
	msg->sender.size = local_name_len;
	memset(msg->sender.uname, 0, MAX_NAME);

	if ((local_name != NULL) && (msg->sender.size != 0)) {
	memcpy(msg->sender.uname, local_name, msg->sender.size);
	}

	msg->size = 1 + strlen(data);
	msg->header.size = sizeof(pcmk__cpg_msg_t) + msg->size;

	if (msg->size < CRM_BZ2_THRESHOLD) {
	msg = pcmk__realloc(msg, msg->header.size);
	memcpy(msg->data, data, msg->size);

	} else {
	char *compressed = NULL;
	unsigned int new_size = 0;

	if (pcmk__compress(data, (unsigned int) msg->size, 0, &compressed,
	&new_size) == pcmk_rc_ok) {

	msg->header.size = sizeof(pcmk__cpg_msg_t) + new_size;
	msg = pcmk__realloc(msg, msg->header.size);
	memcpy(msg->data, compressed, new_size);

	msg->is_compressed = TRUE;
	msg->compressed_size = new_size;

	} else {
	// cppcheck seems not to understand the abort logic in pcmk__realloc
	// cppcheck-suppress memleak
	msg = pcmk__realloc(msg, msg->header.size);
	memcpy(msg->data, data, msg->size);
	}

	free(compressed);
	}

	iov = pcmk__assert_alloc(1, sizeof(struct iovec));
	iov->iov_base = msg;
	iov->iov_len = msg->header.size;

	if (msg->compressed_size > 0) {
	crm_trace("Queueing CPG message %u to %s "
	"(%llu bytes, %d bytes compressed payload): %.200s",
	msg->id, target, (unsigned long long) iov->iov_len,
	msg->compressed_size, data);
	} else {
	crm_trace("Queueing CPG message %u to %s "
	"(%llu bytes, %d bytes payload): %.200s",
	msg->id, target, (unsigned long long) iov->iov_len,
	msg->size, data);
	}

	free(target);

	cs_message_queue = g_list_append(cs_message_queue, iov);
	crm_cs_flush(&pcmk_cpg_handle);

	return true;
	}

	/*!
	 * \internal
	 * \brief Send an XML message via Corosync CPG
	 *
	 * The XML is serialized to text and handed to send_cpg_text().
	 *
	 * \param[in] msg   XML message to send
	 * \param[in] node  Cluster node to send message to
	 * \param[in] dest  Type of message to send
	 *
	 * \return \c true on success, otherwise \c false
	 */
	bool
	pcmk__cpg_send_xml(const xmlNode *msg, const crm_node_t *node,
	                   enum crm_ais_msg_types dest)
	{
	    bool rc = true;
	    GString *data = g_string_sized_new(1024);

	    pcmk__xml_string(msg, 0, data, 0);

	    rc = send_cpg_text(data->str, false, node, dest);
	    g_string_free(data, TRUE);
	    return rc;
	}

	// Deprecated functions kept only for backward API compatibility
	// LCOV_EXCL_START

	#include <crm/cluster/compat.h>

	/* Deprecated wrapper for pcmk__cpg_connect(), kept for API compatibility.
	* Returns a true value only if the connection attempt returned pcmk_rc_ok.
	*/
	gboolean
	cluster_connect_cpg(pcmk_cluster_t *cluster)
	{
	return pcmk__cpg_connect(cluster) == pcmk_rc_ok;
	}

	// Deprecated wrapper for pcmk__cpg_disconnect(), kept for API compatibility
	void
	cluster_disconnect_cpg(pcmk_cluster_t *cluster)
	{
	pcmk__cpg_disconnect(cluster);
	}

	// Deprecated wrapper for pcmk__cpg_local_nodeid(), kept for API compatibility
	uint32_t
	get_local_nodeid(cpg_handle_t handle)
	{
	return pcmk__cpg_local_nodeid(handle);
	}

	/* Deprecated wrapper for pcmk__cpg_confchg_cb(), kept for API compatibility.
	* All arguments are passed through unchanged.
	*/
	void
	pcmk_cpg_membership(cpg_handle_t handle,
	const struct cpg_name *group_name,
	const struct cpg_address *member_list,
	size_t member_list_entries,
	const struct cpg_address *left_list,
	size_t left_list_entries,
	const struct cpg_address *joined_list,
	size_t joined_list_entries)
	{
	pcmk__cpg_confchg_cb(handle, group_name, member_list, member_list_entries,
	left_list, left_list_entries,
	joined_list, joined_list_entries);
	}

	/* Deprecated: send text to the cluster. Only crm_class_cluster messages are
	 * supported (sent via CPG); any other class is logged as an error and
	 * rejected.
	 */
	gboolean
	send_cluster_text(enum crm_ais_msg_class msg_class, const char *data,
	                  gboolean local, const crm_node_t *node,
	                  enum crm_ais_msg_types dest)
	{
	    if (msg_class == crm_class_cluster) {
	        return send_cpg_text(data, local, node, dest);
	    }

	    crm_err("Invalid message class: %d", msg_class);
	    return FALSE;
	}

	/* Deprecated wrapper for pcmk__cpg_message_data(), kept for API
	 * compatibility. All arguments are passed through unchanged; the caller
	 * owns the returned string.
	 */
	char *
	pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid,
	                       void *content, uint32_t *kind, const char **from)
	{
	    return pcmk__cpg_message_data(handle, nodeid, pid, content, kind, from);
	}

	/* Deprecated: map a subsystem name to a CPG message type. Names that match
	 * no known daemon are parsed as a decimal number (expected to be a client's
	 * PID); the result is crm_msg_none if that fails or if the number falls
	 * within the range of the predefined types.
	 */
	enum crm_ais_msg_types
	text2msg_type(const char *text)
	{
	    int type = crm_msg_none;

	    CRM_CHECK(text != NULL, return type);
	    text = pcmk__message_name(text);
	    if (pcmk__str_eq(text, "ais", pcmk__str_casei)) {
	        type = crm_msg_ais;
	    } else if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_casei)) {
	        type = crm_msg_cib;
	    } else if (pcmk__strcase_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) {
	        type = crm_msg_crmd;
	    } else if (pcmk__str_eq(text, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
	        type = crm_msg_te;
	    } else if (pcmk__str_eq(text, CRM_SYSTEM_PENGINE, pcmk__str_casei)) {
	        type = crm_msg_pe;
	    } else if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_casei)) {
	        type = crm_msg_lrmd;
	    } else if (pcmk__str_eq(text, CRM_SYSTEM_STONITHD, pcmk__str_casei)) {
	        type = crm_msg_stonithd;
	    } else if (pcmk__str_eq(text, "stonith-ng", pcmk__str_casei)) {
	        type = crm_msg_stonith_ng;
	    } else if (pcmk__str_eq(text, "attrd", pcmk__str_casei)) {
	        type = crm_msg_attrd;

	    } else {
	        /* This will normally be a transient client rather than
	         * a cluster daemon. Set the type to the pid of the client
	         */
	        int scan_rc = sscanf(text, "%d", &type);

	        if (scan_rc != 1 || type <= crm_msg_stonith_ng) {
	            /* Ensure it's sane */
	            type = crm_msg_none;
	        }
	    }
	    return type;
	}

	// LCOV_EXCL_STOP
	// End deprecated API
	diff --git a/lib/cluster/election.c b/lib/cluster/election.c
	index 98cd716826..c3e4cd126c 100644
	--- a/lib/cluster/election.c
	+++ b/lib/cluster/election.c
	@@ -1,726 +1,727 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU Lesser General Public License
	* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sys/time.h>
	#include <sys/resource.h>

	#include <crm/common/xml.h>

	#include <crm/common/mainloop.h>
	#include <crm/cluster/internal.h>
	#include <crm/cluster/election_internal.h>
	#include <crm/crm.h>

	#define STORM_INTERVAL 2 /* in seconds */

	// State of one election instance, as created by election_init()
	struct election_s {
	enum election_result state; // Current state (reported by election_state())
	guint count; // How many times local node has voted
	char *name; // Descriptive name for this election
	char *uname; // Local node's name
	GSourceFunc cb; // Function to call if election is won
	GHashTable *voted; // Key = node name, value = how node voted
	mainloop_timer_t *timeout; // When to abort if all votes not received
	int election_wins; // Track wins, for storm detection
	bool wrote_blackbox; // Write a storm blackbox at most once
	time_t expires; // When storm detection period ends
	time_t last_election_loss; // When dampening period ends
	};

	/* Record the local node as the election winner, call the election's
	 * "won" callback (if one was registered), then reset the election.
	 */
	static void
	election_complete(election_t *e)
	{
	    GSourceFunc on_win = NULL;

	    e->state = election_won;

	    on_win = e->cb;
	    if (on_win != NULL) {
	        on_win(e);
	    }

	    election_reset(e);
	}

	/* Timer callback for an election's timeout: the local node is declared the
	 * winner. user_data is the election object. Always returns FALSE.
	 */
	static gboolean
	election_timer_cb(gpointer user_data)
	{
	    election_t *election = user_data;

	    crm_info("%s timed out, declaring local node as winner",
	             election->name);
	    election_complete(election);

	    return FALSE;
	}

	/*!
	 * \brief Get current state of an election
	 *
	 * \param[in] e  Election object
	 *
	 * \return Current state of \p e, or \c election_error if \p e is \c NULL
	 */
	enum election_result
	election_state(const election_t *e)
	{
	    if (e == NULL) {
	        return election_error;
	    }
	    return e->state;
	}

	/*!
	 * \brief Create a new election object
	 *
	 * Every node that wishes to participate in an election must create an election
	 * object. Typically, this should be done once, at start-up. A caller should
	 * only create a single election object.
	 *
	 * \param[in] name       Label for election (for logging); if \c NULL, a
	 *                       numbered label is generated
	 * \param[in] uname      Local node's name (must not be \c NULL)
	 * \param[in] period_ms  How long to wait for all peers to vote
	 * \param[in] cb         Function to call if local node wins election
	 *
	 * \return Newly allocated election object on success, NULL on error
	 * \note The caller is responsible for freeing the returned value using
	 *       election_fini().
	 */
	election_t *
	election_init(const char *name, const char *uname, guint period_ms,
	              GSourceFunc cb)
	{
	    election_t *e = NULL;

	    // Used to label elections created without a name
	    static guint count = 0;

	    CRM_CHECK(uname != NULL, return NULL);

	    e = calloc(1, sizeof(election_t));
	    if (e == NULL) {
	        crm_perror(LOG_CRIT, "Cannot create election");
	        return NULL;
	    }

	    e->uname = strdup(uname);
	    if (e->uname == NULL) {
	        crm_perror(LOG_CRIT, "Cannot create election");
	        free(e);
	        return NULL;
	    }

	    e->name = name? crm_strdup_printf("election-%s", name)
	                  : crm_strdup_printf("election-%u", count++);
	    e->cb = cb;
	    e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
	                                    election_timer_cb, e);
	    crm_trace("Created %s", e->name);
	    return e;
	}

	/*!
	 * \brief Disregard any previous vote by specified peer
	 *
	 * This discards any recorded vote from a specified peer. Election users should
	 * call this whenever a voting peer becomes inactive. Nothing is done if either
	 * argument is \c NULL or no votes have been recorded.
	 *
	 * \param[in,out] e      Election object
	 * \param[in]     uname  Name of peer to disregard
	 */
	void
	election_remove(election_t *e, const char *uname)
	{
	    if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
	        crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
	        g_hash_table_remove(e->voted, uname);
	    }
	}

	/*!
	 * \brief Stop election timer and disregard all votes
	 *
	 * \param[in,out] e  Election object (ignored if \c NULL)
	 */
	void
	election_reset(election_t *e)
	{
	    if (e != NULL) {
	        crm_trace("Resetting election %s", e->name);
	        mainloop_timer_stop(e->timeout);
	        if (e->voted) {
	            // g_hash_table_size() returns guint, so log it as unsigned
	            crm_trace("Destroying voted cache with %u members", g_hash_table_size(e->voted));
	            g_hash_table_destroy(e->voted);
	            e->voted = NULL;
	        }
	    }
	}

	/*!
	 * \brief Release an election object
	 *
	 * Resets the election (stopping its timer and dropping recorded votes),
	 * deletes the timer, and frees the object along with the strings it owns.
	 *
	 * \param[in,out] e  Election object (ignored if \c NULL)
	 */
	void
	election_fini(election_t *e)
	{
	    if (e == NULL) {
	        return;
	    }

	    election_reset(e);
	    crm_trace("Destroying %s", e->name);
	    mainloop_timer_del(e->timeout);

	    free(e->uname);
	    free(e->name);
	    free(e);
	}

	// Start the election's timer; does nothing when there is no election
	static void
	election_timeout_start(election_t *e)
	{
	    if (e == NULL) {
	        return;
	    }
	    mainloop_timer_start(e->timeout);
	}

	/*!
	 * \brief Halt the timer belonging to an election
	 *
	 * \param[in,out] e  Election object (ignored if \c NULL)
	 */
	void
	election_timeout_stop(election_t *e)
	{
	    if (e == NULL) {
	        return;
	    }
	    mainloop_timer_stop(e->timeout);
	}

	/*!
	 * \brief Give an election's timer a new period
	 *
	 * \param[in,out] e       Election object; an error is logged if \c NULL
	 * \param[in]     period  New timeout, passed to mainloop_timer_set_period()
	 */
	void
	election_timeout_set_period(election_t *e, guint period)
	{
	    if (e == NULL) {
	        crm_err("No election defined");
	        return;
	    }
	    mainloop_timer_set_period(e->timeout, period);
	}

	/*
	 * Report this process's "age" for election comparisons.
	 *
	 * The value is the user CPU time (ru_utime) reported by
	 * getrusage(RUSAGE_SELF). It is cached in static storage and only re-read
	 * once the cache expiry has passed; every call pushes the expiry out to
	 * STORM_INTERVAL seconds after that call.
	 *
	 * output receives the (possibly cached) value. Returns 1 on success; if
	 * getrusage() fails, output is zeroed and -1 is returned.
	 */
	static int
	get_uptime(struct timeval *output)
	{
	    static time_t expires = 0;
	    static struct rusage info;

	    const time_t tm_now = time(NULL);

	    if (tm_now > expires) {
	        int rc = 0;

	        // Cache is stale: fetch a fresh reading
	        info.ru_utime.tv_sec = 0;
	        info.ru_utime.tv_usec = 0;
	        rc = getrusage(RUSAGE_SELF, &info);

	        output->tv_sec = 0;
	        output->tv_usec = 0;

	        if (rc < 0) {
	            crm_perror(LOG_ERR, "Could not calculate the current uptime");
	            expires = 0;    // Force another attempt on the next call
	            return -1;
	        }

	        crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
	                  (long)info.ru_utime.tv_usec);
	    }

	    /* N seconds after the last _access_ */
	    expires = tm_now + STORM_INTERVAL;

	    output->tv_usec = info.ru_utime.tv_usec;
	    output->tv_sec = info.ru_utime.tv_sec;
	    return 1;
	}

	/*
	 * Compare the local age (from get_uptime()) against a peer's age.
	 *
	 * Seconds are compared first and microseconds break ties. Returns 1 if the
	 * local value is larger, -1 if it is smaller, and 0 if both are equal.
	 */
	static int
	compare_age(struct timeval your_age)
	{
	    struct timeval our_age;

	    /* If an error occurred, our_age will be compared as {0,0} */
	    get_uptime(&our_age);

	    if (our_age.tv_sec != your_age.tv_sec) {
	        if (our_age.tv_sec > your_age.tv_sec) {
	            crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
	            return 1;
	        }
	        crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
	        return -1;
	    }

	    if (our_age.tv_usec != your_age.tv_usec) {
	        if (our_age.tv_usec > your_age.tv_usec) {
	            crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
	                      (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
	            return 1;
	        }
	        crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
	                  (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
	        return -1;
	    }

	    return 0;
	}

	/*!
	* \brief Start a new election by offering local node's candidacy
	*
	* Broadcast a "vote" election message containing the local node's ID,
	* (incremented) election counter, and uptime, and start the election timer.
	*
	* \param[in,out] e Election object
	*
	* \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
	* all active peers do so, or if the election times out, the local node
	* wins the election. (If we lose to any peer vote, we will stop the
	* timer, so a timeout means we did not lose -- either some peer did not
	* vote, or we did not call election_check() in time.)
	*/
	void
	election_vote(election_t *e)
	{
	struct timeval age;
	xmlNode *vote = NULL;
	crm_node_t *our_node;

	if (e == NULL) {
	crm_trace("Election vote requested, but no election available");
	return;
	}

	// The local node must be an active cluster member before it may vote
	our_node = pcmk__get_node(0, e->uname, NULL,
	pcmk__node_search_cluster_member);
	if (!pcmk__cluster_is_node_active(our_node)) {
	crm_trace("Cannot vote in %s yet: local node not connected to cluster",
	e->name);
	return;
	}

	// Stop any running timer and drop votes recorded for an earlier round
	election_reset(e);
	e->state = election_in_progress;
	vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

	// Every call starts a new round, identified by the incremented counter
	e->count++;
	- crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->uuid);
	+ crm_xml_add(vote, PCMK__XA_ELECTION_OWNER,
	+ pcmk__cluster_get_xml_id(our_node));
	crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count);

	// Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
	get_uptime(&age);
	crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
	PCMK__XA_ELECTION_AGE_NANO_SEC, &age);

	// NOTE(review): a NULL peer presumably means "send to all nodes" -- confirm
	pcmk__cluster_send_message(NULL, crm_msg_crmd, vote);
	free_xml(vote);

	crm_debug("Started %s round %d", e->name, e->count);
	election_timeout_start(e);
	return;
	}

	/*!
	 * \brief Check whether local node has won an election
	 *
	 * Compares the number of recorded votes with the number of active cluster
	 * nodes. Once at least as many votes as active nodes have been recorded, the
	 * election timer is stopped and election_complete() is called.
	 *
	 * \param[in,out] e  Election object
	 *
	 * \return TRUE if local node has won, FALSE otherwise (including when \p e is
	 *         \c NULL or no votes have been recorded yet)
	 * \note If all known peers have sent no-vote messages, but the election owner
	 *       does not call this function, the election will not be won (and the
	 *       callback will not be called) until the election times out.
	 * \note This should be called when election_count_vote() returns
	 *       \c election_in_progress.
	 */
	bool
	election_check(election_t *e)
	{
	    int voted_size = 0;
	    int num_members = 0;

	    if (e == NULL) {
	        crm_trace("Election check requested, but no election available");
	        return FALSE;
	    }
	    if (e->voted == NULL) {
	        crm_trace("%s check requested, but no votes received yet", e->name);
	        return FALSE;
	    }

	    voted_size = g_hash_table_size(e->voted);
	    num_members = pcmk__cluster_num_active_nodes();

	    if (voted_size < num_members) {
	        crm_debug("%s still waiting on %d of %d votes",
	                  e->name, num_members - voted_size, num_members);
	        return FALSE;
	    }

	    /* we won and everyone has voted */
	    election_timeout_stop(e);

	    if (voted_size > num_members) {
	        /* More votes than active members: log the expected voters and the
	         * actual voters to help explain the discrepancy
	         */
	        GHashTableIter iter;
	        const crm_node_t *peer;
	        char *voter = NULL;

	        crm_warn("Received too many votes in %s", e->name);

	        g_hash_table_iter_init(&iter, crm_peer_cache);
	        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
	            if (pcmk__cluster_is_node_active(peer)) {
	                crm_warn("* expected vote: %s", peer->uname);
	            }
	        }

	        g_hash_table_iter_init(&iter, e->voted);
	        while (g_hash_table_iter_next(&iter, (gpointer *) &voter, NULL)) {
	            crm_warn("* actual vote: %s", voter);
	        }
	    }

	    crm_info("%s won by local node", e->name);
	    election_complete(e);
	    return TRUE;
	}

	// Minimum time after an election loss before a "pass" may start a new election
	#define LOSS_DAMPEN 2 /* in seconds */

	// Fields extracted from an election message by parse_election_message()
	struct vote {
	const char *op; // Value of PCMK__XA_CRM_TASK
	const char *from; // Value of PCMK__XA_SRC
	const char *version; // Value of PCMK_XA_VERSION
	const char *election_owner; // Value of PCMK__XA_ELECTION_OWNER
	int election_id; // Value of PCMK__XA_ELECTION_ID (-1 if not parsed)
	struct timeval age; // Sender's uptime; only read for CRM_OP_VOTE messages
	};

	/*!
	* \brief Unpack an election message
	*
	* \param[in] e Election object (for logging only)
	* \param[in] message Election message XML
	* \param[out] vote Parsed fields from message
	*
	* \return TRUE if election message and election are valid, FALSE otherwise
	* \note The parsed struct's pointer members are valid only for the lifetime of
	* the message argument.
	*/
	static bool
	parse_election_message(const election_t e, const xmlNode message,
	struct vote *vote)
	{
	CRM_CHECK(message && vote, return FALSE);

	vote->election_id = -1;
	vote->age.tv_sec = -1;
	vote->age.tv_usec = -1;

	vote->op = crm_element_value(message, PCMK__XA_CRM_TASK);
	vote->from = crm_element_value(message, PCMK__XA_SRC);
	vote->version = crm_element_value(message, PCMK_XA_VERSION);
	vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER);

	crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));

	if ((vote->op == NULL) \|\| (vote->from == NULL) \|\| (vote->version == NULL)
	\|\| (vote->election_owner == NULL) \|\| (vote->election_id < 0)) {

	crm_warn("Invalid %s message from %s in %s ",
	(vote->op? vote->op : "election"),
	(vote->from? vote->from : "unspecified node"),
	(e? e->name : "election"));
	return FALSE;
	}

	// Op-specific validation

	if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
	/* Only vote ops have uptime.
	Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
	*/
	crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
	PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age));
	if ((vote->age.tv_sec < 0) \|\| (vote->age.tv_usec < 0)) {
	crm_warn("Cannot count %s %s from %s because it is missing uptime",
	(e? e->name : "election"), vote->op, vote->from);
	return FALSE;
	}

	} else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
	crm_info("Cannot process %s message from %s because %s is not a known election op",
	(e? e->name : "election"), vote->from, vote->op);
	return FALSE;
	}

	// Election validation

	if (e == NULL) {
	crm_info("Cannot count %s from %s because no election available",
	vote->op, vote->from);
	return FALSE;
	}

	/* If the membership cache is NULL, we REALLY shouldn't be voting --
	* the question is how we managed to get here.
	*/
	if (crm_peer_cache == NULL) {
	crm_info("Cannot count %s %s from %s because no peer information available",
	e->name, vote->op, vote->from);
	return FALSE;
	}
	return TRUE;
	}

	/*
	 * Remember a peer's vote in the election's table of received votes.
	 *
	 * The table is created on first use. Both the sender name (key) and the op
	 * (value) are stored as copies that the table frees itself.
	 */
	static void
	record_vote(election_t *e, struct vote *vote)
	{
	    pcmk__assert(e && vote && vote->from && vote->op);

	    if (e->voted == NULL) {
	        e->voted = pcmk__strkey_table(free, free);
	    }
	    pcmk__insert_dup(e->voted, vote->from, vote->op);
	}

	/*
	 * Send a no-vote reply for a received vote to the given peer.
	 *
	 * The reply echoes the election owner and election ID of the vote it
	 * answers.
	 */
	static void
	send_no_vote(crm_node_t *peer, struct vote *vote)
	{
	    // @TODO probably shouldn't hardcode CRM_SYSTEM_CRMD and crm_msg_crmd

	    xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
	                                     CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

	    crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
	    crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);

	    pcmk__cluster_send_message(peer, crm_msg_crmd, novote);
	    free_xml(novote);
	}

	/*!
	* \brief Process an election message (vote or no-vote) from a peer
	*
	* \param[in,out] e Election object
	* \param[in] message Election message XML from peer
	* \param[in] can_win Whether local node is eligible to win
	*
	* \return Election state after new vote is considered
	* \note If the peer message is a vote, and we prefer the peer to win, this will
	* send a no-vote reply to the peer.
	* \note The situations "we lost to this vote" and "this is a late no-vote
	* after we've already lost" both return election_lost. If a caller needs
	* to distinguish them, it should save the current state before calling
	* this function, and then compare the result.
	*/
	// NOTE(review): the signature and the our_node/your_node declaration below
	// look like they have lost their pointer declarators ('*'), and "||" appears
	// as "\|\|" in this rendering -- confirm against the original file
	enum election_result
	election_count_vote(election_t e, const xmlNode message, bool can_win)
	{
	int log_level = LOG_INFO;
	gboolean done = FALSE;
	gboolean we_lose = FALSE;
	const char *reason = "unknown";
	bool we_are_owner = FALSE;
	crm_node_t our_node = NULL, your_node = NULL;
	time_t tm_now = time(NULL);
	struct vote vote;

	CRM_CHECK(message != NULL, return election_error);
	if (parse_election_message(e, message, &vote) == FALSE) {
	return election_error;
	}

	your_node = pcmk__get_node(0, vote.from, NULL,
	pcmk__node_search_cluster_member);
	our_node = pcmk__get_node(0, e->uname, NULL,
	pcmk__node_search_cluster_member);
	we_are_owner = (our_node != NULL)
	- && pcmk__str_eq(our_node->uuid, vote.election_owner,
	- pcmk__str_none);
	+ && pcmk__str_eq(pcmk__cluster_get_xml_id(our_node),
	+ vote.election_owner, pcmk__str_none);

	/* Classify the message. "we_lose" means we concede to the sender; "done"
	* means the message needs no further handling beyond logging; neither set
	* means we beat the sender's vote.
	*/
	if (!can_win) {
	reason = "Not eligible";
	we_lose = TRUE;

	} else if (!pcmk__cluster_is_node_active(our_node)) {
	reason = "We are not part of the cluster";
	log_level = LOG_ERR;
	we_lose = TRUE;

	} else if (we_are_owner && (vote.election_id != e->count)) {
	log_level = LOG_TRACE;
	reason = "Superseded";
	done = TRUE;

	} else if (!pcmk__cluster_is_node_active(your_node)) {
	/* Possibly we cached the message in the FSA queue at a point that it wasn't */
	reason = "Peer is not part of our cluster";
	log_level = LOG_WARNING;
	done = TRUE;

	} else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
	\|\| pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
	/* Receiving our own broadcast vote, or a no-vote from peer, is a vote
	* for us to win
	*/
	if (!we_are_owner) {
	crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
	e->name, vote.election_id, vote.op, vote.from,
	vote.election_owner);
	return election_error;
	}
	if (e->state != election_in_progress) {
	// Should only happen if we already lost
	crm_debug("Not counting %s round %d %s from %s because no election in progress",
	e->name, vote.election_id, vote.op, vote.from);
	return e->state;
	}
	record_vote(e, &vote);
	reason = "Recorded";
	done = TRUE;

	} else {
	// A peer vote requires a comparison to determine which node is better
	int age_result = compare_age(vote.age);
	int version_result = compare_version(vote.version, CRM_FEATURE_SET);

	// Precedence: feature set version, then uptime, then host name
	if (version_result < 0) {
	reason = "Version";
	we_lose = TRUE;

	} else if (version_result > 0) {
	reason = "Version";

	} else if (age_result < 0) {
	reason = "Uptime";
	we_lose = TRUE;

	} else if (age_result > 0) {
	reason = "Uptime";

	} else if (strcasecmp(e->uname, vote.from) > 0) {
	reason = "Host name";
	we_lose = TRUE;

	} else {
	reason = "Host name";
	}
	}

	// Election storm detection: count wins within a STORM_INTERVAL window
	if (e->expires < tm_now) {
	e->election_wins = 0;
	e->expires = tm_now + STORM_INTERVAL;

	} else if (done == FALSE && we_lose == FALSE) {
	int peers = 1 + g_hash_table_size(crm_peer_cache);

	/* If every node has to vote down every other node, that's N*(N-1) total elections
	* Allow some leeway before _really_ complaining
	*/
	e->election_wins++;
	if (e->election_wins > (peers * peers)) {
	crm_warn("%s election storm detected: %d wins in %d seconds",
	e->name, e->election_wins, STORM_INTERVAL);
	e->election_wins = 0;
	e->expires = tm_now + STORM_INTERVAL;
	if (e->wrote_blackbox == FALSE) {
	/* It's questionable whether a black box (from every node in the
	* cluster) would be truly helpful in diagnosing an election
	* storm. It's also highly doubtful a production environment
	* would get multiple election storms from distinct causes, so
	* saving one blackbox per process lifetime should be
	* sufficient. Alternatives would be to save a timestamp of the
	* last blackbox write instead of a boolean, and write a new one
	* if some amount of time has passed; or to save a storm count,
	* write a blackbox on every Nth occurrence.
	*/
	crm_write_blackbox(0, NULL);
	e->wrote_blackbox = TRUE;
	}
	}
	}

	if (done) {
	do_crm_log(log_level + 1,
	"Processed %s round %d %s (current round %d) from %s (%s)",
	e->name, vote.election_id, vote.op, e->count, vote.from,
	reason);
	return e->state;

	} else if (we_lose == FALSE) {
	/* We track the time of the last election loss to implement an election
	* dampening period, reducing the likelihood of an election storm. If
	* this node has lost within the dampening period, don't start a new
	* election, even if we win against a peer's vote -- the peer we lost to
	* should win again.
	*
	* @TODO This has a problem case: if an election winner immediately
	* leaves the cluster, and a new election is immediately called, all
	* nodes could lose, with no new winner elected. The ideal solution
	* would be to tie the election structure with the peer caches, which
	* would allow us to clear the dampening when the previous winner
	* leaves (and would allow other improvements as well).
	*/
	if ((e->last_election_loss == 0)
	\|\| ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {

	do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
	e->name, vote.election_id, vote.election_owner, vote.op,
	vote.from, reason);

	e->last_election_loss = 0;
	election_timeout_stop(e);

	/* Start a new election by voting down this, and other, peers */
	e->state = election_start;
	return e->state;
	} else {
	char *loss_time = ctime(&e->last_election_loss);

	if (loss_time) {
	// Show only HH:MM:SS
	loss_time += 11;
	loss_time[8] = '\0';
	}
	crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
	e->name, vote.election_id, vote.election_owner, vote.from,
	LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
	}
	}

	// Reached when we lose, or when a pass was ignored because of dampening
	e->last_election_loss = tm_now;

	do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
	e->name, vote.election_id, vote.election_owner, vote.op,
	vote.from, reason);

	election_reset(e);
	send_no_vote(your_node, &vote);
	e->state = election_lost;
	return e->state;
	}

	/*!
	 * \brief Reset any election dampening currently in effect
	 *
	 * Clears the recorded time of the last election loss.
	 *
	 * \param[in,out] e  Election object to clear (ignored if \c NULL)
	 */
	void
	election_clear_dampening(election_t *e)
	{
	    // Tolerate a missing election, as the other election_*() functions do
	    if (e != NULL) {
	        e->last_election_loss = 0;
	    }
	}
	diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
	index 4d2e7a31fe..99d029e21e 100644
	--- a/lib/cluster/membership.c
	+++ b/lib/cluster/membership.c
	@@ -1,1570 +1,1591 @@
	/*
	* Copyright 2004-2024 the Pacemaker project contributors
	*
	* The version control history for this file may have further details.
	*
	* This source code is licensed under the GNU Lesser General Public License
	* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#ifndef _GNU_SOURCE
	# define _GNU_SOURCE
	#endif

	#include <inttypes.h> // PRIu32
	#include <sys/param.h>
	#include <sys/types.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <string.h>
	#include <glib.h>
	#include <crm/common/ipc.h>
	#include <crm/common/xml_internal.h>
	#include <crm/cluster/internal.h>
	#include <crm/common/xml.h>
	#include <crm/stonith-ng.h>
	#include "crmcluster_private.h"

	/* The peer cache remembers cluster nodes that have been seen.
	* This is managed mostly automatically by libcluster, based on
	* cluster membership events.
	*
	* Because cluster nodes can have conflicting names or UUIDs,
	* the hash table key is a uniquely generated ID.
	*
	* @COMPAT When this is internal, rename to cluster_node_member_cache and make
	* static.
	*/
	GHashTable *crm_peer_cache = NULL;

	/*
	* The remote peer cache tracks pacemaker_remote nodes. While the
	* value has the same type as the peer cache's, it is tracked separately for
	* three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
	* so the name (which is also the UUID) is used as the hash table key; there
	* is no equivalent of membership events, so management is not automatic; and
	* most users of the peer cache need to exclude pacemaker_remote nodes.
	*
	* That said, using a single cache would be more logical and less error-prone,
	* so it would be a good idea to merge them one day.
	*
	* libcluster provides two avenues for populating the cache:
	* pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
	* directly manage it, while refresh_remote_nodes() populates it via the CIB.
	*/
	GHashTable *crm_remote_peer_cache = NULL;

	/*
	* The CIB cluster node cache tracks cluster nodes that have been seen in
	* the CIB. It is useful mainly when a caller needs to know about a node that
	* may no longer be in the membership, but doesn't want to add the node to the
	* main peer cache tables.
	*/
	static GHashTable *cluster_node_cib_cache = NULL;

	unsigned long long crm_peer_seq = 0;
	gboolean crm_have_quorum = FALSE;
	static bool autoreap = true;

	// Flag setting and clearing for crm_node_t:flags

	/* Set flags_to_set in (peer)->flags via pcmk__set_flags_as(), passing the
	* calling function and line, LOG_TRACE, and the peer's name
	*/
	#define set_peer_flags(peer, flags_to_set) do { \
	(peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
	"Peer", (peer)->uname, \
	(peer)->flags, (flags_to_set), \
	#flags_to_set); \
	} while (0)

	/* Clear flags_to_clear in (peer)->flags via pcmk__clear_flags_as(), passing
	* the calling function and line, LOG_TRACE, and the peer's name
	*/
	#define clear_peer_flags(peer, flags_to_clear) do { \
	(peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
	LOG_TRACE, \
	"Peer", (peer)->uname, \
	(peer)->flags, (flags_to_clear), \
	#flags_to_clear); \
	} while (0)

	// Forward declarations (the definitions are not in this part of the file)
	static void update_peer_uname(crm_node_t *node, const char *uname);
	static crm_node_t *find_cib_cluster_node(const char *id, const char *uname);

	/*!
	 * \internal
	 * \brief Count the entries in the Pacemaker Remote node cache
	 *
	 * \return Number of cached Pacemaker Remote nodes (0 if the cache has not
	 *         been created)
	 */
	unsigned int
	pcmk__cluster_num_remote_nodes(void)
	{
	    return (crm_remote_peer_cache == NULL)? 0U
	           : g_hash_table_size(crm_remote_peer_cache);
	}

	/*!
	* \internal
	* \brief Get a remote node cache entry, creating it if necessary
	*
	* \param[in] node_name Name of remote node
	*
	* \return Cache entry for node on success, or \c NULL (and set \c errno)
	* otherwise
	*
	* \note When creating a new entry, this will leave the node state undetermined.
	* The caller should also call \c pcmk__update_peer_state() if the state
	* is known.
	* \note Because this can add and remove cache entries, callers should not
	* assume any previously obtained cache entry pointers remain valid.
	*/
	crm_node_t *
	pcmk__cluster_lookup_remote_node(const char *node_name)
	{
	crm_node_t *node;
	char *node_name_copy = NULL;

	if (node_name == NULL) {
	errno = EINVAL;
	return NULL;
	}

	/* It's theoretically possible that the node was added to the cluster peer
	* cache before it was known to be a Pacemaker Remote node. Remove that
	- * entry unless it has a node ID, which means the name actually is
	+ * entry unless it has an XML ID, which means the name actually is
	* associated with a cluster node. (@TODO return an error in that case?)
	*/
	- node = pcmk__search_node_caches(0, node_name,
	+ node = pcmk__search_node_caches(0, node_name, NULL,
	pcmk__node_search_cluster_member);
	- if ((node != NULL) && (node->uuid == NULL)) {
	+ if ((node != NULL)
	+ && ((node->uuid == NULL)
	+ /* This assumes only Pacemaker Remote nodes have their XML ID the
	+ * same as their node name
	+ */
	+ \|\| pcmk__str_eq(node->uname, node->uuid, pcmk__str_none))) {
	+
	/* node_name could be a pointer into the cache entry being removed, so
	* reassign it to a copy before the original gets freed
	*/
	node_name_copy = strdup(node_name);
	if (node_name_copy == NULL) {
	errno = ENOMEM;
	return NULL;
	}
	node_name = node_name_copy;
	pcmk__cluster_forget_cluster_node(0, node_name);
	}

	/* Return existing cache entry if one exists */
	node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
	if (node) {
	free(node_name_copy);
	return node;
	}

	/* Allocate a new entry */
	node = calloc(1, sizeof(crm_node_t));
	if (node == NULL) {
	free(node_name_copy);
	return NULL;
	}

	/* Populate the essential information */
	set_peer_flags(node, crm_remote_node);
	node->uuid = strdup(node_name);
	if (node->uuid == NULL) {
	free(node);
	errno = ENOMEM;
	free(node_name_copy);
	return NULL;
	}

	/* Add the new entry to the cache. The entry's uuid string doubles as the
	* table key (the remote cache is created without a key destructor, and the
	* uuid is freed together with the entry).
	*/
	g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
	crm_trace("added %s to remote cache", node_name);

	/* Update the entry's uname, ensuring peer status callbacks are called */
	update_peer_uname(node, node_name);
	free(node_name_copy);
	return node;
	}

	/*!
	 * \internal
	 * \brief Drop a node's entry from the Pacemaker Remote node cache
	 *
	 * \param[in] node_name  Name of node to remove from cache
	 *
	 * \note The caller must be careful not to use \p node_name after calling this
	 *       function if it might be a pointer into the cache entry being removed.
	 */
	void
	pcmk__cluster_forget_remote_node(const char *node_name)
	{
	    /* Look the entry up before removing it: node_name could point into the
	     * entry itself, in which case it could not be logged after removal.
	     */
	    if (g_hash_table_lookup(crm_remote_peer_cache, node_name) == NULL) {
	        return;
	    }

	    crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
	    g_hash_table_remove(crm_remote_peer_cache, node_name);
	}

	/*!
	 * \internal
	 * \brief Derive a node's membership state from its CIB status entry
	 *
	 * \param[in] node_state  XML of node state
	 *
	 * \return \c CRM_NODE_LOST if \c PCMK__XA_IN_CCM is successfully read as
	 *         false, \c CRM_NODE_MEMBER otherwise
	 * \note Unlike most boolean XML attributes, this one defaults to true, for
	 *       backward compatibility with older controllers that don't set it.
	 */
	static const char *
	remote_state_from_cib(const xmlNode *node_state)
	{
	    bool in_ccm = false;
	    int rc = pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM, &in_ccm);

	    if ((rc != pcmk_rc_ok) || in_ccm) {
	        return CRM_NODE_MEMBER;
	    }
	    return CRM_NODE_LOST;
	}

	/* user data for looping through remote node xpath searches */
	struct refresh_data {
	const char field; / XML attribute to check for node name */
	gboolean has_state; /* whether to update node state based on XML */
	};

	/*!
	 * \internal
	 * \brief Process one pacemaker_remote node xpath search result
	 *
	 * Adds the node named in the result to the remote node cache if it is
	 * missing. If it is present and still marked dirty, it is marked clean. In
	 * both cases the node's state is updated when the result carries one.
	 *
	 * \param[in] result     XML search result
	 * \param[in] user_data  what to look for in the XML
	 *                       (<tt>struct refresh_data *</tt>)
	 */
	static void
	remote_cache_refresh_helper(xmlNode *result, void *user_data)
	{
	    const struct refresh_data *data = user_data;
	    const char *remote = crm_element_value(result, data->field);
	    const char *state = NULL;
	    crm_node_t *node;

	    CRM_CHECK(remote != NULL, return);

	    /* Determine node's state, if the result has it */
	    if (data->has_state) {
	        state = remote_state_from_cib(result);
	    }

	    /* Check whether cache already has entry for node */
	    node = g_hash_table_lookup(crm_remote_peer_cache, remote);

	    if (node == NULL) {
	        /* Node is not in cache, so add a new entry for it */
	        node = pcmk__cluster_lookup_remote_node(remote);
	        pcmk__assert(node != NULL);
	        if (state) {
	            pcmk__update_peer_state(__func__, node, state, 0);
	        }

	    } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
	        /* Node is in cache and hasn't been updated already, so mark it clean */
	        clear_peer_flags(node, crm_node_dirty);
	        if (state) {
	            pcmk__update_peer_state(__func__, node, state, 0);
	        }
	    }
	}

	// GHFunc: flag one cache entry as dirty (key and user_data are unused)
	static void
	mark_dirty(gpointer key, gpointer value, gpointer user_data)
	{
	    crm_node_t *entry = value;

	    set_peer_flags(entry, crm_node_dirty);
	}

	// GHRFunc: report whether a cache entry is still flagged dirty
	static gboolean
	is_dirty(gpointer key, gpointer value, gpointer user_data)
	{
	    const crm_node_t *entry = value;

	    return pcmk_is_set(entry->flags, crm_node_dirty);
	}

	/*!
	 * \internal
	 * \brief Rebuild the remote node cache from CIB XML
	 *
	 * \param[in] cib  CIB XML to parse
	 */
	static void
	refresh_remote_nodes(xmlNode *cib)
	{
	    /* Guest nodes and remote nodes in the status section: the node name is
	     * the ID, and the entry carries state
	     */
	    struct refresh_data status_query = {
	        .field = PCMK_XA_ID,
	        .has_state = TRUE,
	    };

	    /* Guest nodes in the configuration section: the node name is the value */
	    struct refresh_data guest_query = {
	        .field = PCMK_XA_VALUE,
	        .has_state = FALSE,
	    };

	    /* Remote nodes in the configuration section: the node name is the ID */
	    struct refresh_data remote_query = {
	        .field = PCMK_XA_ID,
	        .has_state = FALSE,
	    };

	    pcmk__cluster_init_node_caches();

	    /* Flag every existing entry as dirty so that entries missing from the CIB
	     * can be removed afterward. The cache is not simply emptied, because
	     * changes in state need to be detected.
	     */
	    g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);

	    crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS,
	                             remote_cache_refresh_helper, &status_query);

	    /* Nodes may have just been added to the configuration and not have a
	     * status entry yet. In that case, the cached node state will be left
	     * NULL, so that the peer status callback isn't called until we're sure
	     * the node started successfully.
	     */
	    crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG,
	                             remote_cache_refresh_helper, &guest_query);
	    crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG,
	                             remote_cache_refresh_helper, &remote_query);

	    /* Whatever is still dirty was not seen in the CIB */
	    g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
	}

	/*!
	* \internal
	* \brief Check whether a node is an active cluster node
	*
	* Remote nodes are never considered active. This guarantees that they can never
	* become DC.
	*
	* \param[in] node Node to check
	*
	* \return \c true if the node is an active cluster node, or \c false otherwise
	*/
	bool
	pcmk__cluster_is_node_active(const crm_node_t *node)
	{
	const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();

	if ((node == NULL) \|\| pcmk_is_set(node->flags, crm_remote_node)) {
	return false;
	}

	switch (cluster_layer) {
	case pcmk_cluster_layer_corosync:
	#if SUPPORT_COROSYNC
	return pcmk__corosync_is_peer_active(node);
	#else
	break;
	#endif // SUPPORT_COROSYNC
	default:
	break;
	}

	crm_err("Unhandled cluster layer: %s",
	pcmk_cluster_layer_text(cluster_layer));
	return false;
	}

	/*!
	 * \internal
	 * \brief Decide whether a cluster node cache entry should be dropped
	 *
	 * An entry is dropped when it matches the search object and the node is
	 * inactive. Matching works as follows:
	 * * A \c NULL search object matches nothing.
	 * * If the search object has a nonzero ID, only the ID is compared (the name
	 *   is ignored).
	 * * Otherwise, the names are compared case-insensitively. (If both names are
	 *   \c NULL, it's a match.)
	 *
	 * \param[in] key        Ignored
	 * \param[in] value      \c crm_node_t object from cluster node cache
	 * \param[in] user_data  \c crm_node_t object to match against (search object)
	 *
	 * \return \c TRUE if the node entry should be removed from \c crm_peer_cache,
	 *         or \c FALSE otherwise
	 */
	static gboolean
	should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
	{
	    crm_node_t *cached = value;
	    crm_node_t *wanted = user_data;

	    if (wanted == NULL) {
	        return FALSE;
	    }

	    if (wanted->id != 0) {
	        // @TODO Consider name even if ID is set?
	        if (cached->id != wanted->id) {
	            return FALSE;
	        }

	    } else if (!pcmk__str_eq(cached->uname, wanted->uname, pcmk__str_casei)) {
	        return FALSE;
	    }

	    // Active nodes are never forgotten
	    if (pcmk__cluster_is_node_active(value)) {
	        return FALSE;
	    }

	    crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership "
	             "cache",
	             pcmk__s(cached->uname, "(unknown)"), cached->id);
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Purge matching inactive nodes from the cluster node cache
	 *
	 * All inactive nodes matching \p id and \p node_name as described in
	 * \c should_forget_cluster_node documentation are removed from the cache.
	 *
	 * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed
	 * from the cache regardless of ID and name. This differs from clearing the
	 * cache, in that entries for active nodes are preserved.
	 *
	 * \param[in] id         ID of node to remove from cache (0 to ignore)
	 * \param[in] node_name  Name of node to remove from cache (ignored if \p id is
	 *                       nonzero)
	 *
	 * \note \p node_name is not modified directly, but it will be freed if it's a
	 *       pointer into a cache entry that is removed.
	 */
	void
	pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
	{
	    crm_node_t wanted = { 0, };
	    char *description = NULL;   // For logging
	    guint removed = 0;

	    if (crm_peer_cache == NULL) {
	        crm_trace("Membership cache not initialized, ignoring removal request");
	        return;
	    }

	    wanted.id = id;
	    wanted.uname = pcmk__str_copy(node_name); // May log after original freed

	    /* Build the log description before removing anything, since node_name
	     * may point into an entry that is about to be freed
	     */
	    if (id > 0) {
	        description = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id);

	    } else if (node_name != NULL) {
	        description = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name);
	    }

	    removed = g_hash_table_foreach_remove(crm_peer_cache,
	                                          should_forget_cluster_node,
	                                          &wanted);

	    if (removed == 0) {
	        crm_info("No inactive cluster nodes%s%s to remove from the membership "
	                 "cache",
	                 ((description != NULL)? " with " : ""), pcmk__s(description, ""));

	    } else if (description == NULL) {
	        crm_notice("Removed all (%u) inactive cluster nodes from the "
	                   "membership cache",
	                   removed);

	    } else {
	        crm_notice("Removed %u inactive node%s with %s from the membership "
	                   "cache",
	                   removed, pcmk__plural_s(removed), description);
	    }

	    free(wanted.uname);
	    free(description);
	}

	/*
	 * GHFunc: add one to the counter that user_data points to (an unsigned int)
	 * if the node in value is an active cluster node. key is unused.
	 */
	static void
	count_peer(gpointer key, gpointer value, gpointer user_data)
	{
	    unsigned int *count = user_data;
	    crm_node_t *node = value;

	    if (pcmk__cluster_is_node_active(node)) {
	        // Increment the caller's counter, not the local pointer
	        *count = *count + 1;
	    }
	}

	/*!
	 * \internal
	 * \brief Count the active cluster nodes currently in the node cache
	 *
	 * Remote nodes are never considered active. This guarantees that they can
	 * never become DC.
	 *
	 * \return Number of active nodes in the cluster node cache (0 if the cache
	 *         has not been created)
	 */
	unsigned int
	pcmk__cluster_num_active_nodes(void)
	{
	    unsigned int active = 0;

	    if (crm_peer_cache == NULL) {
	        return active;
	    }

	    // count_peer() increments the counter once per active entry
	    g_hash_table_foreach(crm_peer_cache, count_peer, &active);
	    return active;
	}

	/*!
	 * \internal
	 * \brief Free a node cache entry and all strings it owns
	 *
	 * Used as the value destructor for the node cache hash tables.
	 *
	 * \param[in,out] data  Node cache entry (<tt>crm_node_t *</tt>) to free
	 */
	static void
	destroy_crm_node(gpointer data)
	{
	    crm_node_t *entry = data;

	    crm_trace("Destroying entry for node %u: %s", entry->id, entry->uname);

	    // Release the owned strings first, then the entry itself
	    free(entry->conn_host);
	    free(entry->expected);
	    free(entry->uuid);
	    free(entry->state);
	    free(entry->uname);
	    free(entry);
	}

	/*!
	 * \internal
	 * \brief Create any node caches that do not exist yet
	 *
	 * Caches that already exist are left untouched, so this is safe to call
	 * repeatedly.
	 */
	void
	pcmk__cluster_init_node_caches(void)
	{
	    // Cluster member cache: keys are freed along with the entry
	    if (crm_peer_cache == NULL) {
	        crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
	    }

	    // Pacemaker Remote cache: no separate key destructor is registered
	    if (crm_remote_peer_cache == NULL) {
	        crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
	    }

	    // Cache of cluster nodes configured in the CIB
	    if (cluster_node_cib_cache == NULL) {
	        cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
	    }
	}

	/*!
	 * \internal
	 * \brief Destroy node caches
	 *
	 * Each cache that exists is destroyed and its global pointer is reset to
	 * \c NULL, so the caches can be initialized again later.
	 */
	void
	pcmk__cluster_destroy_node_caches(void)
	{
	    if (crm_peer_cache != NULL) {
	        // g_hash_table_size() returns an unsigned count, hence %u
	        crm_trace("Destroying peer cache with %u members",
	                  g_hash_table_size(crm_peer_cache));
	        g_hash_table_destroy(crm_peer_cache);
	        crm_peer_cache = NULL;
	    }

	    if (crm_remote_peer_cache != NULL) {
	        crm_trace("Destroying remote peer cache with %u members",
	                  pcmk__cluster_num_remote_nodes());
	        g_hash_table_destroy(crm_remote_peer_cache);
	        crm_remote_peer_cache = NULL;
	    }

	    if (cluster_node_cib_cache != NULL) {
	        crm_trace("Destroying configured cluster node cache with %u members",
	                  g_hash_table_size(cluster_node_cib_cache));
	        g_hash_table_destroy(cluster_node_cib_cache);
	        cluster_node_cib_cache = NULL;
	    }
	}

	// Client callback invoked after a peer's status changes (NULL if none is set)
	static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
	                                    const void *) = NULL;

	/*!
	 * \internal
	 * \brief Set a client function that will be called after peer status changes
	 *
	 * \param[in] dispatch  Pointer to function to use as callback
	 *
	 * \note Client callbacks should do only client-specific handling. Callbacks
	 *       must not add or remove entries in the peer caches.
	 */
	void
	pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type,
	                                                   crm_node_t *, const void *))
	{
	    // @TODO Improve documentation of peer_status_callback
	    peer_status_callback = dispatch;
	}

	/*!
	 * \internal
	 * \brief Enable or disable automatic reaping of lost nodes
	 *
	 * When enabled (the default), \c crm_update_peer_proc() also updates the
	 * peer state to \c CRM_NODE_MEMBER or \c CRM_NODE_LOST, and a peer whose
	 * state becomes anything other than \c CRM_NODE_MEMBER is reaped when its
	 * state is updated.
	 *
	 * Callers should leave this enabled unless they plan to manage the cache
	 * separately on their own.
	 *
	 * \param[in] enable  \c true to enable automatic reaping, \c false to
	 *                    disable it
	 */
	void
	pcmk__cluster_set_autoreap(bool enable)
	{
	    autoreap = enable;
	}

	static void
	dump_peer_hash(int level, const char *caller)
	{
	GHashTableIter iter;
	const char *id = NULL;
	crm_node_t *node = NULL;

	g_hash_table_iter_init(&iter, crm_peer_cache);
	while (g_hash_table_iter_next(&iter, (gpointer ) &id, (gpointer ) &node)) {
	do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
	}
	}

	/*!
	 * \internal
	 * \brief Hash table predicate that matches an entry by its value pointer
	 *
	 * \param[in] key        Ignored
	 * \param[in] value      Value of the table entry being examined
	 * \param[in] user_data  Value pointer to look for
	 *
	 * \return \c TRUE if \p value is the same pointer as \p user_data,
	 *         otherwise \c FALSE
	 */
	static gboolean
	hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
	{
	    if (value == user_data) {
	        return TRUE;
	    }
	    return FALSE;
	}

	/*!
	* \internal
	* \brief Search cluster member node cache
	*
	* \param[in] id If not 0, cluster node ID to search for
	* \param[in] uname If not NULL, node name to search for
	* \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
	* node ID to search for
	*
	* \return Cluster node cache entry if found, otherwise NULL
	*
	* \note This is not a pure lookup: when the name match and the ID match are
	* different entries, the name-matched entry may be removed from
	* crm_peer_cache (see the final branches below).
	*/
	static crm_node_t *
	search_cluster_member_cache(unsigned int id, const char *uname,
	const char *uuid)
	{
	GHashTableIter iter;
	crm_node_t *node = NULL;
	crm_node_t *by_id = NULL;
	crm_node_t *by_name = NULL;

	pcmk__assert((id > 0) \|\| (uname != NULL));

	pcmk__cluster_init_node_caches();

	// Pass 1: first entry whose name matches case-insensitively
	if (uname != NULL) {
	g_hash_table_iter_init(&iter, crm_peer_cache);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	if(node->uname && strcasecmp(node->uname, uname) == 0) {
	crm_trace("Name match: %s = %p", node->uname, node);
	by_name = node;
	break;
	}
	}
	}

	// Pass 2: first entry matching the numeric ID, or (only if id is 0) the
	// uuid argument; either result is stored in by_id
	if (id > 0) {
	g_hash_table_iter_init(&iter, crm_peer_cache);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	if(node->id == id) {
	crm_trace("ID match: %u = %p", node->id, node);
	by_id = node;
	break;
	}
	}

	} else if (uuid != NULL) {
	g_hash_table_iter_init(&iter, crm_peer_cache);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	- if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
	- crm_trace("UUID match: %s = %p", node->uuid, node);
	+ const char *this_xml_id = pcmk__cluster_get_xml_id(node);
	+
	+ if (pcmk__str_eq(uuid, this_xml_id, pcmk__str_none)) {
	+ crm_trace("Found cluster node cache entry by XML ID %s",
	+ this_xml_id);
	by_id = node;
	break;
	}
	}
	}

	// Reconcile the two lookups; by_id wins unless a branch below says otherwise
	node = by_id; /* Good default */
	if(by_id == by_name) {
	/* Nothing to do if they match (both NULL counts) */
	crm_trace("Consistent: %p for %u/%s", by_id, id, uname);

	} else if(by_id == NULL && by_name) {
	crm_trace("Only one: %p for %u/%s", by_name, id, uname);

	// The name matched an entry that already has a different, nonzero ID
	if(id && by_name->id) {
	dump_peer_hash(LOG_WARNING, __func__);
	crm_crit("Node %u and %u share the same name '%s'",
	id, by_name->id, uname);
	node = NULL; /* Create a new one */

	} else {
	node = by_name;
	}

	} else if(by_name == NULL && by_id) {
	crm_trace("Only one: %p for %u/%s", by_id, id, uname);

	// The ID matched an entry with a different name; by_id is still returned
	if(uname && by_id->uname) {
	dump_peer_hash(LOG_WARNING, __func__);
	crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
	uname, by_id->uname, id, uname);
	}

	// From here on both lookups succeeded but found different entries
	} else if(uname && by_id->uname) {
	if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
	crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
	g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);

	} else {
	crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
	dump_peer_hash(LOG_INFO, __func__);
	crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
	TRUE);
	}

	} else if(id && by_name->id) {
	crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);

	} else {
	/* Simple merge */

	/* Only corosync-based clusters use node IDs. The functions that call
	* pcmk__update_peer_state() and crm_update_peer_proc() only know
	* nodeid, so 'by_id' is authoritative when merging.
	*/
	dump_peer_hash(LOG_DEBUG, __func__);

	crm_info("Merging %p into %p", by_name, by_id);
	g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
	}

	return node;
	}

	/*!
	* \internal
	* \brief Search caches for a node (cluster or Pacemaker Remote)
	*
	- * \param[in] id If not 0, cluster node ID to search for
	- * \param[in] uname If not NULL, node name to search for
	- * \param[in] flags Group of enum pcmk__node_search_flags
	+ * \param[in] id If not 0, cluster node ID to search for
	+ * \param[in] uname If not NULL, node name to search for
	+ * \param[in] xml_id If not NULL, CIB XML ID of node to search for
	+ * \param[in] flags Group of enum pcmk__node_search_flags
	*
	* \return Node cache entry if found, otherwise NULL
	*
	* \note The caches are consulted in a fixed order, each only if its flag is
	* set and nothing was found yet: Pacemaker Remote cache, cluster member
	* cache, then the cache of cluster nodes from the CIB.
	*/
	crm_node_t *
	-pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
	+pcmk__search_node_caches(unsigned int id, const char *uname,
	+ const char *xml_id, uint32_t flags)
	{
	crm_node_t *node = NULL;

	- pcmk__assert((id > 0) \|\| (uname != NULL));
	+ pcmk__assert((id > 0) \|\| (uname != NULL) \|\| (xml_id != NULL));

	pcmk__cluster_init_node_caches();

	- if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) {
	- node = g_hash_table_lookup(crm_remote_peer_cache, uname);
	+ if (pcmk_is_set(flags, pcmk__node_search_remote)) {
	+ if (uname != NULL) {
	+ node = g_hash_table_lookup(crm_remote_peer_cache, uname);
	+ } else if (xml_id != NULL) {
	+ node = g_hash_table_lookup(crm_remote_peer_cache, xml_id);
	+ }
	}

	// Next, try the cluster member cache
	if ((node == NULL)
	&& pcmk_is_set(flags, pcmk__node_search_cluster_member)) {

	- node = search_cluster_member_cache(id, uname, NULL);
	+ node = search_cluster_member_cache(id, uname, xml_id);
	}

	// Last, try the cache of cluster nodes from the CIB
	if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
	- char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
	+ if (xml_id != NULL) {
	+ node = find_cib_cluster_node(xml_id, uname);
	+ } else {
	+ // Assumes XML ID is node ID as string (as with Corosync)
	+ char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);

	- node = find_cib_cluster_node(id_str, uname);
	- free(id_str);
	+ node = find_cib_cluster_node(id_str, uname);
	+ free(id_str);
	+ }
	}

	return node;
	}

	/*!
	 * \internal
	 * \brief Purge a node from cache (both cluster and Pacemaker Remote)
	 *
	 * \param[in] node_name  If not NULL, purge only nodes with this name
	 * \param[in] node_id    If not 0, purge cluster nodes only if they have this
	 *                       ID
	 *
	 * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
	 *       If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
	 *       nodes that match \p node_name will be purged, and cluster nodes that
	 *       match both \p node_name and \p node_id will be purged.
	 * \note The caller must be careful not to use \p node_name after calling this
	 *       function if it might be a pointer into a cache entry being removed.
	 */
	void
	pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
	{
	    char *node_name_copy = NULL;

	    if ((node_name == NULL) && (node_id == 0U)) {
	        return;
	    }

	    /* Purge from Pacemaker Remote node cache. Skip this if the cache has not
	     * been created, mirroring how pcmk__cluster_forget_cluster_node() ignores
	     * an uninitialized cluster member cache.
	     */
	    if ((node_name != NULL) && (crm_remote_peer_cache != NULL)
	        && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) {
	        /* node_name could be a pointer into the cache entry being purged,
	         * so reassign it to a copy before the original gets freed
	         */
	        node_name_copy = pcmk__str_copy(node_name);
	        node_name = node_name_copy;

	        crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
	        g_hash_table_remove(crm_remote_peer_cache, node_name);
	    }

	    pcmk__cluster_forget_cluster_node(node_id, node_name);
	    free(node_name_copy);
	}

	#if SUPPORT_COROSYNC
	/*!
	 * \internal
	 * \brief Remove inactive cache entries whose name conflicts with a node
	 *
	 * An entry conflicts when it has a nonzero ID different from that of
	 * \p node but the same name (compared case-insensitively). Active entries
	 * are never removed. Nothing is done unless \p node has both an ID and a
	 * name and \c pcmk__corosync_has_nodelist() returns true.
	 *
	 * \param[in] node  Node whose ID and name are authoritative
	 *
	 * \return Number of cache entries removed
	 */
	static guint
	remove_conflicting_peer(crm_node_t *node)
	{
	    guint matches = 0;  // Same type as the return value
	    GHashTableIter iter;
	    crm_node_t *existing_node = NULL;

	    if ((node->id == 0) || (node->uname == NULL)) {
	        return 0;
	    }

	    if (!pcmk__corosync_has_nodelist()) {
	        return 0;
	    }

	    g_hash_table_iter_init(&iter, crm_peer_cache);
	    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
	        if (existing_node->id > 0
	            && existing_node->id != node->id
	            && existing_node->uname != NULL
	            && strcasecmp(existing_node->uname, node->uname) == 0) {

	            if (pcmk__cluster_is_node_active(existing_node)) {
	                continue;
	            }

	            crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
	                     existing_node->id, existing_node->uname, node->id);

	            // Removing through the iterator keeps the iteration valid
	            g_hash_table_iter_remove(&iter);
	            matches++;
	        }
	    }

	    return matches;
	}
	#endif

	/*!
	* \internal
	* \brief Get a cluster node cache entry, possibly creating one if not found
	*
	* If \c pcmk__node_search_cluster_member is set in \p flags, the return value
	* is guaranteed not to be \c NULL. A new cache entry is created if one does not
	* already exist.
	*
	* \param[in] id If not 0, cluster node ID to search for
	* \param[in] uname If not NULL, node name to search for
	* \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
	* node ID to search for
	* \param[in] flags Group of enum pcmk__node_search_flags
	*
	* \return (Possibly newly created) cluster node cache entry
	*/
	/* coverity[-alloc] Memory is referenced in one or both hashtables */
	crm_node_t *
	pcmk__get_node(unsigned int id, const char uname, const char uuid,
	uint32_t flags)
	{
	crm_node_t *node = NULL;
	char *uname_lookup = NULL;

	pcmk__assert((id > 0) \|\| (uname != NULL));

	pcmk__cluster_init_node_caches();

	// Check the Pacemaker Remote node cache first
	// NOTE(review): uname may be NULL here when only id was given; confirm that
	// the remote cache's hash function tolerates a NULL key
	if (pcmk_is_set(flags, pcmk__node_search_remote)) {
	node = g_hash_table_lookup(crm_remote_peer_cache, uname);
	if (node != NULL) {
	return node;
	}
	}

	// Without the cluster-member flag, nothing further is searched or created
	if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
	return NULL;
	}

	node = search_cluster_member_cache(id, uname, uuid);

	/* if uname wasn't provided, and find_peer did not turn up a uname based on id.
	* we need to do a lookup of the node name using the id in the cluster membership. */
	if ((node == NULL \|\| node->uname == NULL) && (uname == NULL)) {
	uname_lookup = pcmk__cluster_node_name(id);
	}

	if (uname_lookup) {
	uname = uname_lookup;
	crm_trace("Inferred a name of '%s' for node %u", uname, id);

	/* try to turn up the node one more time now that we know the uname. */
	if (node == NULL) {
	node = search_cluster_member_cache(id, uname, uuid);
	}
	}

	// Still nothing: create an empty entry keyed by a freshly generated UUID
	if (node == NULL) {
	char *uniqueid = crm_generate_uuid();

	node = pcmk__assert_alloc(1, sizeof(crm_node_t));

	crm_info("Created entry %s/%p for node %s/%u (%d total)",
	uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
	g_hash_table_replace(crm_peer_cache, uniqueid, node);
	}

	if(id > 0 && uname && (node->id == 0 \|\| node->uname == NULL)) {
	crm_info("Node %u is now known as %s", id, uname);
	}

	// Fill in only the fields the entry is missing; existing values are kept
	if(id > 0 && node->id == 0) {
	node->id = id;
	}

	if (uname && (node->uname == NULL)) {
	update_peer_uname(node, uname);
	}

	// NOTE(review): this block only logs; node->uuid is not assigned here.
	// Presumably the helper called below stores it on the node -- confirm.
	if(node->uuid == NULL) {
	if (uuid == NULL) {
	- uuid = pcmk__cluster_node_uuid(node);
	+ uuid = pcmk__cluster_get_xml_id(node);
	}

	if (uuid) {
	crm_info("Node %u has uuid %s", id, uuid);

	} else {
	crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
	}
	}

	free(uname_lookup);

	return node;
	}

	/*!
	 * \internal
	 * \brief Update a node's uname
	 *
	 * \param[in,out] node   Node object to update
	 * \param[in]     uname  New name to set
	 *
	 * \note This function should not be called within a peer cache iteration,
	 *       because in some cases it can remove conflicting cache entries,
	 *       which would invalidate the iterator.
	 */
	static void
	update_peer_uname(crm_node_t *node, const char *uname)
	{
	    CRM_CHECK(uname != NULL,
	              crm_err("Bug: can't update node name without name"); return);
	    CRM_CHECK(node != NULL,
	              crm_err("Bug: can't update node name to %s without node", uname);
	              return);

	    if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
	        crm_debug("Node uname '%s' did not change", uname);
	        return;
	    }

	    // Warn once if any character of the new name is an ASCII capital
	    for (const char *c = uname; *c; ++c) {
	        if ((*c >= 'A') && (*c <= 'Z')) {
	            crm_warn("Node names with capitals are discouraged, consider changing '%s'",
	                     uname);
	            break;
	        }
	    }

	    pcmk__str_update(&node->uname, uname);

	    if (peer_status_callback != NULL) {
	        peer_status_callback(crm_status_uname, node, NULL);
	    }

	#if SUPPORT_COROSYNC
	    // For Corosync cluster nodes, drop inactive entries that now conflict
	    if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
	        && !pcmk_is_set(node->flags, crm_remote_node)) {

	        remove_conflicting_peer(node);
	    }
	#endif
	}

	/*!
	 * \internal
	 * \brief Map a process flag to a string suitable for log messages
	 *
	 * \param[in] proc  Process flag
	 *
	 * \return "none" or "corosync-cpg" for the flags handled here, otherwise
	 *         "unknown"
	 */
	static inline const char *
	proc2text(enum crm_proc_flag proc)
	{
	    switch (proc) {
	        case crm_proc_none:
	            return "none";
	        case crm_proc_cpg:
	            return "corosync-cpg";
	    }

	    // Any value not listed above
	    return "unknown";
	}

	/*!
	 * \internal
	 * \brief Update a node's process information (and potentially state)
	 *
	 * \param[in]     source  Caller's function name (for log messages)
	 * \param[in,out] node    Node object to update
	 * \param[in]     flag    Bitmask of new process information
	 * \param[in]     status  node status (online, offline, etc.)
	 *
	 * \return NULL if any node was reaped from peer caches, value of node
	 *         otherwise
	 *
	 * \note If this function returns NULL, the supplied node object was likely
	 *       freed and should not be used again. This function should not be
	 *       called within a cache iteration if reaping is possible, otherwise
	 *       reaping could invalidate the iterator.
	 */
	crm_node_t *
	crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
	{
	    uint32_t last = 0;
	    gboolean changed = FALSE;

	    CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
	                                    source, proc2text(flag), status);
	                            return NULL);

	    /* Pacemaker doesn't spawn processes on remote nodes */
	    if (pcmk_is_set(node->flags, crm_remote_node)) {
	        return node;
	    }

	    last = node->processes;
	    if (status == NULL) {
	        // No status given: flag replaces the whole process bitmask
	        node->processes = flag;
	        if (node->processes != last) {
	            changed = TRUE;
	        }

	    } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
	        // Online: set the given bits if any of them is missing
	        if ((node->processes & flag) != flag) {
	            node->processes = pcmk__set_flags_as(__func__, __LINE__,
	                                                 LOG_TRACE, "Peer process",
	                                                 node->uname, node->processes,
	                                                 flag, "processes");
	            changed = TRUE;
	        }

	    } else if (node->processes & flag) {
	        // Any other status: clear the given bits if any of them is set
	        node->processes = pcmk__clear_flags_as(__func__, __LINE__,
	                                               LOG_TRACE, "Peer process",
	                                               node->uname, node->processes,
	                                               flag, "processes");
	        changed = TRUE;
	    }

	    if (changed) {
	        if (status == NULL && flag <= crm_proc_none) {
	            crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
	                     node->id);
	        } else {
	            crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
	                     proc2text(flag), status);
	        }

	        if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
	            node->when_online = time(NULL);

	        } else {
	            node->when_online = 0;
	        }

	        /* Call the client callback first, then update the peer state,
	         * in case the node will be reaped
	         */
	        if (peer_status_callback != NULL) {
	            peer_status_callback(crm_status_processes, node, &last);
	        }

	        /* The client callback shouldn't touch the peer caches,
	         * but as a safety net, bail if the peer cache was destroyed.
	         */
	        if (crm_peer_cache == NULL) {
	            return NULL;
	        }

	        if (autoreap) {
	            const char *peer_state = NULL;

	            if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
	                peer_state = CRM_NODE_MEMBER;
	            } else {
	                peer_state = CRM_NODE_LOST;
	            }
	            node = pcmk__update_peer_state(__func__, node, peer_state, 0);
	        }
	    } else {
	        crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
	                  proc2text(flag), status);
	    }
	    return node;
	}

	/*!
	 * \internal
	 * \brief Update a cluster node cache entry's expected join state
	 *
	 * Nothing is changed for Pacemaker Remote nodes, when \p expected is
	 * \c NULL, or when it equals the current value (compared
	 * case-insensitively).
	 *
	 * \param[in]     source    Caller's function name (for logging)
	 * \param[in,out] node      Node to update
	 * \param[in]     expected  Node's new join state
	 */
	void
	pcmk__update_peer_expected(const char *source, crm_node_t *node,
	                           const char *expected)
	{
	    char *last = NULL;
	    gboolean changed = FALSE;

	    CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
	              return);

	    /* Remote nodes don't participate in joins */
	    if (pcmk_is_set(node->flags, crm_remote_node)) {
	        return;
	    }

	    last = node->expected;
	    if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
	        // Use the copy helper already used elsewhere in this file, rather
	        // than an unchecked strdup()
	        node->expected = pcmk__str_copy(expected);
	        changed = TRUE;
	    }

	    if (changed) {
	        crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
	                 expected, last);
	        // The previous value is freed only once it has been replaced
	        free(last);
	    } else {
	        crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
	                  node->id, expected);
	    }
	}

	/*!
	 * \internal
	 * \brief Update a node's state and membership information
	 *
	 * \param[in]     source      Caller's function name (for log messages)
	 * \param[in,out] node        Node object to update
	 * \param[in]     state       Node's new state
	 * \param[in]     membership  Node's new membership ID
	 * \param[in,out] iter        If not NULL, pointer to node's peer cache
	 *                            iterator
	 *
	 * \return NULL if any node was reaped, value of node otherwise
	 *
	 * \note If this function returns NULL, the supplied node object was likely
	 *       freed and should not be used again. This function may be called from
	 *       within a peer cache iteration if the iterator is supplied.
	 */
	static crm_node_t *
	update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
	                       uint64_t membership, GHashTableIter *iter)
	{
	    gboolean is_member;

	    CRM_CHECK(node != NULL,
	              crm_err("Could not set state for unknown host to %s "
	                      CRM_XS " source=%s", state, source);
	              return NULL);

	    is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
	    if (is_member) {
	        node->when_lost = 0;
	        if (membership) {
	            node->last_seen = membership;
	        }
	    }

	    if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
	        char *last = node->state;

	        if (is_member) {
	            node->when_member = time(NULL);

	        } else {
	            node->when_member = 0;
	        }

	        // Use the copy helper already used elsewhere in this file, rather
	        // than an unchecked strdup()
	        node->state = pcmk__str_copy(state);
	        crm_notice("Node %s state is now %s " CRM_XS
	                   " nodeid=%u previous=%s source=%s", node->uname, state,
	                   node->id, (last? last : "unknown"), source);
	        if (peer_status_callback != NULL) {
	            peer_status_callback(crm_status_nstate, node, last);
	        }
	        free(last);

	        if (autoreap && !is_member
	            && !pcmk_is_set(node->flags, crm_remote_node)) {
	            /* We only autoreap from the peer cache, not the remote peer cache,
	             * because the latter should be managed only by
	             * refresh_remote_nodes().
	             */
	            if(iter) {
	                // Remove through the caller's iterator so it stays valid
	                crm_notice("Purged 1 peer with " PCMK_XA_ID
	                           "=%u and/or uname=%s from the membership cache",
	                           node->id, node->uname);
	                g_hash_table_iter_remove(iter);

	            } else {
	                pcmk__cluster_forget_cluster_node(node->id, node->uname);
	            }
	            node = NULL;
	        }

	    } else {
	        crm_trace("Node %s state is unchanged (%s) " CRM_XS
	                  " nodeid=%u source=%s", node->uname, state, node->id, source);
	    }
	    return node;
	}

	/*!
	 * \brief Update a node's state and membership information
	 *
	 * \param[in]     source      Caller's function name (for log messages)
	 * \param[in,out] node        Node object to update
	 * \param[in]     state       Node's new state
	 * \param[in]     membership  Node's new membership ID
	 *
	 * \return NULL if any node was reaped, value of node otherwise
	 *
	 * \note If this function returns NULL, the supplied node object was likely
	 *       freed and should not be used again. This function should not be
	 *       called within a cache iteration if reaping is possible,
	 *       otherwise reaping could invalidate the iterator.
	 */
	crm_node_t *
	pcmk__update_peer_state(const char *source, crm_node_t *node,
	                        const char *state, uint64_t membership)
	{
	    // No iterator is supplied, so any reaping goes through the cache itself
	    return update_peer_state_iter(source, node, state, membership, NULL);
	}

	/*!
	 * \internal
	 * \brief Reap all nodes from cache whose membership information does not match
	 *
	 * Entries last seen in a different membership are moved to
	 * \c CRM_NODE_LOST if they have a known state; entries with no state yet
	 * are only logged.
	 *
	 * \param[in] membership  Membership ID of nodes to keep
	 */
	void
	pcmk__reap_unseen_nodes(uint64_t membership)
	{
	    GHashTableIter cache_iter;
	    crm_node_t *peer = NULL;

	    crm_trace("Reaping unseen nodes...");
	    g_hash_table_iter_init(&cache_iter, crm_peer_cache);
	    while (g_hash_table_iter_next(&cache_iter, NULL, (gpointer *)&peer)) {
	        if (peer->last_seen == membership) {
	            continue;   // Seen in the current membership
	        }

	        if (peer->state == NULL) {
	            crm_info("State of node %s[%u] is still unknown",
	                     peer->uname, peer->id);
	            continue;
	        }

	        /* Passing our iterator to update_peer_state_iter() lets it remove
	         * the node from crm_peer_cache without invalidating the iteration
	         */
	        update_peer_state_iter(__func__, peer, CRM_NODE_LOST,
	                               membership, &cache_iter);
	    }
	}

	/*
	* Search the cache of cluster nodes from the CIB (cluster_node_cib_cache)
	* by ID and/or name.
	*
	* id: if not NULL, ID to match against each entry
	* uname: if not NULL, node name to match (case-insensitively)
	*
	* Returns the matching entry, or NULL if nothing matched or the ID and name
	* lookups could not be reconciled. A debug message is logged whenever NULL
	* is returned.
	*/
	static crm_node_t *
	find_cib_cluster_node(const char id, const char uname)
	{
	GHashTableIter iter;
	crm_node_t *node = NULL;
	crm_node_t *by_id = NULL;
	crm_node_t *by_name = NULL;

	// Pass 1: first entry whose name matches
	if (uname) {
	g_hash_table_iter_init(&iter, cluster_node_cib_cache);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	if (node->uname && strcasecmp(node->uname, uname) == 0) {
	crm_trace("Name match: %s = %p", node->uname, node);
	by_name = node;
	break;
	}
	}
	}

	// Pass 2: first entry whose ID matches
	if (id) {
	g_hash_table_iter_init(&iter, cluster_node_cib_cache);
	while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	- if(strcasecmp(node->uuid, id) == 0) {
	+ if (pcmk__str_eq(id, pcmk__cluster_get_xml_id(node),
	+ pcmk__str_none)) {
	crm_trace("ID match: %s= %p", id, node);
	by_id = node;
	break;
	}
	}
	}

	// Reconcile the two lookups
	node = by_id; /* Good default */
	if (by_id == by_name) {
	/* Nothing to do if they match (both NULL counts) */
	crm_trace("Consistent: %p for %s/%s", by_id, id, uname);

	} else if (by_id == NULL && by_name) {
	crm_trace("Only one: %p for %s/%s", by_name, id, uname);

	// A name-only match is rejected if an ID was requested but not found
	if (id) {
	node = NULL;

	} else {
	node = by_name;
	}

	} else if (by_name == NULL && by_id) {
	crm_trace("Only one: %p for %s/%s", by_id, id, uname);

	// An ID-only match is rejected if a name was requested but not found
	if (uname) {
	node = NULL;
	}

	// From here on both lookups succeeded but found different entries
	} else if (uname && by_id->uname
	&& pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
	/* Multiple nodes have the same uname in the CIB.
	* Return by_id. */

	} else if (id && by_name->uuid
	- && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
	+ && pcmk__str_eq(id, by_name->uuid, pcmk__str_none)) {
	/* Multiple nodes have the same id in the CIB.
	* Return by_name. */
	node = by_name;

	} else {
	node = NULL;
	}

	if (node == NULL) {
	crm_debug("Couldn't find node%s%s%s%s",
	id? " " : "",
	id? id : "",
	uname? " with name " : "",
	uname? uname : "");
	}

	return node;
	}

	/*!
	 * \internal
	 * \brief Add or refresh one CIB node element in the CIB cluster node cache
	 *
	 * Called once per XPath result by \c refresh_cluster_node_cib_cache().
	 *
	 * \param[in] xml_node   CIB node element (must have both an ID and a uname)
	 * \param[in] user_data  Ignored
	 */
	static void
	cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
	{
	    const char *id = crm_element_value(xml_node, PCMK_XA_ID);
	    const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
	    crm_node_t * node = NULL;

	    CRM_CHECK(id != NULL && uname !=NULL, return);
	    node = find_cib_cluster_node(id, uname);

	    if (node == NULL) {
	        // Not cached yet: create an entry keyed by a freshly generated UUID
	        char *uniqueid = crm_generate_uuid();

	        node = pcmk__assert_alloc(1, sizeof(crm_node_t));

	        node->uname = pcmk__str_copy(uname);
	        node->uuid = pcmk__str_copy(id);

	        g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);

	    } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
	        pcmk__str_update(&node->uname, uname);

	        /* Node is in cache and hasn't been updated already, so mark it clean */
	        clear_peer_flags(node, crm_node_dirty);
	    }

	}

	/*!
	 * \internal
	 * \brief Rebuild the CIB cluster node cache from a CIB
	 *
	 * Every existing entry is first passed to \c mark_dirty, then each
	 * configured member node found in \p cib is added or refreshed, and
	 * finally the entries that \c is_dirty still selects are removed.
	 *
	 * \param[in] cib  CIB XML to read node configuration from
	 */
	static void
	refresh_cluster_node_cib_cache(xmlNode *cib)
	{
	    pcmk__cluster_init_node_caches();

	    // Step 1: flag everything currently cached
	    g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);

	    // Step 2: add or refresh an entry for each member node in the CIB
	    crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
	                             cluster_node_cib_cache_refresh_helper, NULL);

	    // Step 3: remove all old cache entries that weren't seen in the CIB
	    g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
	}

	/*!
	 * \internal
	 * \brief Refresh the Pacemaker Remote and CIB cluster node caches from a CIB
	 *
	 * \param[in] cib  CIB XML to refresh the caches from
	 */
	void
	pcmk__refresh_node_caches_from_cib(xmlNode *cib)
	{
	    // Remote nodes first, then configured cluster nodes
	    refresh_remote_nodes(cib);
	    refresh_cluster_node_cib_cache(cib);
	}

	// Deprecated functions kept only for backward API compatibility
	// LCOV_EXCL_START

	#include <crm/cluster/compat.h>

	/*!
	 * \brief Deprecated wrapper that asks the fencer to kick a node
	 *
	 * \param[in] nodeid  ID of node to kick
	 * \param[in] uname   Name of node to kick
	 * \param[in] unused  Ignored
	 *
	 * \return Return value of \c stonith_api_kick()
	 */
	int
	crm_terminate_member(int nodeid, const char *uname, void *unused)
	{
	    return stonith_api_kick(nodeid, uname, 120, TRUE);
	}

	/*!
	 * \brief Deprecated wrapper that asks the fencer to kick a node
	 *
	 * \param[in] nodeid      ID of node to kick
	 * \param[in] uname       Name of node to kick
	 * \param[in] connection  Ignored
	 *
	 * \return Return value of \c stonith_api_kick()
	 */
	int
	crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
	{
	    return stonith_api_kick(nodeid, uname, 120, TRUE);
	}

	// Deprecated: get (or create) a cluster member cache entry by ID and/or name
	crm_node_t *
	crm_get_peer(unsigned int id, const char *uname)
	{
	    crm_node_t *peer = pcmk__get_node(id, uname, NULL,
	                                      pcmk__node_search_cluster_member);

	    return peer;
	}

	// Deprecated: like crm_get_peer(), but with caller-supplied search flags
	crm_node_t *
	crm_get_peer_full(unsigned int id, const char *uname, int flags)
	{
	    crm_node_t *peer = pcmk__get_node(id, uname, NULL, flags);

	    return peer;
	}

	// Deprecated: number of Pacemaker Remote cache entries, capped at INT_MAX
	int
	crm_remote_peer_cache_size(void)
	{
	    const unsigned int total = pcmk__cluster_num_remote_nodes();

	    return QB_MIN(total, INT_MAX);
	}

	// Deprecated: refresh the Pacemaker Remote node cache from a CIB
	void
	crm_remote_peer_cache_refresh(xmlNode *cib)
	{
	    refresh_remote_nodes(cib);
	}

	// Deprecated: look up a Pacemaker Remote node cache entry by name
	crm_node_t *
	crm_remote_peer_get(const char *node_name)
	{
	    crm_node_t *remote = pcmk__cluster_lookup_remote_node(node_name);

	    return remote;
	}

	// Deprecated: remove a node from the Pacemaker Remote node cache by name
	void
	crm_remote_peer_cache_remove(const char *node_name)
	{
	    pcmk__cluster_forget_remote_node(node_name);
	}

	// Deprecated: report whether a cluster node is considered active
	gboolean
	crm_is_peer_active(const crm_node_t * node)
	{
	    gboolean active = pcmk__cluster_is_node_active(node);

	    return active;
	}

	// Deprecated: number of active nodes in the cluster node cache
	guint
	crm_active_peers(void)
	{
	    guint active = pcmk__cluster_num_active_nodes();

	    return active;
	}

	/*!
	 * \brief Deprecated: remove matching inactive nodes from the membership cache
	 *
	 * Matching is delegated to \c should_forget_cluster_node.
	 *
	 * \param[in] id    ID of node to remove
	 * \param[in] name  Name of node to remove
	 *
	 * \return Number of cache entries removed (0 if the cache does not exist)
	 */
	guint
	reap_crm_member(uint32_t id, const char *name)
	{
	    // g_hash_table_foreach_remove() returns guint, as does this function
	    guint matches = 0;
	    crm_node_t search = { 0, };

	    if (crm_peer_cache == NULL) {
	        crm_trace("Membership cache not initialized, ignoring purge request");
	        return 0;
	    }

	    search.id = id;
	    search.uname = pcmk__str_copy(name);
	    matches = g_hash_table_foreach_remove(crm_peer_cache,
	                                          should_forget_cluster_node, &search);
	    if(matches) {
	        crm_notice("Purged %u peer%s with " PCMK_XA_ID
	                   "=%u%s%s from the membership cache",
	                   matches, pcmk__plural_s(matches), search.id,
	                   (search.uname? " and/or uname=" : ""),
	                   (search.uname? search.uname : ""));

	    } else {
	        crm_info("No peers with " PCMK_XA_ID
	                 "=%u%s%s to purge from the membership cache",
	                 search.id, (search.uname? " and/or uname=" : ""),
	                 (search.uname? search.uname : ""));
	    }

	    free(search.uname);
	    return matches;
	}

	// Deprecated: create the node caches if they do not exist yet
	void
	crm_peer_init(void)
	{
	    pcmk__cluster_init_node_caches();
	}

	// Deprecated: destroy the node caches
	void
	crm_peer_destroy(void)
	{
	    pcmk__cluster_destroy_node_caches();
	}

	// Deprecated: enable or disable automatic reaping of lost nodes
	void
	crm_set_autoreap(gboolean enable)
	{
	    pcmk__cluster_set_autoreap(enable);
	}

	/*!
	 * \brief Deprecated: set the client callback invoked after peer status changes
	 *
	 * \param[in] dispatch  Pointer to function to use as callback
	 */
	void
	crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
	{
	    pcmk__cluster_set_status_callback(dispatch);
	}

	// LCOV_EXCL_STOP
	// End deprecated API

File Metadata

Mime Type: text/x-diff
Expires: Sat, Jan 25, 11:49 AM (1 d, 19 h)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 1305015
Default Alt Text: (678 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions