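This diff removes the vestigial message-class handling from the Corosync CPG deliver path: pcmk__cpg_message_data() no longer reports a message kind, the deprecated crm_class_cluster enum is dropped from crm/cluster.h, and each daemon's deliver callback now parses the payload unconditionally. The header change for pcmk__cpg_message_data() itself is not part of this excerpt; the before/after prototypes below are inferred from the call sites in the diff, not copied from the actual declaration.

/* Inferred from the call sites below; parameter names are illustrative.
 *
 * Before:
 *   char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t nodeid,
 *                                uint32_t pid, void *msg,
 *                                uint32_t *kind, const char **from);
 *
 * After:
 *   char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t nodeid,
 *                                uint32_t pid, void *msg,
 *                                const char **from);
 */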

diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index b46d4ad4ed..49240ed067 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -1,613 +1,611 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static xmlNode *
attrd_confirmation(int callid)
{
xmlNode *node = pcmk__xe_create(NULL, __func__);
crm_xml_add(node, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(node, PCMK__XA_SRC, pcmk__cluster_local_node_name());
crm_xml_add(node, PCMK_XA_TASK, PCMK__ATTRD_CMD_CONFIRM);
crm_xml_add_int(node, PCMK__XA_CALL_ID, callid);
return node;
}
static void
attrd_peer_message(pcmk__node_status_t *peer, xmlNode *xml)
{
const char *election_op = crm_element_value(xml, PCMK__XA_CRM_TASK);
if (election_op) {
attrd_handle_election_op(peer, xml);
return;
}
if (attrd_shutting_down(false)) {
/* If we're shutting down, we want to continue responding to election
* ops as long as we're a cluster member (because our vote may be
* needed). Ignore all other messages.
*/
return;
} else {
pcmk__request_t request = {
.ipc_client = NULL,
.ipc_id = 0,
.ipc_flags = 0,
.peer = peer->name,
.xml = xml,
.call_options = 0,
.result = PCMK__UNKNOWN_RESULT,
};
request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);
CRM_CHECK(request.op != NULL, return);
attrd_handle_request(&request);
/* Having finished handling the request, check to see if the originating
* peer requested confirmation. If so, send that confirmation back now.
*/
if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) &&
!pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) {
int callid = 0;
xmlNode *reply = NULL;
/* Add the confirmation ID for the message we are confirming to the
* response so the originating peer knows what they're a confirmation
* for.
*/
crm_element_value_int(xml, PCMK__XA_CALL_ID, &callid);
reply = attrd_confirmation(callid);
/* And then send the confirmation back to the originating peer. This
* ends up right back in this same function (attrd_peer_message) on the
* peer where it will have to do something with a PCMK__XA_CONFIRM type
* message.
*/
crm_debug("Sending %s a confirmation", peer->name);
attrd_send_message(peer, reply, false);
pcmk__xml_free(reply);
}
pcmk__reset_request(&request);
}
}
static void
attrd_cpg_dispatch(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
- uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
- char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &kind, &from);
+ char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);
if(data == NULL) {
return;
}
- if (kind == crm_class_cluster) {
- xml = pcmk__xml_parse(data);
- }
+ xml = pcmk__xml_parse(data);
if (xml == NULL) {
- crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data);
+ crm_err("Bad message received from %s[%u]: '%.120s'",
+ from, nodeid, data);
} else {
attrd_peer_message(pcmk__get_node(nodeid, from, NULL,
pcmk__node_search_cluster_member),
xml);
}
pcmk__xml_free(xml);
free(data);
}
static void
attrd_cpg_destroy(gpointer unused)
{
if (attrd_shutting_down(false)) {
crm_info("Disconnected from Corosync process group");
} else {
crm_crit("Lost connection to Corosync process group, shutting down");
attrd_exit_status = CRM_EX_DISCONNECT;
attrd_shutdown(0);
}
}
/*!
* \internal
* \brief Broadcast an update for a single attribute value
*
* \param[in] a Attribute to broadcast
* \param[in] v Attribute value to broadcast
*/
void
attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v)
{
xmlNode *op = pcmk__xe_create(NULL, PCMK_XE_OP);
crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
attrd_add_value_xml(op, a, v, false);
attrd_send_message(NULL, op, false);
pcmk__xml_free(op);
}
#define state_text(state) pcmk__s((state), "in unknown state")
static void
attrd_peer_change_cb(enum pcmk__node_update kind, pcmk__node_status_t *peer,
const void *data)
{
bool gone = false;
bool is_remote = pcmk_is_set(peer->flags, pcmk__node_status_remote);
switch (kind) {
case pcmk__node_update_name:
crm_debug("%s node %s is now %s",
(is_remote? "Remote" : "Cluster"),
peer->name, state_text(peer->state));
break;
case pcmk__node_update_processes:
if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
gone = true;
}
crm_debug("Node %s is %s a peer",
peer->name, (gone? "no longer" : "now"));
break;
case pcmk__node_update_state:
crm_debug("%s node %s is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
peer->name, state_text(peer->state), state_text(data));
if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
/* If we're the writer, send new peers a list of all attributes
* (unless it's a remote node, which doesn't run its own attrd)
*/
if (attrd_election_won()
&& !pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
attrd_peer_sync(peer);
}
} else {
// Remove all attribute values associated with lost nodes
attrd_peer_remove(peer->name, false, "loss");
gone = true;
}
break;
}
// Remove votes from cluster nodes that leave, in case election in progress
if (gone && !is_remote) {
attrd_remove_voter(peer);
attrd_remove_peer_protocol_ver(peer->name);
attrd_do_not_expect_from_peer(peer->name);
}
}
static void
record_peer_nodeid(attribute_value_t *v, const char *host)
{
pcmk__node_status_t *known_peer =
pcmk__get_node(v->nodeid, host, NULL, pcmk__node_search_cluster_member);
crm_trace("Learned %s has node id %s",
known_peer->name, known_peer->xml_id);
if (attrd_election_won()) {
attrd_write_attributes(attrd_write_changed);
}
}
#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)")
#define readable_peer(p) \
(((p) == NULL)? "all peers" : pcmk__s((p)->name, "unknown peer"))
static void
update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
const xmlNode *xml, const char *attr, const char *value,
const char *host, bool filter)
{
int is_remote = 0;
bool changed = false;
attribute_value_t *v = NULL;
// Create entry for value if not already existing
v = g_hash_table_lookup(a->values, host);
if (v == NULL) {
v = pcmk__assert_alloc(1, sizeof(attribute_value_t));
v->nodename = pcmk__str_copy(host);
g_hash_table_replace(a->values, v->nodename, v);
}
// If value is for a Pacemaker Remote node, remember that
crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
if (is_remote) {
attrd_set_value_flags(v, attrd_value_remote);
CRM_ASSERT(pcmk__cluster_lookup_remote_node(host) != NULL);
}
// Check whether the value changed
changed = !pcmk__str_eq(v->current, value, pcmk__str_casei);
if (changed && filter && pcmk__str_eq(host, attrd_cluster->uname,
pcmk__str_casei)) {
/* Broadcast the local value for an attribute that differs from the
* value provided in a peer's attribute synchronization response. This
* ensures a node's values for itself take precedence and all peers are
* kept in sync.
*/
v = g_hash_table_lookup(a->values, attrd_cluster->uname);
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
attr, host, readable_value(v), value, peer->name);
attrd_broadcast_value(a, v);
} else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
QB_XS " from %s with %s write delay",
attr, host, a->set_type ? " in " : "",
pcmk__s(a->set_type, ""), readable_value(v),
pcmk__s(value, "(unset)"), peer->name,
(a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
pcmk__str_update(&v->current, value);
attrd_set_attr_flags(a, attrd_attr_changed);
if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)
&& pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) {
if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) {
attrd_set_requesting_shutdown();
} else {
attrd_clear_requesting_shutdown();
}
}
// Write out new value or start dampening timer
if (a->timeout_ms && a->timer) {
crm_trace("Delaying write of %s %s for dampening",
attr, pcmk__readable_interval(a->timeout_ms));
mainloop_timer_start(a->timer);
} else {
attrd_write_or_elect_attribute(a);
}
} else {
int is_force_write = 0;
crm_element_value_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE,
&is_force_write);
if (is_force_write == 1 && a->timeout_ms && a->timer) {
/* Save forced writing and set change flag. */
/* The actual attribute is written by Writer after election. */
crm_trace("%s[%s] from %s is unchanged (%s), forcing write",
attr, host, peer->name, pcmk__s(value, "unset"));
attrd_set_attr_flags(a, attrd_attr_force_write);
} else {
crm_trace("%s[%s] from %s is unchanged (%s)",
attr, host, peer->name, pcmk__s(value, "unset"));
}
}
// This allows us to later detect local values that peer doesn't know about
attrd_set_value_flags(v, attrd_value_from_peer);
/* If this is a cluster node whose node ID we are learning, remember it */
if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote)
&& (crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID,
(int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
record_peer_nodeid(v, host);
}
}
static void
attrd_peer_update_one(const pcmk__node_status_t *peer, xmlNode *xml,
bool filter)
{
attribute_t *a = NULL;
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
if (attr == NULL) {
crm_warn("Could not update attribute: peer did not specify name");
return;
}
a = attrd_populate_attribute(xml, attr);
if (a == NULL) {
return;
}
if (host == NULL) {
// If no host was specified, update all hosts
GHashTableIter vIter;
crm_debug("Setting %s for all hosts to %s", attr, value);
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_HOST_ID);
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
} else {
// Update attribute value for the given host
update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
/* If this is a message from some attrd instance broadcasting its protocol
* version, check to see if it's a new minimum version.
*/
if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) {
attrd_update_minimum_protocol_ver(peer->name, value);
}
}
static void
broadcast_unseen_local_values(void)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = NULL;
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
if (!pcmk_is_set(v->flags, attrd_value_from_peer)
&& pcmk__str_eq(v->nodename, attrd_cluster->uname,
pcmk__str_casei)) {
crm_trace("* %s[%s]='%s' is local-only",
a->id, v->nodename, readable_value(v));
if (sync == NULL) {
sync = pcmk__xe_create(NULL, __func__);
crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
}
attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer);
}
}
}
if (sync != NULL) {
crm_debug("Broadcasting local-only values");
attrd_send_message(NULL, sync, false);
pcmk__xml_free(sync);
}
}
int
attrd_cluster_connect(void)
{
int rc = pcmk_rc_ok;
attrd_cluster = pcmk_cluster_new();
pcmk_cluster_set_destroy_fn(attrd_cluster, attrd_cpg_destroy);
pcmk_cpg_set_deliver_fn(attrd_cluster, attrd_cpg_dispatch);
pcmk_cpg_set_confchg_fn(attrd_cluster, pcmk__cpg_confchg_cb);
pcmk__cluster_set_status_callback(&attrd_peer_change_cb);
rc = pcmk_cluster_connect(attrd_cluster);
rc = pcmk_rc2legacy(rc);
if (rc != pcmk_ok) {
crm_err("Cluster connection failed");
return rc;
}
return pcmk_ok;
}
void
attrd_peer_clear_failure(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
const char *op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
const char *interval_spec = crm_element_value(xml,
PCMK__XA_ATTR_CLEAR_INTERVAL);
guint interval_ms = 0U;
char *attr = NULL;
GHashTableIter iter;
regex_t regex;
pcmk__node_status_t *peer =
pcmk__get_node(0, request->peer, NULL,
pcmk__node_search_cluster_member);
pcmk_parse_interval_spec(interval_spec, &interval_ms);
if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) {
crm_info("Ignoring invalid request to clear failures for %s",
pcmk__s(rsc, "all resources"));
return;
}
crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
/* Make sure value is not set, so we delete */
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) {
if (regexec(&regex, attr, 0, NULL, 0) == 0) {
crm_trace("Matched %s when clearing %s",
attr, pcmk__s(rsc, "all resources"));
crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr);
attrd_peer_update(peer, xml, host, false);
}
}
regfree(&regex);
}
/*!
* \internal
* \brief Load attributes from a peer sync response
*
* \param[in] peer Peer that sent sync response
* \param[in] peer_won Whether peer is the attribute writer
* \param[in,out] xml Request XML
*/
void
attrd_peer_sync_response(const pcmk__node_status_t *peer, bool peer_won,
xmlNode *xml)
{
crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
peer->name);
if (peer_won) {
/* Initialize the "seen" flag for all attributes to cleared, so we can
* detect attributes that local node has but the writer doesn't.
*/
attrd_clear_value_seen();
}
// Process each attribute update in the sync response
for (xmlNode *child = pcmk__xe_first_child(xml, NULL, NULL, NULL);
child != NULL; child = pcmk__xe_next(child)) {
attrd_peer_update(peer, child,
crm_element_value(child, PCMK__XA_ATTR_HOST), true);
}
if (peer_won) {
/* If any attributes are still not marked as seen, the writer doesn't
* know about them, so send all peers an update with them.
*/
broadcast_unseen_local_values();
}
}
/*!
* \internal
* \brief Remove all attributes and optionally peer cache entries for a node
*
* \param[in] host Name of node to purge
* \param[in] uncache If true, remove node from peer caches
* \param[in] source Who requested removal (only used for logging)
*/
void
attrd_peer_remove(const char *host, bool uncache, const char *source)
{
attribute_t *a = NULL;
GHashTableIter aIter;
CRM_CHECK(host != NULL, return);
crm_notice("Removing all %s attributes for node %s "
QB_XS " %s reaping node from cache",
host, source, (uncache? "and" : "without"));
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
if(g_hash_table_remove(a->values, host)) {
crm_debug("Removed %s[%s] for peer %s", a->id, host, source);
}
}
if (uncache) {
pcmk__purge_node_from_cache(host, 0);
}
}
/*!
* \internal
* \brief Send all known attributes and values to a peer
*
* \param[in] peer Peer to send sync to (if NULL, broadcast to all peers)
*/
void
attrd_peer_sync(pcmk__node_status_t *peer)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = pcmk__xe_create(NULL, __func__);
crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
crm_debug("Syncing %s[%s]='%s' to %s",
a->id, v->nodename, readable_value(v),
readable_peer(peer));
attrd_add_value_xml(sync, a, v, false);
}
}
crm_debug("Syncing values to %s", readable_peer(peer));
attrd_send_message(peer, sync, false);
pcmk__xml_free(sync);
}
void
attrd_peer_update(const pcmk__node_status_t *peer, xmlNode *xml,
const char *host, bool filter)
{
bool handle_sync_point = false;
CRM_CHECK((peer != NULL) && (xml != NULL), return);
if (xml->children != NULL) {
for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL);
child != NULL; child = pcmk__xe_next_same(child)) {
pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite);
attrd_peer_update_one(peer, child, filter);
if (attrd_request_has_sync_point(child)) {
handle_sync_point = true;
}
}
} else {
attrd_peer_update_one(peer, xml, filter);
if (attrd_request_has_sync_point(xml)) {
handle_sync_point = true;
}
}
/* If the update XML specified that the client wanted to wait for a sync
* point, process that now.
*/
if (handle_sync_point) {
crm_trace("Hit local sync point for attribute update");
attrd_ack_waitlist_clients(attrd_sync_point_local, xml);
}
}
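The based, controld, and fenced callbacks below follow the same simplified shape as attrd_cpg_dispatch() above once the kind check is gone. A minimal consolidated sketch, using only the Pacemaker calls that appear in this diff (the callback and handler names are illustrative):

static void
example_cpg_deliver(cpg_handle_t handle, const struct cpg_name *group_name,
                    uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    const char *from = NULL;
    char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);
    xmlNode *xml = NULL;

    if (data == NULL) {
        return;
    }
    xml = pcmk__xml_parse(data);            // parse every delivered payload
    if (xml == NULL) {
        crm_err("Bad message received from %s[%u]: '%.120s'", from, nodeid, data);
    } else {
        crm_xml_add(xml, PCMK__XA_SRC, from);   // record the sending node
        /* hand the message to the daemon-specific peer handler here */
    }
    pcmk__xml_free(xml);    // NULL-safe, as in the callbacks in this diff
    free(data);
}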
diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c
index 999f737a4b..7d3c884c1c 100644
--- a/daemons/based/pacemaker-based.c
+++ b/daemons/based/pacemaker-based.c
@@ -1,435 +1,433 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <stdlib.h>
#include <pwd.h>
#include <grp.h>
#include <bzlib.h>
#include <sys/types.h>
#include <glib.h>
#include <libxml/tree.h>
#include <crm/crm.h>
#include <crm/cib/internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/output_internal.h>
#include <crm/common/xml.h>
#include <pacemaker-based.h>
#define SUMMARY "daemon for managing the configuration of a Pacemaker cluster"
extern int init_remote_listener(int port, gboolean encrypted);
gboolean cib_shutdown_flag = FALSE;
int cib_status = pcmk_ok;
pcmk_cluster_t *crm_cluster = NULL;
GMainLoop *mainloop = NULL;
gchar *cib_root = NULL;
static gboolean preserve_status = FALSE;
gboolean cib_writes_enabled = TRUE;
gboolean stand_alone = FALSE;
int remote_fd = 0;
int remote_tls_fd = 0;
GHashTable *config_hash = NULL;
static void cib_init(void);
void cib_shutdown(int nsig);
static bool startCib(const char *filename);
extern int write_cib_contents(gpointer p);
static crm_exit_t exit_code = CRM_EX_OK;
static void
cib_enable_writes(int nsig)
{
crm_info("(Re)enabling disk writes");
cib_writes_enabled = TRUE;
}
/*!
* \internal
* \brief Set up options, users, and groups for stand-alone mode
*
* \param[out] error GLib error object
*
* \return Standard Pacemaker return code
*/
static int
setup_stand_alone(GError **error)
{
int rc = 0;
struct passwd *pwentry = NULL;
preserve_status = TRUE;
cib_writes_enabled = FALSE;
errno = 0;
pwentry = getpwnam(CRM_DAEMON_USER);
if (pwentry == NULL) {
exit_code = CRM_EX_FATAL;
if (errno != 0) {
g_set_error(error, PCMK__EXITC_ERROR, exit_code,
"Error getting password DB entry for %s: %s",
CRM_DAEMON_USER, strerror(errno));
return errno;
}
g_set_error(error, PCMK__EXITC_ERROR, exit_code,
"Password DB entry for '%s' not found", CRM_DAEMON_USER);
return ENXIO;
}
rc = setgid(pwentry->pw_gid);
if (rc < 0) {
exit_code = CRM_EX_FATAL;
g_set_error(error, PCMK__EXITC_ERROR, exit_code,
"Could not set group to %d: %s",
pwentry->pw_gid, strerror(errno));
return errno;
}
rc = initgroups(CRM_DAEMON_USER, pwentry->pw_gid);
if (rc < 0) {
exit_code = CRM_EX_FATAL;
g_set_error(error, PCMK__EXITC_ERROR, exit_code,
"Could not setup groups for user %d: %s",
pwentry->pw_uid, strerror(errno));
return errno;
}
rc = setuid(pwentry->pw_uid);
if (rc < 0) {
exit_code = CRM_EX_FATAL;
g_set_error(error, PCMK__EXITC_ERROR, exit_code,
"Could not set user to %d: %s",
pwentry->pw_uid, strerror(errno));
return errno;
}
return pcmk_rc_ok;
}
/* @COMPAT Deprecated since 2.1.8. Use pcmk_list_cluster_options() or
* crm_attribute --list-options=cluster instead of querying daemon metadata.
*/
static int
based_metadata(pcmk__output_t *out)
{
return pcmk__daemon_metadata(out, "pacemaker-based",
"Cluster Information Base manager options",
"Cluster options used by Pacemaker's Cluster "
"Information Base manager",
pcmk__opt_based);
}
static GOptionEntry entries[] = {
{ "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
"(Advanced use only) Run in stand-alone mode", NULL },
{ "disk-writes", 'w', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE,
&cib_writes_enabled,
"(Advanced use only) Enable disk writes (enabled by default unless in "
"stand-alone mode)", NULL },
{ "cib-root", 'r', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME, &cib_root,
"(Advanced use only) Directory where the CIB XML file should be located "
"(default: " CRM_CONFIG_DIR ")", NULL },
{ NULL }
};
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
GOptionContext *context = NULL;
context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
pcmk__add_main_args(context, entries);
return context;
}
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
crm_ipc_t *old_instance = NULL;
pcmk__output_t *out = NULL;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "r");
GOptionContext *context = build_arg_context(args, &output_group);
crm_log_preinit(NULL, argc, argv);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
if (args->version) {
out->version(out, false);
goto done;
}
mainloop_add_signal(SIGTERM, cib_shutdown);
mainloop_add_signal(SIGPIPE, cib_enable_writes);
cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL);
if ((g_strv_length(processed_args) >= 2)
&& pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
rc = based_metadata(out);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Unable to display metadata: %s", pcmk_rc_str(rc));
}
goto done;
}
pcmk__cli_init_logging("pacemaker-based", args->verbosity);
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_notice("Starting Pacemaker CIB manager");
old_instance = crm_ipc_new(PCMK__SERVER_BASED_RO, 0);
if (old_instance == NULL) {
/* crm_ipc_new() will have already logged an error message with
* crm_err()
*/
exit_code = CRM_EX_FATAL;
goto done;
}
if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) {
/* IPC end-point already up */
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("pacemaker-based is already active, aborting startup");
goto done;
} else {
/* not up or not authentic, we'll proceed either way */
crm_ipc_destroy(old_instance);
old_instance = NULL;
}
if (stand_alone) {
rc = setup_stand_alone(&error);
if (rc != pcmk_rc_ok) {
goto done;
}
}
if (cib_root == NULL) {
cib_root = g_strdup(CRM_CONFIG_DIR);
} else {
crm_notice("Using custom config location: %s", cib_root);
}
if (!pcmk__daemon_can_write(cib_root, NULL)) {
exit_code = CRM_EX_FATAL;
crm_err("Terminating due to bad permissions on %s", cib_root);
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Bad permissions on %s (see logs for details)", cib_root);
goto done;
}
pcmk__cluster_init_node_caches();
// Read initial CIB, connect to cluster, and start IPC servers
cib_init();
// Run the main loop
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker CIB manager successfully started and accepting connections");
g_main_loop_run(mainloop);
/* If main loop returned, clean up and exit. We disconnect in case
* terminate_cib() was called with fast=-1.
*/
pcmk_cluster_disconnect(crm_cluster);
pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
pcmk__cluster_destroy_node_caches();
if (config_hash != NULL) {
g_hash_table_destroy(config_hash);
}
pcmk__client_cleanup();
pcmk_cluster_free(crm_cluster);
g_free(cib_root);
pcmk__output_and_clear_error(&error, out);
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
crm_exit(exit_code);
}
#if SUPPORT_COROSYNC
static void
cib_cs_dispatch(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
- uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
- char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &kind, &from);
+ char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);
if(data == NULL) {
return;
}
- if (kind == crm_class_cluster) {
- xml = pcmk__xml_parse(data);
- if (xml == NULL) {
- crm_err("Invalid XML: '%.120s'", data);
- free(data);
- return;
- }
- crm_xml_add(xml, PCMK__XA_SRC, from);
- cib_peer_callback(xml, NULL);
+
+ xml = pcmk__xml_parse(data);
+ if (xml == NULL) {
+ crm_err("Invalid XML: '%.120s'", data);
+ free(data);
+ return;
}
+ crm_xml_add(xml, PCMK__XA_SRC, from);
+ cib_peer_callback(xml, NULL);
pcmk__xml_free(xml);
free(data);
}
static void
cib_cs_destroy(gpointer user_data)
{
if (cib_shutdown_flag) {
crm_info("Corosync disconnection complete");
} else {
crm_crit("Lost connection to cluster layer, shutting down");
terminate_cib(__func__, CRM_EX_DISCONNECT);
}
}
#endif
static void
cib_peer_update_callback(enum pcmk__node_update type,
pcmk__node_status_t *node, const void *data)
{
switch (type) {
case pcmk__node_update_name:
case pcmk__node_update_state:
if (cib_shutdown_flag && (pcmk__cluster_num_active_nodes() < 2)
&& (pcmk__ipc_client_count() == 0)) {
crm_info("No more peers");
terminate_cib(__func__, -1);
}
break;
default:
break;
}
}
static void
cib_init(void)
{
crm_cluster = pcmk_cluster_new();
#if SUPPORT_COROSYNC
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
pcmk_cluster_set_destroy_fn(crm_cluster, cib_cs_destroy);
pcmk_cpg_set_deliver_fn(crm_cluster, cib_cs_dispatch);
pcmk_cpg_set_confchg_fn(crm_cluster, pcmk__cpg_confchg_cb);
}
#endif // SUPPORT_COROSYNC
config_hash = pcmk__strkey_table(free, free);
if (startCib("cib.xml") == FALSE) {
crm_crit("Cannot start CIB... terminating");
crm_exit(CRM_EX_NOINPUT);
}
if (!stand_alone) {
pcmk__cluster_set_status_callback(&cib_peer_update_callback);
if (pcmk_cluster_connect(crm_cluster) != pcmk_rc_ok) {
crm_crit("Cannot sign in to the cluster... terminating");
crm_exit(CRM_EX_FATAL);
}
}
pcmk__serve_based_ipc(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks,
&ipc_rw_callbacks);
if (stand_alone) {
based_is_primary = true;
}
}
static bool
startCib(const char *filename)
{
gboolean active = FALSE;
xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status);
if (activateCibXml(cib, TRUE, "start") == 0) {
int port = 0;
active = TRUE;
cib_read_config(config_hash, cib);
pcmk__scan_port(crm_element_value(cib, PCMK_XA_REMOTE_TLS_PORT), &port);
if (port >= 0) {
remote_tls_fd = init_remote_listener(port, TRUE);
}
pcmk__scan_port(crm_element_value(cib, PCMK_XA_REMOTE_CLEAR_PORT),
&port);
if (port >= 0) {
remote_fd = init_remote_listener(port, FALSE);
}
}
return active;
}
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
index 499dbf0bb1..02b0e823ad 100644
--- a/daemons/controld/controld_corosync.c
+++ b/daemons/controld/controld_corosync.c
@@ -1,167 +1,163 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <inttypes.h> // PRIu32
#include <stdio.h> // NULL
#include <stdlib.h> // free(), etc.
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
#if SUPPORT_COROSYNC
extern void post_cache_update(int seq);
/* A_HA_CONNECT */
static void
crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
- uint32_t kind = 0;
const char *from = NULL;
- char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &kind, &from);
+ char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);
+ pcmk__node_status_t *peer = NULL;
+ xmlNode *xml = NULL;
if(data == NULL) {
return;
}
- if (kind == crm_class_cluster) {
- pcmk__node_status_t *peer = NULL;
- xmlNode *xml = pcmk__xml_parse(data);
- if (xml == NULL) {
- crm_err("Could not parse message content (%d): %.100s", kind, data);
- free(data);
- return;
- }
+ xml = pcmk__xml_parse(data);
+ if (xml == NULL) {
+ crm_err("Could not parse message content: %.100s", data);
+ free(data);
+ return;
+ }
- crm_xml_add(xml, PCMK__XA_SRC, from);
-
- peer = pcmk__get_node(0, from, NULL, pcmk__node_search_cluster_member);
- if (!pcmk_is_set(peer->processes, crm_proc_cpg)) {
- /* If we can still talk to our peer process on that node,
- * then it must be part of the corosync membership
- */
- crm_warn("Receiving messages from a node we think is dead: "
- "%s[%" PRIu32 "]",
- peer->name, peer->cluster_layer_id);
- crm_update_peer_proc(__func__, peer, crm_proc_cpg,
- PCMK_VALUE_ONLINE);
- }
- crmd_ha_msg_filter(xml);
- pcmk__xml_free(xml);
- } else {
- crm_err("Invalid message class (%d): %.100s", kind, data);
+ crm_xml_add(xml, PCMK__XA_SRC, from);
+
+ peer = pcmk__get_node(0, from, NULL, pcmk__node_search_cluster_member);
+ if (!pcmk_is_set(peer->processes, crm_proc_cpg)) {
+ /* If we can still talk to our peer process on that node, then it must
+ * be part of the corosync membership
+ */
+ crm_warn("Receiving messages from a node we think is dead: "
+ "%s[%" PRIu32 "]",
+ peer->name, peer->cluster_layer_id);
+ crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE);
}
+
+ crmd_ha_msg_filter(xml);
+ pcmk__xml_free(xml);
free(data);
}
static gboolean
crmd_quorum_callback(unsigned long long seq, gboolean quorate)
{
crm_update_quorum(quorate, FALSE);
post_cache_update(seq);
return TRUE;
}
static void
crmd_cs_destroy(gpointer user_data)
{
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
crm_crit("Lost connection to cluster layer, shutting down");
crmd_exit(CRM_EX_DISCONNECT);
}
}
/*!
* \brief Handle a Corosync notification of a CPG configuration change
*
* \param[in] handle CPG connection
* \param[in] cpg_name CPG group name
* \param[in] member_list List of current CPG members
* \param[in] member_list_entries Number of entries in \p member_list
* \param[in] left_list List of CPG members that left
* \param[in] left_list_entries Number of entries in \p left_list
* \param[in] joined_list List of CPG members that joined
* \param[in] joined_list_entries Number of entries in \p joined_list
*/
static void
cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
const struct cpg_address *member_list,
size_t member_list_entries,
const struct cpg_address *left_list,
size_t left_list_entries,
const struct cpg_address *joined_list,
size_t joined_list_entries)
{
/* When nodes leave CPG, the DC clears their transient node attributes.
*
* However if there is no DC, or the DC is among the nodes that left, each
* remaining node needs to do the clearing, to ensure it gets done.
* Otherwise, the attributes would persist when the nodes rejoin, which
* could have serious consequences for unfencing, agents that use attributes
* for internal logic, etc.
*
* Here, we set a global boolean if the DC is among the nodes that left, for
* use by the peer callback.
*/
if (controld_globals.dc_name != NULL) {
pcmk__node_status_t *peer = NULL;
peer = pcmk__search_node_caches(0, controld_globals.dc_name,
pcmk__node_search_cluster_member);
if (peer != NULL) {
for (int i = 0; i < left_list_entries; ++i) {
if (left_list[i].nodeid == peer->cluster_layer_id) {
controld_set_global_flags(controld_dc_left);
break;
}
}
}
}
// Process the change normally, which will call the peer callback as needed
pcmk__cpg_confchg_cb(handle, cpg_name, member_list, member_list_entries,
left_list, left_list_entries,
joined_list, joined_list_entries);
controld_clear_global_flags(controld_dc_left);
}
extern gboolean crm_connect_corosync(pcmk_cluster_t *cluster);
gboolean
crm_connect_corosync(pcmk_cluster_t *cluster)
{
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
pcmk__cluster_set_status_callback(&peer_update_callback);
pcmk_cluster_set_destroy_fn(cluster, crmd_cs_destroy);
pcmk_cpg_set_deliver_fn(cluster, crmd_cs_dispatch);
pcmk_cpg_set_confchg_fn(cluster, cpg_membership_callback);
if (pcmk_cluster_connect(cluster) == pcmk_rc_ok) {
pcmk__corosync_quorum_connect(crmd_quorum_callback,
crmd_cs_destroy);
return TRUE;
}
}
return FALSE;
}
#endif
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 842f1c54e4..323fbd5440 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -1,681 +1,678 @@
/*
* Copyright 2009-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h> // PRIu32, PRIx32
#include <crm/crm.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/output_internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
#include <pacemaker-fenced.h>
#define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster"
char *stonith_our_uname = NULL;
long long stonith_watchdog_timeout_ms = 0;
GList *stonith_watchdog_targets = NULL;
static GMainLoop *mainloop = NULL;
gboolean stand_alone = FALSE;
gboolean stonith_shutdown_flag = FALSE;
static qb_ipcs_service_t *ipcs = NULL;
static pcmk__output_t *out = NULL;
pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
static struct {
bool no_cib_connect;
gchar **log_files;
} options;
crm_exit_t exit_code = CRM_EX_OK;
static void stonith_cleanup(void);
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
if (stonith_shutdown_flag) {
crm_info("Ignoring new client [%d] during shutdown",
pcmk__client_pid(c));
return -ECONNREFUSED;
}
if (pcmk__new_client(c, uid, gid) == NULL) {
return -ENOMEM;
}
return 0;
}
/* Exit code means? */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
int call_options = 0;
xmlNode *request = NULL;
pcmk__client_t *c = pcmk__find_client(qbc);
const char *op = NULL;
if (c == NULL) {
crm_info("Invalid client: %p", qbc);
return 0;
}
request = pcmk__client_data2xml(c, data, &id, &flags);
if (request == NULL) {
pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL);
return 0;
}
op = crm_element_value(request, PCMK__XA_CRM_TASK);
if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
crm_xml_add(request, PCMK__XA_ST_OP, op);
crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id);
crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c));
crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname);
pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, request);
pcmk__xml_free(request);
return 0;
}
if (c->name == NULL) {
const char *value = crm_element_value(request, PCMK__XA_ST_CLIENTNAME);
c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"), c->pid);
}
crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options);
crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32
" from client %s", flags, call_options, id, pcmk__client_name(c));
if (pcmk_is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(flags & crm_ipc_client_response);
CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
c->request_id = id; /* Reply only to the last one */
}
crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id);
crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c));
crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname);
crm_log_xml_trace(request, "ipc-received");
stonith_command(c, id, flags, request, NULL);
pcmk__xml_free(request);
return 0;
}
/* Error code means? */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p closed", c);
pcmk__free_client(client);
/* 0 means: yes, go ahead and destroy the connection */
return 0;
}
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p destroyed", c);
st_ipc_closed(c);
}
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
const char *remote_peer = crm_element_value(msg, PCMK__XA_SRC);
const char *op = crm_element_value(msg, PCMK__XA_ST_OP);
if (pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) {
return;
}
crm_log_xml_trace(msg, "Peer[inbound]");
stonith_command(NULL, 0, 0, msg, remote_peer);
}
#if SUPPORT_COROSYNC
static void
stonith_peer_ais_callback(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
- uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
- char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &kind, &from);
+ char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);
if(data == NULL) {
return;
}
- if (kind == crm_class_cluster) {
- xml = pcmk__xml_parse(data);
- if (xml == NULL) {
- crm_err("Invalid XML: '%.120s'", data);
- free(data);
- return;
- }
- crm_xml_add(xml, PCMK__XA_SRC, from);
- stonith_peer_callback(xml, NULL);
+
+ xml = pcmk__xml_parse(data);
+ if (xml == NULL) {
+ crm_err("Invalid XML: '%.120s'", data);
+ free(data);
+ return;
}
+ crm_xml_add(xml, PCMK__XA_SRC, from);
+ stonith_peer_callback(xml, NULL);
pcmk__xml_free(xml);
free(data);
- return;
}
static void
stonith_peer_cs_destroy(gpointer user_data)
{
crm_crit("Lost connection to cluster layer, shutting down");
stonith_shutdown(0);
}
#endif
void
do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
int call_options)
{
/* send callback to originating child */
int local_rc = pcmk_rc_ok;
int rid = 0;
uint32_t ipc_flags = crm_ipc_server_event;
if (pcmk_is_set(call_options, st_opt_sync_call)) {
CRM_LOG_ASSERT(client->request_id);
rid = client->request_id;
client->request_id = 0;
ipc_flags = crm_ipc_flags_none;
}
local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags);
if (local_rc == pcmk_rc_ok) {
crm_trace("Sent response %d to client %s",
rid, pcmk__client_name(client));
} else {
crm_warn("%synchronous reply to client %s failed: %s",
(pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"),
pcmk__client_name(client), pcmk_rc_str(local_rc));
}
}
uint64_t
get_stonith_flag(const char *name)
{
if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) {
return st_callback_notify_fence;
} else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) {
return st_callback_device_add;
} else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) {
return st_callback_device_del;
} else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY,
pcmk__str_none)) {
return st_callback_notify_history;
} else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
pcmk__str_none)) {
return st_callback_notify_history_synced;
}
return st_callback_unknown;
}
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
const xmlNode *update_msg = user_data;
pcmk__client_t *client = value;
const char *type = NULL;
CRM_CHECK(client != NULL, return);
CRM_CHECK(update_msg != NULL, return);
type = crm_element_value(update_msg, PCMK__XA_SUBT);
CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
if (client->ipcs == NULL) {
crm_trace("Skipping client with NULL channel");
return;
}
if (pcmk_is_set(client->flags, get_stonith_flag(type))) {
int rc = pcmk__ipc_send_xml(client, 0, update_msg,
crm_ipc_server_event);
if (rc != pcmk_rc_ok) {
crm_warn("%s notification of client %s failed: %s "
QB_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
pcmk_rc_str(rc), client->id, rc);
} else {
crm_trace("Sent %s notification to client %s",
type, pcmk__client_name(client));
}
}
}
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
pcmk__client_t *client = NULL;
xmlNode *notify_data = NULL;
if (!timeout || !call_id || !client_id) {
return;
}
client = pcmk__find_client_by_id(client_id);
if (!client) {
return;
}
notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE);
crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE);
crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id);
crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout);
crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
if (client) {
pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);
}
pcmk__xml_free(notify_data);
}
/*!
* \internal
* \brief Notify relevant IPC clients of a fencing operation result
*
* \param[in] type Notification type
* \param[in] result Result of fencing operation (assume success if NULL)
* \param[in] data If not NULL, add to notification as call data
*/
void
fenced_send_notification(const char *type, const pcmk__action_result_t *result,
xmlNode *data)
{
/* TODO: Standardize the contents of data */
xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY);
CRM_LOG_ASSERT(type != NULL);
crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
crm_xml_add(update_msg, PCMK__XA_SUBT, type);
crm_xml_add(update_msg, PCMK__XA_ST_OP, type);
stonith__xe_set_result(update_msg, result);
if (data != NULL) {
xmlNode *wrapper = pcmk__xe_create(update_msg, PCMK__XE_ST_CALLDATA);
pcmk__xml_copy(wrapper, data);
}
crm_trace("Notifying clients");
pcmk__foreach_ipc_client(stonith_notify_client, update_msg);
pcmk__xml_free(update_msg);
crm_trace("Notify complete");
}
/*!
* \internal
* \brief Send notifications for a configuration change to subscribed clients
*
* \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD,
* \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or
* \c STONITH_OP_LEVEL_DEL)
* \param[in] result Operation result
* \param[in] desc Description of what changed (either device ID or string
* representation of level
* (<tt><target>[<level_index>]</tt>))
*/
void
fenced_send_config_notification(const char *op,
const pcmk__action_result_t *result,
const char *desc)
{
xmlNode *notify_data = pcmk__xe_create(NULL, op);
crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, desc);
fenced_send_notification(op, result, notify_data);
pcmk__xml_free(notify_data);
}
/*!
* \internal
* \brief Check whether a node does watchdog-fencing
*
* \param[in] node Name of node to check
*
* \return TRUE if node found in stonith_watchdog_targets
* or stonith_watchdog_targets is empty indicating
* all nodes are doing watchdog-fencing
*/
gboolean
node_does_watchdog_fencing(const char *node)
{
return ((stonith_watchdog_targets == NULL) ||
pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei));
}
void
stonith_shutdown(int nsig)
{
crm_info("Terminating with %d clients", pcmk__ipc_client_count());
stonith_shutdown_flag = TRUE;
if (mainloop != NULL && g_main_loop_is_running(mainloop)) {
g_main_loop_quit(mainloop);
}
}
static void
stonith_cleanup(void)
{
fenced_cib_cleanup();
if (ipcs) {
qb_ipcs_destroy(ipcs);
}
pcmk__cluster_destroy_node_caches();
pcmk__client_cleanup();
free_stonith_remote_op_list();
free_topology_list();
free_device_list();
free_metadata_cache();
fenced_unregister_handlers();
free(stonith_our_uname);
stonith_our_uname = NULL;
}
static gboolean
stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data,
GError **error)
{
stand_alone = FALSE;
options.no_cib_connect = true;
return TRUE;
}
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = st_ipc_accept,
.connection_created = NULL,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
/*!
* \internal
* \brief Callback for peer status changes
*
* \param[in] type What changed
* \param[in] node What peer had the change
* \param[in] data Previous value of what changed
*/
static void
st_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
const void *data)
{
if ((type != pcmk__node_update_processes)
&& !pcmk_is_set(node->flags, pcmk__node_status_remote)) {
/*
* This is a hack until we can send to a nodeid and/or we fix node name lookups
* These messages are ignored in stonith_peer_callback()
*/
xmlNode *query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND);
crm_xml_add(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
crm_xml_add(query, PCMK__XA_ST_OP, STONITH_OP_POKE);
crm_debug("Broadcasting our uname because of node %" PRIu32,
node->cluster_layer_id);
pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, query);
pcmk__xml_free(query);
}
}
/* @COMPAT Deprecated since 2.1.8. Use pcmk_list_fence_attrs() or
* crm_resource --list-options=fencing instead of querying daemon metadata.
*/
static int
fencer_metadata(void)
{
const char *name = "pacemaker-fenced";
const char *desc_short = N_("Instance attributes available for all "
"\"stonith\"-class resources");
const char *desc_long = N_("Instance attributes available for all "
"\"stonith\"-class resources and used by "
"Pacemaker's fence daemon");
return pcmk__daemon_metadata(out, name, desc_short, desc_long,
pcmk__opt_fencing);
}
static GOptionEntry entries[] = {
{ "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
N_("Deprecated (will be removed in a future release)"), NULL },
{ "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL },
{ "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
&options.log_files, N_("Send logs to the additional named logfile"), NULL },
{ NULL }
};
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
GOptionContext *context = NULL;
context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
pcmk__add_main_args(context, entries);
return context;
}
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
pcmk_cluster_t *cluster = NULL;
crm_ipc_t *old_instance = NULL;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
GOptionContext *context = build_arg_context(args, &output_group);
crm_log_preinit(NULL, argc, argv);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
if (args->version) {
out->version(out, false);
goto done;
}
if ((g_strv_length(processed_args) >= 2)
&& pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
rc = fencer_metadata();
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Unable to display metadata: %s", pcmk_rc_str(rc));
}
goto done;
}
// Open additional log files
pcmk__add_logfiles(options.log_files, out);
crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE,
(args->verbosity > 0), argc, argv, FALSE);
crm_notice("Starting Pacemaker fencer");
old_instance = crm_ipc_new("stonith-ng", 0);
if (old_instance == NULL) {
/* crm_ipc_new() will have already logged an error message with
* crm_err()
*/
exit_code = CRM_EX_FATAL;
goto done;
}
if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) {
// IPC endpoint already up
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("pacemaker-fenced is already active, aborting startup");
goto done;
} else {
// Not up or not authentic, we'll proceed either way
crm_ipc_destroy(old_instance);
old_instance = NULL;
}
mainloop_add_signal(SIGTERM, stonith_shutdown);
pcmk__cluster_init_node_caches();
rc = fenced_scheduler_init();
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error initializing scheduler data: %s", pcmk_rc_str(rc));
goto done;
}
cluster = pcmk_cluster_new();
if (!stand_alone) {
#if SUPPORT_COROSYNC
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy);
pcmk_cpg_set_deliver_fn(cluster, stonith_peer_ais_callback);
pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb);
}
#endif // SUPPORT_COROSYNC
pcmk__cluster_set_status_callback(&st_peer_update_callback);
if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
crm_crit("Cannot sign in to the cluster... terminating");
goto done;
}
pcmk__str_update(&stonith_our_uname, cluster->uname);
if (!options.no_cib_connect) {
setup_cib();
}
} else {
pcmk__str_update(&stonith_our_uname, "localhost");
crm_warn("Stand-alone mode is deprecated and will be removed "
"in a future release");
}
init_device_list();
init_topology_list();
pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
// Create the mainloop and run it...
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker fencer successfully started and accepting connections");
g_main_loop_run(mainloop);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
g_strfreev(options.log_files);
stonith_cleanup();
pcmk_cluster_free(cluster);
fenced_scheduler_cleanup();
pcmk__output_and_clear_error(&error, out);
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
crm_exit(exit_code);
}
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index 81d36fae97..45b8e88954 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -1,146 +1,135 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER__H
# define PCMK__CRM_CLUSTER__H
# include <stdint.h> // uint32_t, uint64_t
# include <glib.h> // gboolean, GHashTable
# include <libxml/tree.h> // xmlNode
# include <crm/common/xml.h>
# include <crm/common/util.h>
#ifdef __cplusplus
extern "C" {
#endif
# if SUPPORT_COROSYNC
# include <corosync/cpg.h>
# endif
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern GHashTable *crm_peer_cache;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern GHashTable *crm_remote_peer_cache;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern unsigned long long crm_peer_seq;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
#define CRM_NODE_LOST "lost"
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
#define CRM_NODE_MEMBER "member"
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_join_phase {
/* @COMPAT: crm_join_nack_quiet can be replaced by
* pcmk__node_status_t:user_data at a compatibility break
*/
//! Not allowed to join, but don't send a nack message
crm_join_nack_quiet = -2,
crm_join_nack = -1,
crm_join_none = 0,
crm_join_welcomed = 1,
crm_join_integrated = 2,
crm_join_finalized = 3,
crm_join_confirmed = 4,
};
//!@}
// Implementation of pcmk_cluster_t
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
struct crm_cluster_s {
char *uuid;
char *uname;
uint32_t nodeid;
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_cluster_set_destroy_fn() to set this
void (*destroy) (gpointer);
# if SUPPORT_COROSYNC
/* @TODO When we can break public API compatibility, make these members a
* separate struct and use void *cluster_data here instead, to abstract the
* cluster layer further.
*/
struct cpg_name group;
// NOTE: sbd (as of at least 1.5.2) uses this
/*!
* \deprecated Call pcmk_cpg_set_deliver_fn() and pcmk_cpg_set_confchg_fn()
* to set these
*/
cpg_callbacks_t cpg;
cpg_handle_t cpg_handle;
# endif
};
//!@}
//! Connection to a cluster layer
typedef struct crm_cluster_s pcmk_cluster_t;
int pcmk_cluster_connect(pcmk_cluster_t *cluster);
int pcmk_cluster_disconnect(pcmk_cluster_t *cluster);
pcmk_cluster_t *pcmk_cluster_new(void);
void pcmk_cluster_free(pcmk_cluster_t *cluster);
int pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer));
#if SUPPORT_COROSYNC
int pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn);
int pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn);
#endif // SUPPORT_COROSYNC
-/* @COMPAT Make this internal when we can break API backward compatibility. Also
- * evaluate whether we can drop this entirely. Since 2.0.0, we have sent only
- * messages with crm_class_cluster.
- */
-//!@{
-//! \deprecated Do not use (public access will be removed in a future release)
-enum crm_ais_msg_class {
- crm_class_cluster = 0,
-};
-//!@}
-
/*!
* \enum pcmk_cluster_layer
* \brief Types of cluster layer
*/
enum pcmk_cluster_layer {
pcmk_cluster_layer_unknown = 1, //!< Unknown cluster layer
pcmk_cluster_layer_invalid = 2, //!< Invalid cluster layer
pcmk_cluster_layer_corosync = 32, //!< Corosync Cluster Engine
};
enum pcmk_cluster_layer pcmk_get_cluster_layer(void);
const char *pcmk_cluster_layer_text(enum pcmk_cluster_layer layer);
#ifdef __cplusplus
}
#endif
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/cluster/compat.h>
#endif
#endif
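As the comment removed above notes, only crm_class_cluster messages have been sent since 2.0.0, so dropping the kind check is not a behavioral change for any supported sender. Any remaining caller of pcmk__cpg_message_data() migrates the same way the daemons above do: change the call from pcmk__cpg_message_data(handle, nodeid, pid, msg, &kind, &from) to pcmk__cpg_message_data(handle, nodeid, pid, msg, &from), delete the kind variable, and parse every delivered payload unconditionally.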
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index d5904d3ba1..21d3c1cfff 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -1,310 +1,309 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER_INTERNAL__H
#define PCMK__CRM_CLUSTER_INTERNAL__H
#include <stdbool.h>
#include <stdint.h> // uint32_t, uint64_t
#include <glib.h> // gboolean
#include <crm/cluster.h>
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \internal
* \enum pcmk__cluster_msg
* \brief Types of message sent via the cluster layer
*/
enum pcmk__cluster_msg {
pcmk__cluster_msg_unknown,
pcmk__cluster_msg_attrd,
pcmk__cluster_msg_based,
pcmk__cluster_msg_controld,
pcmk__cluster_msg_execd,
pcmk__cluster_msg_fenced,
};
enum crm_proc_flag {
/* @COMPAT When pcmk__node_status_t:processes is made internal, we can merge
* this into node flags or turn it into a boolean. Until then, in theory
* something could depend on these particular numeric values.
*/
crm_proc_none = 0x00000001,
// Cluster layers
crm_proc_cpg = 0x04000000,
};
/*!
* \internal
* \enum pcmk__node_status_flags
* \brief Boolean flags for a \c pcmk__node_status_t object
*
* Some flags may not be related to status specifically. However, we keep these
* separate from <tt>enum pcmk__node_flags</tt> because they're used with
* different object types.
*/
enum pcmk__node_status_flags {
/*!
* Node is a Pacemaker Remote node and should not be considered for cluster
* membership
*/
pcmk__node_status_remote = (UINT32_C(1) << 0),
//! Node's cache entry is dirty
pcmk__node_status_dirty = (UINT32_C(1) << 1),
};
// Used with node cache search functions
enum pcmk__node_search_flags {
//! Does not affect search
pcmk__node_search_none = 0,
//! Search for cluster nodes from membership cache
pcmk__node_search_cluster_member = (1 << 0),
//! Search for remote nodes
pcmk__node_search_remote = (1 << 1),
//! Search for cluster member nodes and remote nodes
pcmk__node_search_any = pcmk__node_search_cluster_member
|pcmk__node_search_remote,
//! Search for cluster nodes from CIB (as of last cache refresh)
pcmk__node_search_cluster_cib = (1 << 2),
};
/*!
* \internal
* \enum pcmk__node_update
* \brief Type of update to a \c pcmk__node_status_t object
*/
enum pcmk__node_update {
pcmk__node_update_name, //!< Node name updated
pcmk__node_update_state, //!< Node connection state updated
pcmk__node_update_processes, //!< Node process group membership updated
};
//! Node status data (may be a cluster node or a Pacemaker Remote node)
typedef struct pcmk__node_status {
//! Node name as known to cluster layer, or Pacemaker Remote node name
char *name;
/* @COMPAT This is less than ideal since the value is not a valid XML ID
* (for Corosync, it's the string equivalent of the node's numeric node ID,
* but XML IDs can't start with a number) and the three elements should have
* different IDs.
*
* Ideally, we would use something like node-NODEID, node_state-NODEID, and
* transient_attributes-NODEID as the element IDs. Unfortunately changing it
* would be impractical due to backward compatibility; older nodes in a
* rolling upgrade will always write and expect the value in the old format.
*/
/*!
* Value of the PCMK_XA_ID XML attribute to use with the node's
* PCMK_XE_NODE, PCMK_XE_NODE_STATE, and PCMK_XE_TRANSIENT_ATTRIBUTES
* XML elements in the CIB
*/
char *xml_id;
char *state; // @TODO change to enum
//! Group of <tt>enum pcmk__node_status_flags</tt>
uint32_t flags;
/*!
* Most recent cluster membership in which node was seen (0 for Pacemaker
* Remote nodes)
*/
uint64_t membership_id;
uint32_t processes; // @TODO most bits not needed; merge into flags
/* @TODO When we can break public API compatibility, we can make the rest of
* these members separate structs and use void *cluster_data and
* void *user_data here instead, to abstract the cluster layer further.
*/
// Only used by controller
enum crm_join_phase join;
char *expected;
time_t peer_lost;
char *conn_host;
time_t when_member; // Since when node has been a cluster member
time_t when_online; // Since when peer has been online in CPG
/* @TODO The following are currently needed only by the Corosync stack.
* Eventually consider moving them to a cluster-layer-specific data object.
*/
uint32_t cluster_layer_id; //!< Cluster-layer numeric node ID
time_t when_lost; //!< When CPG membership was last lost
} pcmk__node_status_t;
/*!
* \internal
* \brief Return the process bit corresponding to the current cluster stack
*
* \return Process flag if detectable, otherwise 0
*/
static inline uint32_t
crm_get_cluster_proc(void)
{
switch (pcmk_get_cluster_layer()) {
case pcmk_cluster_layer_corosync:
return crm_proc_cpg;
default:
break;
}
return crm_proc_none;
}
/*!
* \internal
* \brief Get log-friendly string description of a Corosync return code
*
* \param[in] error Corosync return code
*
* \return Log-friendly string description corresponding to \p error
*/
static inline const char *
pcmk__cs_err_str(int error)
{
# if SUPPORT_COROSYNC
switch (error) {
case CS_OK: return "OK";
case CS_ERR_LIBRARY: return "Library error";
case CS_ERR_VERSION: return "Version error";
case CS_ERR_INIT: return "Initialization error";
case CS_ERR_TIMEOUT: return "Timeout";
case CS_ERR_TRY_AGAIN: return "Try again";
case CS_ERR_INVALID_PARAM: return "Invalid parameter";
case CS_ERR_NO_MEMORY: return "No memory";
case CS_ERR_BAD_HANDLE: return "Bad handle";
case CS_ERR_BUSY: return "Busy";
case CS_ERR_ACCESS: return "Access error";
case CS_ERR_NOT_EXIST: return "Doesn't exist";
case CS_ERR_NAME_TOO_LONG: return "Name too long";
case CS_ERR_EXIST: return "Exists";
case CS_ERR_NO_SPACE: return "No space";
case CS_ERR_INTERRUPT: return "Interrupt";
case CS_ERR_NAME_NOT_FOUND: return "Name not found";
case CS_ERR_NO_RESOURCES: return "No resources";
case CS_ERR_NOT_SUPPORTED: return "Not supported";
case CS_ERR_BAD_OPERATION: return "Bad operation";
case CS_ERR_FAILED_OPERATION: return "Failed operation";
case CS_ERR_MESSAGE_ERROR: return "Message error";
case CS_ERR_QUEUE_FULL: return "Queue full";
case CS_ERR_QUEUE_NOT_AVAILABLE: return "Queue not available";
case CS_ERR_BAD_FLAGS: return "Bad flags";
case CS_ERR_TOO_BIG: return "Too big";
case CS_ERR_NO_SECTIONS: return "No sections";
}
# endif
return "Corosync error";
}
# if SUPPORT_COROSYNC
#if 0
/* This is the new way to do it, but we still support all Corosync 2 versions,
* and this isn't always available. A better alternative here would be to check
* for support in the configure script and enable this conditionally.
*/
#define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP)
#else
#define pcmk__init_cmap(handle) cmap_initialize(handle)
#endif
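
For illustration only (not part of this patch), a caller uses the wrapper exactly as it would cmap_initialize():

#include <corosync/cmap.h>

cmap_handle_t cmap_handle = 0;
cs_error_t rc = pcmk__init_cmap(&cmap_handle);

if (rc == CS_OK) {
    // ... read keys with cmap_get_string() and friends ...
    cmap_finalize(cmap_handle);
}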
char *pcmk__corosync_cluster_name(void);
bool pcmk__corosync_add_nodes(xmlNode *xml_parent);
void pcmk__cpg_confchg_cb(cpg_handle_t handle,
const struct cpg_name *group_name,
const struct cpg_address *member_list,
size_t member_list_entries,
const struct cpg_address *left_list,
size_t left_list_entries,
const struct cpg_address *joined_list,
size_t joined_list_entries);
char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id,
- uint32_t pid, void *content, uint32_t *kind,
- const char **from);
+ uint32_t pid, void *content, const char **from);
# endif
const char *pcmk__cluster_node_uuid(pcmk__node_status_t *node);
char *pcmk__cluster_node_name(uint32_t nodeid);
const char *pcmk__cluster_local_node_name(void);
const char *pcmk__node_name_from_uuid(const char *uuid);
pcmk__node_status_t *crm_update_peer_proc(const char *source,
pcmk__node_status_t *peer,
uint32_t flag, const char *status);
pcmk__node_status_t *pcmk__update_peer_state(const char *source,
pcmk__node_status_t *node,
const char *state,
uint64_t membership);
void pcmk__update_peer_expected(const char *source, pcmk__node_status_t *node,
const char *expected);
void pcmk__reap_unseen_nodes(uint64_t ring_id);
void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
gboolean),
void (*destroy) (gpointer));
enum pcmk__cluster_msg pcmk__cluster_parse_msg_type(const char *text);
bool pcmk__cluster_send_message(const pcmk__node_status_t *node,
enum pcmk__cluster_msg service,
const xmlNode *data);
// Membership
bool pcmk__cluster_has_quorum(void);
void pcmk__cluster_init_node_caches(void);
void pcmk__cluster_destroy_node_caches(void);
void pcmk__cluster_set_autoreap(bool enable);
void pcmk__cluster_set_status_callback(void (*dispatch)(enum pcmk__node_update,
pcmk__node_status_t *,
const void *));
bool pcmk__cluster_is_node_active(const pcmk__node_status_t *node);
unsigned int pcmk__cluster_num_active_nodes(void);
unsigned int pcmk__cluster_num_remote_nodes(void);
pcmk__node_status_t *pcmk__cluster_lookup_remote_node(const char *node_name);
void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name);
void pcmk__cluster_forget_remote_node(const char *node_name);
pcmk__node_status_t *pcmk__search_node_caches(unsigned int id,
const char *uname,
uint32_t flags);
void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id);
void pcmk__refresh_node_caches_from_cib(xmlNode *cib);
pcmk__node_status_t *pcmk__get_node(unsigned int id, const char *uname,
const char *uuid, uint32_t flags);
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_CLUSTER_INTERNAL__H
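
A rough sketch of how the node-cache helpers above fit together (the node ID and log message are illustrative, not taken from any daemon):

#include <crm/cluster/internal.h>

uint32_t nodeid = 1;    // hypothetical Corosync node ID

pcmk__cluster_init_node_caches();

pcmk__node_status_t *node = pcmk__get_node(nodeid, NULL, NULL,
                                           pcmk__node_search_cluster_member);
if (node != NULL) {
    crm_info("Node %s is in state %s",
             pcmk__s(node->name, "without a name"),
             pcmk__s(node->state, "unknown"));
}

pcmk__cluster_destroy_node_caches();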
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
index 726e136831..2bda2a1b37 100644
--- a/lib/cluster/cpg.c
+++ b/lib/cluster/cpg.c
@@ -1,1065 +1,1058 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <arpa/inet.h>
#include <inttypes.h> // PRIu32
#include <netdb.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <stdint.h> // uint32_t
#include <sys/socket.h>
#include <sys/types.h> // size_t
#include <sys/utsname.h>
#include <bzlib.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/cpg.h>
#include <qb/qbipc_common.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <crm/cluster/internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h> // PCMK__SPECIAL_PID
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include "crmcluster_private.h"
/* @TODO Once we can update the public API to require pcmk_cluster_t* in more
* functions, we can ditch this in favor of cluster->cpg_handle.
*/
static cpg_handle_t pcmk_cpg_handle = 0;
// @TODO These could be moved to pcmk_cluster_t* at that time as well
static bool cpg_evicted = false;
static GList *cs_message_queue = NULL;
static int cs_message_timer = 0;
struct pcmk__cpg_host_s {
uint32_t id;
uint32_t pid;
enum pcmk__cluster_msg type;
uint32_t size;
char uname[MAX_NAME];
} __attribute__ ((packed));
typedef struct pcmk__cpg_host_s pcmk__cpg_host_t;
struct pcmk__cpg_msg_s {
struct qb_ipc_response_header header __attribute__ ((aligned(8)));
uint32_t id;
gboolean is_compressed;
pcmk__cpg_host_t host;
pcmk__cpg_host_t sender;
uint32_t size;
uint32_t compressed_size;
/* 584 bytes */
char data[0];
} __attribute__ ((packed));
typedef struct pcmk__cpg_msg_s pcmk__cpg_msg_t;
static void crm_cs_flush(gpointer data);
#define msg_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size)
#define cs_repeat(rc, counter, max, code) do { \
rc = code; \
if ((rc == CS_ERR_TRY_AGAIN) || (rc == CS_ERR_QUEUE_FULL)) { \
counter++; \
crm_debug("Retrying operation after %ds", counter); \
sleep(counter); \
} else { \
break; \
} \
} while (counter < max)
/*!
* \internal
* \brief Get the local Corosync node ID (via CPG)
*
* \param[in] handle CPG connection to use (or 0 to use new connection)
*
* \return Corosync ID of local node (or 0 if not known)
*/
uint32_t
pcmk__cpg_local_nodeid(cpg_handle_t handle)
{
cs_error_t rc = CS_OK;
int retries = 0;
static uint32_t local_nodeid = 0;
cpg_handle_t local_handle = handle;
cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0};
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv = 0;
if (local_nodeid != 0) {
return local_nodeid;
}
if (handle == 0) {
crm_trace("Creating connection");
cs_repeat(rc, retries, 5,
cpg_model_initialize(&local_handle, CPG_MODEL_V1,
(cpg_model_data_t *) &cpg_model_info,
NULL));
if (rc != CS_OK) {
crm_err("Could not connect to the CPG API: %s (%d)",
cs_strerror(rc), rc);
return 0;
}
rc = cpg_fd_get(local_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CPG API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
// CPG provider run as root (at least in given user namespace)?
rv = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid,
&found_uid, &found_gid);
if (rv == 0) {
crm_err("CPG provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CPG provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
}
if (rc == CS_OK) {
retries = 0;
crm_trace("Performing lookup");
cs_repeat(rc, retries, 5, cpg_local_get(local_handle, &local_nodeid));
}
if (rc != CS_OK) {
crm_err("Could not get local node id from the CPG API: %s (%d)",
pcmk__cs_err_str(rc), rc);
}
bail:
if (handle == 0) {
crm_trace("Closing connection");
cpg_finalize(local_handle);
}
crm_debug("Local nodeid is %u", local_nodeid);
return local_nodeid;
}
/*!
* \internal
* \brief Callback function for Corosync message queue timer
*
* \param[in] data CPG handle
*
* \return FALSE (so GLib removes this one-shot timer; crm_cs_flush() re-adds
* one when more messages remain queued)
*/
static gboolean
crm_cs_flush_cb(gpointer data)
{
cs_message_timer = 0;
crm_cs_flush(data);
return FALSE;
}
// Send no more than this many CPG messages in one flush
#define CS_SEND_MAX 200
/*!
* \internal
* \brief Send messages in Corosync CPG message queue
*
* \param[in] data CPG handle
*/
static void
crm_cs_flush(gpointer data)
{
unsigned int sent = 0;
guint queue_len = 0;
cs_error_t rc = 0;
cpg_handle_t *handle = (cpg_handle_t *) data;
if (*handle == 0) {
crm_trace("Connection is dead");
return;
}
queue_len = g_list_length(cs_message_queue);
if (((queue_len % 1000) == 0) && (queue_len > 1)) {
crm_err("CPG queue has grown to %d", queue_len);
} else if (queue_len == CS_SEND_MAX) {
crm_warn("CPG queue has grown to %d", queue_len);
}
if (cs_message_timer != 0) {
/* There is already a timer, wait until it goes off */
crm_trace("Timer active %d", cs_message_timer);
return;
}
while ((cs_message_queue != NULL) && (sent < CS_SEND_MAX)) {
struct iovec *iov = cs_message_queue->data;
rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1);
if (rc != CS_OK) {
break;
}
sent++;
crm_trace("CPG message sent, size=%llu",
(unsigned long long) iov->iov_len);
cs_message_queue = g_list_remove(cs_message_queue, iov);
free(iov->iov_base);
free(iov);
}
queue_len -= sent;
do_crm_log((queue_len > 5)? LOG_INFO : LOG_TRACE,
"Sent %u CPG message%s (%d still queued): %s (rc=%d)",
sent, pcmk__plural_s(sent), queue_len, pcmk__cs_err_str(rc),
(int) rc);
if (cs_message_queue) {
uint32_t delay_ms = 100;
if (rc != CS_OK) {
/* Proportionally more if sending failed but cap at 1s */
delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len));
}
cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data);
}
}
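
To make the retry backoff concrete, a worked example (not additional code in the patch): with CS_SEND_MAX of 200 and, say, 30 messages still queued after a failed send,

/* delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len))
 *          = QB_MIN(1000, 200 + (10 * 30))
 *          = 500 ms
 *
 * whereas an incomplete but successful flush retries after the fixed 100 ms.
 */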
/*!
* \internal
* \brief Dispatch function for CPG handle
*
* \param[in,out] user_data Cluster object
*
* \return 0 on success, -1 on error (per mainloop_io_t interface)
*/
static int
pcmk_cpg_dispatch(gpointer user_data)
{
cs_error_t rc = CS_OK;
pcmk_cluster_t *cluster = (pcmk_cluster_t *) user_data;
rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE);
if (rc != CS_OK) {
crm_err("Connection to the CPG API failed: %s (%d)",
pcmk__cs_err_str(rc), rc);
cpg_finalize(cluster->cpg_handle);
cluster->cpg_handle = 0;
return -1;
} else if (cpg_evicted) {
crm_err("Evicted from CPG membership");
return -1;
}
return 0;
}
static inline const char *
ais_dest(const pcmk__cpg_host_t *host)
{
return (host->size > 0)? host->uname : "<all>";
}
static inline const char *
msg_type2text(enum pcmk__cluster_msg type)
{
switch (type) {
case pcmk__cluster_msg_attrd:
return "attrd";
case pcmk__cluster_msg_based:
return "cib";
case pcmk__cluster_msg_controld:
return "crmd";
case pcmk__cluster_msg_execd:
return "lrmd";
case pcmk__cluster_msg_fenced:
return "stonith-ng";
default:
return "unknown";
}
}
/*!
* \internal
* \brief Check whether a Corosync CPG message is valid
*
* \param[in] msg Corosync CPG message to check
*
* \return true if \p msg is valid, otherwise false
*/
static bool
check_message_sanity(const pcmk__cpg_msg_t *msg)
{
int32_t payload_size = msg->header.size - sizeof(pcmk__cpg_msg_t);
if (payload_size < 1) {
crm_err("%sCPG message %d from %s invalid: "
"Claimed size of %d bytes is too small "
QB_XS " from %s[%u] to %s@%s",
(msg->is_compressed? "Compressed " : ""),
msg->id, ais_dest(&(msg->sender)),
(int) msg->header.size,
msg_type2text(msg->sender.type), msg->sender.pid,
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return false;
}
if (msg->header.error != CS_OK) {
crm_err("%sCPG message %d from %s invalid: "
"Sender indicated error %d "
QB_XS " from %s[%u] to %s@%s",
(msg->is_compressed? "Compressed " : ""),
msg->id, ais_dest(&(msg->sender)),
msg->header.error,
msg_type2text(msg->sender.type), msg->sender.pid,
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return false;
}
if (msg_data_len(msg) != payload_size) {
crm_err("%sCPG message %d from %s invalid: "
"Total size %d inconsistent with payload size %d "
QB_XS " from %s[%u] to %s@%s",
(msg->is_compressed? "Compressed " : ""),
msg->id, ais_dest(&(msg->sender)),
(int) msg->header.size, (int) msg_data_len(msg),
msg_type2text(msg->sender.type), msg->sender.pid,
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return false;
}
if (!msg->is_compressed &&
/* msg->size != (strlen(msg->data) + 1) would be a stronger check,
* but checking the last byte or two should be quick
*/
(((msg->size > 1) && (msg->data[msg->size - 2] == '\0'))
|| (msg->data[msg->size - 1] != '\0'))) {
crm_err("CPG message %d from %s invalid: "
"Payload does not end at byte %llu "
QB_XS " from %s[%u] to %s@%s",
msg->id, ais_dest(&(msg->sender)),
(unsigned long long) msg->size,
msg_type2text(msg->sender.type), msg->sender.pid,
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return false;
}
crm_trace("Verified %d-byte %sCPG message %d from %s[%u]@%s to %s@%s",
(int) msg->header.size, (msg->is_compressed? "compressed " : ""),
msg->id, msg_type2text(msg->sender.type), msg->sender.pid,
ais_dest(&(msg->sender)),
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return true;
}
/*!
* \internal
* \brief Extract text data from a Corosync CPG message
*
* \param[in] handle CPG connection (to get local node ID if not known)
* \param[in] sender_id Corosync ID of node that sent message
* \param[in] pid Process ID of message sender (for logging only)
* \param[in,out] content CPG message
- * \param[out] kind If not \c NULL, will be set to CPG header ID
- * (which should be an <tt>enum crm_ais_msg_class</tt>
- * value, currently always \c crm_class_cluster)
* \param[out] from If not \c NULL, will be set to sender uname
* (valid for the lifetime of \p content)
*
* \return Newly allocated string with message data
*
* \note The caller is responsible for freeing the return value using \c free().
*/
char *
pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id, uint32_t pid,
- void *content, uint32_t *kind, const char **from)
+ void *content, const char **from)
{
char *data = NULL;
pcmk__cpg_msg_t *msg = content;
if (handle != 0) {
// Do filtering and field massaging
uint32_t local_nodeid = pcmk__cpg_local_nodeid(handle);
const char *local_name = pcmk__cluster_local_node_name();
if ((msg->sender.id != 0) && (msg->sender.id != sender_id)) {
crm_err("Nodeid mismatch from %" PRIu32 ".%" PRIu32
": claimed nodeid=%" PRIu32,
sender_id, pid, msg->sender.id);
return NULL;
}
if ((msg->host.id != 0) && (local_nodeid != msg->host.id)) {
crm_trace("Not for us: %" PRIu32" != %" PRIu32,
msg->host.id, local_nodeid);
return NULL;
}
if ((msg->host.size > 0)
&& !pcmk__str_eq(msg->host.uname, local_name, pcmk__str_casei)) {
crm_trace("Not for us: %s != %s", msg->host.uname, local_name);
return NULL;
}
msg->sender.id = sender_id;
if (msg->sender.size == 0) {
const pcmk__node_status_t *peer =
pcmk__get_node(sender_id, NULL, NULL,
pcmk__node_search_cluster_member);
if (peer->name == NULL) {
crm_err("No node name for peer with nodeid=%u", sender_id);
} else {
crm_notice("Fixing node name for peer with nodeid=%u",
sender_id);
msg->sender.size = strlen(peer->name);
memset(msg->sender.uname, 0, MAX_NAME);
memcpy(msg->sender.uname, peer->name, msg->sender.size);
}
}
}
crm_trace("Got new%s message (size=%d, %d, %d)",
msg->is_compressed ? " compressed" : "",
msg_data_len(msg), msg->size, msg->compressed_size);
- if (kind != NULL) {
- *kind = msg->header.id;
- }
if (from != NULL) {
*from = msg->sender.uname;
}
if (msg->is_compressed && (msg->size > 0)) {
int rc = BZ_OK;
char *uncompressed = NULL;
unsigned int new_size = msg->size + 1;
if (!check_message_sanity(msg)) {
goto badmsg;
}
crm_trace("Decompressing message data");
uncompressed = pcmk__assert_alloc(1, new_size);
rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data,
msg->compressed_size, 1, 0);
rc = pcmk__bzlib2rc(rc);
if (rc != pcmk_rc_ok) {
crm_err("Decompression failed: %s " QB_XS " rc=%d",
pcmk_rc_str(rc), rc);
free(uncompressed);
goto badmsg;
}
CRM_ASSERT(new_size == msg->size);
data = uncompressed;
} else if (!check_message_sanity(msg)) {
goto badmsg;
} else {
data = strdup(msg->data);
}
// Is this necessary?
pcmk__get_node(msg->sender.id, msg->sender.uname, NULL,
pcmk__node_search_cluster_member);
crm_trace("Payload: %.200s", data);
return data;
badmsg:
crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):"
" min=%d, total=%d, size=%d, bz2_size=%d",
msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, (int)sizeof(pcmk__cpg_msg_t),
msg->header.size, msg->size, msg->compressed_size);
free(data);
return NULL;
}
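
With the kind argument gone, a deliver callback built on this helper reduces to roughly the following sketch (example_cpg_deliver_cb and its logging are illustrative, not part of this patch):

static void
example_cpg_deliver_cb(cpg_handle_t handle, const struct cpg_name *group_name,
                       uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    const char *from = NULL;
    char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);

    if (data == NULL) {
        return;     // Filtered out, or not a valid CPG message
    }
    crm_debug("Received %llu bytes from %s[%u]",
              (unsigned long long) strlen(data), from, nodeid);
    // ... parse and dispatch the payload here ...
    free(data);
}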
/*!
* \internal
* \brief Compare cpg_address objects by node ID
*
* \param[in] first First cpg_address structure to compare
* \param[in] second Second cpg_address structure to compare
*
* \return Negative number if first's node ID is lower,
* positive number if first's node ID is greater,
* or 0 if both node IDs are equal
*/
static int
cmp_member_list_nodeid(const void *first, const void *second)
{
const struct cpg_address *const a = *((const struct cpg_address **) first),
*const b = *((const struct cpg_address **) second);
if (a->nodeid < b->nodeid) {
return -1;
} else if (a->nodeid > b->nodeid) {
return 1;
}
/* don't bother with "reason" or "pid" */
return 0;
}
/*!
* \internal
* \brief Get a readable string equivalent of a cpg_reason_t value
*
* \param[in] reason CPG reason value
*
* \return Readable string suitable for logging
*/
static const char *
cpgreason2str(cpg_reason_t reason)
{
switch (reason) {
case CPG_REASON_JOIN: return " via cpg_join";
case CPG_REASON_LEAVE: return " via cpg_leave";
case CPG_REASON_NODEDOWN: return " via cluster exit";
case CPG_REASON_NODEUP: return " via cluster join";
case CPG_REASON_PROCDOWN: return " for unknown reason";
default: break;
}
return "";
}
/*!
* \internal
* \brief Get a log-friendly node name
*
* \param[in] peer Node to check
*
* \return Node's uname, or readable string if not known
*/
static inline const char *
peer_name(const pcmk__node_status_t *peer)
{
return (peer != NULL)? pcmk__s(peer->name, "peer node") : "unknown node";
}
/*!
* \internal
* \brief Process a CPG peer's leaving the cluster
*
* \param[in] cpg_group_name CPG group name (for logging)
* \param[in] event_counter Event number (for logging)
* \param[in] local_nodeid Node ID of local node
* \param[in] cpg_peer CPG peer that left
* \param[in] sorted_member_list List of remaining members, qsort()-ed by ID
* \param[in] member_list_entries Number of entries in \p sorted_member_list
*/
static void
node_left(const char *cpg_group_name, int event_counter,
uint32_t local_nodeid, const struct cpg_address *cpg_peer,
const struct cpg_address **sorted_member_list,
size_t member_list_entries)
{
pcmk__node_status_t *peer =
pcmk__search_node_caches(cpg_peer->nodeid, NULL,
pcmk__node_search_cluster_member);
const struct cpg_address **rival = NULL;
/* Most CPG-related Pacemaker code assumes that only one process on a node
* can be in the process group, but Corosync does not impose this
* limitation, and more than one can be a member in practice due to a
* daemon attempting to start while another instance is already running.
*
* Check for any such duplicate instances, because we don't want to process
* their leaving as if our actual peer left. If the peer that left still has
* an entry in sorted_member_list (with a different PID), we will ignore the
* leaving.
*
* @TODO Track CPG members' PIDs so we can tell exactly who left.
*/
if (peer != NULL) {
rival = bsearch(&cpg_peer, sorted_member_list, member_list_entries,
sizeof(const struct cpg_address *),
cmp_member_list_nodeid);
}
if (rival == NULL) {
crm_info("Group %s event %d: %s (node %u pid %u) left%s",
cpg_group_name, event_counter, peer_name(peer),
cpg_peer->nodeid, cpg_peer->pid,
cpgreason2str(cpg_peer->reason));
if (peer != NULL) {
crm_update_peer_proc(__func__, peer, crm_proc_cpg,
PCMK_VALUE_OFFLINE);
}
} else if (cpg_peer->nodeid == local_nodeid) {
crm_warn("Group %s event %d: duplicate local pid %u left%s",
cpg_group_name, event_counter,
cpg_peer->pid, cpgreason2str(cpg_peer->reason));
} else {
crm_warn("Group %s event %d: "
"%s (node %u) duplicate pid %u left%s (%u remains)",
cpg_group_name, event_counter, peer_name(peer),
cpg_peer->nodeid, cpg_peer->pid,
cpgreason2str(cpg_peer->reason), (*rival)->pid);
}
}
/*!
* \internal
* \brief Handle a CPG configuration change event
*
* \param[in] handle CPG connection
* \param[in] group_name CPG group name
* \param[in] member_list List of current CPG members
* \param[in] member_list_entries Number of entries in \p member_list
* \param[in] left_list List of CPG members that left
* \param[in] left_list_entries Number of entries in \p left_list
* \param[in] joined_list List of CPG members that joined
* \param[in] joined_list_entries Number of entries in \p joined_list
*
* \note This is of type \c cpg_confchg_fn_t, intended to be used in a
* \c cpg_callbacks_t object.
*/
void
pcmk__cpg_confchg_cb(cpg_handle_t handle,
const struct cpg_name *group_name,
const struct cpg_address *member_list,
size_t member_list_entries,
const struct cpg_address *left_list,
size_t left_list_entries,
const struct cpg_address *joined_list,
size_t joined_list_entries)
{
static int counter = 0;
bool found = false;
uint32_t local_nodeid = pcmk__cpg_local_nodeid(handle);
const struct cpg_address **sorted = NULL;
sorted = pcmk__assert_alloc(member_list_entries,
sizeof(const struct cpg_address *));
for (size_t iter = 0; iter < member_list_entries; iter++) {
sorted[iter] = member_list + iter;
}
// Sort by node ID so that cross-matching of multiply-subscribed nodes is cheap
qsort(sorted, member_list_entries, sizeof(const struct cpg_address *),
cmp_member_list_nodeid);
for (int i = 0; i < left_list_entries; i++) {
node_left(group_name->value, counter, local_nodeid, &left_list[i],
sorted, member_list_entries);
}
free(sorted);
sorted = NULL;
for (int i = 0; i < joined_list_entries; i++) {
crm_info("Group %s event %d: node %u pid %u joined%s",
group_name->value, counter, joined_list[i].nodeid,
joined_list[i].pid, cpgreason2str(joined_list[i].reason));
}
for (int i = 0; i < member_list_entries; i++) {
pcmk__node_status_t *peer =
pcmk__get_node(member_list[i].nodeid, NULL, NULL,
pcmk__node_search_cluster_member);
if (member_list[i].nodeid == local_nodeid
&& member_list[i].pid != getpid()) {
// See the note in node_left()
crm_warn("Group %s event %d: detected duplicate local pid %u",
group_name->value, counter, member_list[i].pid);
continue;
}
crm_info("Group %s event %d: %s (node %u pid %u) is member",
group_name->value, counter, peer_name(peer),
member_list[i].nodeid, member_list[i].pid);
/* If the caller left auto-reaping enabled, this will also update the
* state to member.
*/
peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg,
PCMK_VALUE_ONLINE);
if (peer && peer->state && strcmp(peer->state, CRM_NODE_MEMBER)) {
/* The node is a CPG member, but we currently think it's not a
* cluster member. This is possible only if auto-reaping was
* disabled. The node may be joining, and we happened to get the CPG
* notification before the quorum notification; or the node may have
* just died, and we are processing its final messages; or a bug
* has affected the peer cache.
*/
time_t now = time(NULL);
if (peer->when_lost == 0) {
// Track when we first got into this contradictory state
peer->when_lost = now;
} else if (now > (peer->when_lost + 60)) {
// If it persists for more than a minute, update the state
crm_warn("Node %u is member of group %s but was believed "
"offline",
member_list[i].nodeid, group_name->value);
pcmk__update_peer_state(__func__, peer, CRM_NODE_MEMBER, 0);
}
}
if (local_nodeid == member_list[i].nodeid) {
found = true;
}
}
if (!found) {
crm_err("Local node was evicted from group %s", group_name->value);
cpg_evicted = true;
}
counter++;
}
/*!
* \brief Set the CPG deliver callback function for a cluster object
*
* \param[in,out] cluster Cluster object
* \param[in] fn Deliver callback function to set
*
* \return Standard Pacemaker return code
*/
int
pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn)
{
if (cluster == NULL) {
return EINVAL;
}
cluster->cpg.cpg_deliver_fn = fn;
return pcmk_rc_ok;
}
/*!
* \brief Set the CPG config change callback function for a cluster object
*
* \param[in,out] cluster Cluster object
* \param[in] fn Configuration change callback function to set
*
* \return Standard Pacemaker return code
*/
int
pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn)
{
if (cluster == NULL) {
return EINVAL;
}
cluster->cpg.cpg_confchg_fn = fn;
return pcmk_rc_ok;
}
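
Taken together, a daemon would normally register its callbacks before connecting. A sketch, assuming example_cpg_deliver_cb from the earlier sketch and using pcmk__cpg_confchg_cb() above as the membership callback:

pcmk_cluster_t *cluster = pcmk_cluster_new();
int rc = pcmk_rc_ok;

pcmk_cpg_set_deliver_fn(cluster, example_cpg_deliver_cb);
pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb);

rc = pcmk_cluster_connect(cluster);
if (rc != pcmk_rc_ok) {
    crm_err("Could not connect to the cluster layer: %s", pcmk_rc_str(rc));
    pcmk_cluster_free(cluster);
}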
/*!
* \brief Connect to Corosync CPG
*
* \param[in,out] cluster Initialized cluster object to connect
*
* \return Standard Pacemaker return code
*/
int
pcmk__cpg_connect(pcmk_cluster_t *cluster)
{
cs_error_t rc;
int fd = -1;
int retries = 0;
uint32_t id = 0;
pcmk__node_status_t *peer = NULL;
cpg_handle_t handle = 0;
const char *message_name = pcmk__message_name(crm_system_name);
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
struct mainloop_fd_callbacks cpg_fd_callbacks = {
.dispatch = pcmk_cpg_dispatch,
.destroy = cluster->destroy,
};
cpg_model_v1_data_t cpg_model_info = {
.model = CPG_MODEL_V1,
.cpg_deliver_fn = cluster->cpg.cpg_deliver_fn,
.cpg_confchg_fn = cluster->cpg.cpg_confchg_fn,
.cpg_totem_confchg_fn = NULL,
.flags = 0,
};
cpg_evicted = false;
cluster->group.length = 0;
cluster->group.value[0] = 0;
/* group.value is char[128] */
strncpy(cluster->group.value, message_name, 127);
cluster->group.value[127] = 0;
cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value));
cs_repeat(rc, retries, 30, cpg_model_initialize(&handle, CPG_MODEL_V1, (cpg_model_data_t *)&cpg_model_info, NULL));
if (rc != CS_OK) {
crm_err("Could not connect to the CPG API: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
rc = cpg_fd_get(handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CPG API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CPG provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CPG provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
rc = CS_ERR_ACCESS;
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CPG provider: %s (%d)",
strerror(-rv), -rv);
rc = CS_ERR_ACCESS;
goto bail;
}
id = pcmk__cpg_local_nodeid(handle);
if (id == 0) {
crm_err("Could not get local node id from the CPG API");
goto bail;
}
cluster->nodeid = id;
retries = 0;
cs_repeat(rc, retries, 30, cpg_join(handle, &cluster->group));
if (rc != CS_OK) {
crm_err("Could not join the CPG group '%s': %d", message_name, rc);
goto bail;
}
pcmk_cpg_handle = handle;
cluster->cpg_handle = handle;
mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks);
bail:
if (rc != CS_OK) {
cpg_finalize(handle);
// @TODO Map rc to more specific Pacemaker return code
return ENOTCONN;
}
peer = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member);
crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Disconnect from Corosync CPG
*
* \param[in,out] cluster Cluster object to disconnect
*/
void
pcmk__cpg_disconnect(pcmk_cluster_t *cluster)
{
pcmk_cpg_handle = 0;
if (cluster->cpg_handle != 0) {
crm_trace("Disconnecting CPG");
cpg_leave(cluster->cpg_handle, &cluster->group);
cpg_finalize(cluster->cpg_handle);
cluster->cpg_handle = 0;
} else {
crm_info("No CPG connection");
}
}
/*!
* \internal
* \brief Send string data via Corosync CPG
*
* \param[in] data Data to send
* \param[in] node Cluster node to send message to (or \c NULL for all nodes)
* \param[in] dest Type of message to send
*
* \return \c true on success, or \c false otherwise
*/
static bool
send_cpg_text(const char *data, const pcmk__node_status_t *node,
enum pcmk__cluster_msg dest)
{
static int msg_id = 0;
static int local_pid = 0;
static int local_name_len = 0;
static const char *local_name = NULL;
char *target = NULL;
struct iovec *iov;
pcmk__cpg_msg_t *msg = NULL;
if (local_name == NULL) {
local_name = pcmk__cluster_local_node_name();
}
if ((local_name_len == 0) && (local_name != NULL)) {
local_name_len = strlen(local_name);
}
if (data == NULL) {
data = "";
}
if (local_pid == 0) {
local_pid = getpid();
}
msg = pcmk__assert_alloc(1, sizeof(pcmk__cpg_msg_t));
msg_id++;
msg->id = msg_id;
- msg->header.id = crm_class_cluster;
msg->header.error = CS_OK;
msg->host.type = dest;
if (node != NULL) {
if (node->name != NULL) {
target = pcmk__str_copy(node->name);
msg->host.size = strlen(node->name);
memset(msg->host.uname, 0, MAX_NAME);
memcpy(msg->host.uname, node->name, msg->host.size);
} else {
target = crm_strdup_printf("%" PRIu32, node->cluster_layer_id);
}
msg->host.id = node->cluster_layer_id;
} else {
target = pcmk__str_copy("all");
}
msg->sender.id = 0;
msg->sender.type = pcmk__cluster_parse_msg_type(crm_system_name);
msg->sender.pid = local_pid;
msg->sender.size = local_name_len;
memset(msg->sender.uname, 0, MAX_NAME);
if ((local_name != NULL) && (msg->sender.size != 0)) {
memcpy(msg->sender.uname, local_name, msg->sender.size);
}
msg->size = 1 + strlen(data);
msg->header.size = sizeof(pcmk__cpg_msg_t) + msg->size;
if (msg->size < CRM_BZ2_THRESHOLD) {
msg = pcmk__realloc(msg, msg->header.size);
memcpy(msg->data, data, msg->size);
} else {
char *compressed = NULL;
unsigned int new_size = 0;
if (pcmk__compress(data, (unsigned int) msg->size, 0, &compressed,
&new_size) == pcmk_rc_ok) {
msg->header.size = sizeof(pcmk__cpg_msg_t) + new_size;
msg = pcmk__realloc(msg, msg->header.size);
memcpy(msg->data, compressed, new_size);
msg->is_compressed = TRUE;
msg->compressed_size = new_size;
} else {
// cppcheck seems not to understand the abort logic in pcmk__realloc
// cppcheck-suppress memleak
msg = pcmk__realloc(msg, msg->header.size);
memcpy(msg->data, data, msg->size);
}
free(compressed);
}
iov = pcmk__assert_alloc(1, sizeof(struct iovec));
iov->iov_base = msg;
iov->iov_len = msg->header.size;
if (msg->compressed_size > 0) {
crm_trace("Queueing CPG message %u to %s "
"(%llu bytes, %d bytes compressed payload): %.200s",
msg->id, target, (unsigned long long) iov->iov_len,
msg->compressed_size, data);
} else {
crm_trace("Queueing CPG message %u to %s "
"(%llu bytes, %d bytes payload): %.200s",
msg->id, target, (unsigned long long) iov->iov_len,
msg->size, data);
}
free(target);
cs_message_queue = g_list_append(cs_message_queue, iov);
crm_cs_flush(&pcmk_cpg_handle);
return true;
}
/*!
* \internal
* \brief Send an XML message via Corosync CPG
*
* \param[in] msg XML message to send
* \param[in] node Cluster node to send message to (or \c NULL for all nodes)
* \param[in] dest Type of message to send
*
* \return \c true on success, or \c false otherwise
*/
bool
pcmk__cpg_send_xml(const xmlNode *msg, const pcmk__node_status_t *node,
enum pcmk__cluster_msg dest)
{
bool rc = true;
GString *data = g_string_sized_new(1024);
pcmk__xml_string(msg, 0, data, 0);
rc = send_cpg_text(data->str, node, dest);
g_string_free(data, TRUE);
return rc;
}
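
Finally, as a sender-side sketch (the element name and task value are placeholders, not from this patch), broadcasting an XML message to all attrd instances might look like:

xmlNode *msg = pcmk__xe_create(NULL, "example-message");

crm_xml_add(msg, PCMK_XA_TASK, "example-task");

// A NULL node broadcasts to every member of the process group
if (!pcmk__cpg_send_xml(msg, NULL, pcmk__cluster_msg_attrd)) {
    crm_err("Could not queue example message for sending");
}
pcmk__xml_free(msg);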
