diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index f868cea739..9abaf0535d 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -1,1044 +1,1044 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-controld.h>
static char *max_generation_from = NULL;
static xmlNodePtr max_generation_xml = NULL;
/*!
* \internal
* \brief Nodes from which a CIB sync has failed since the peer joined
*
* This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
* the name of a client node from which a CIB \p sync_from() call has failed in
* \p do_dc_join_finalize() since the client joined the cluster as a peer.
* \p join_id is the ID of the join round in which the \p sync_from() failed,
* and is intended for use in nack log messages.
*/
static GHashTable *failed_sync_nodes = NULL;
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/* Numeric counter used to identify join rounds (an unsigned int would be
* appropriate, except we get and set it in XML as int)
*/
static int current_join_id = 0;
/*!
* \internal
* \brief Destroy the hash table containing failed sync nodes
*/
void
controld_destroy_failed_sync_table(void)
{
if (failed_sync_nodes != NULL) {
g_hash_table_destroy(failed_sync_nodes);
failed_sync_nodes = NULL;
}
}
/*!
* \internal
* \brief Remove a node from the failed sync nodes table if present
*
* \param[in] node_name Node name to remove
*/
void
controld_remove_failed_sync_node(const char *node_name)
{
if (failed_sync_nodes != NULL) {
g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
}
}
/*!
* \internal
* \brief Add to a hash table a node whose CIB failed to sync
*
* \param[in] node_name Name of node whose CIB failed to sync
* \param[in] join_id Join round when the failure occurred
*/
static void
record_failed_sync_node(const char *node_name, gint join_id)
{
if (failed_sync_nodes == NULL) {
failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
}
/* If the node is already in the table then we failed to nack it during the
* filter offer step
*/
CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
GINT_TO_POINTER(join_id)));
}
/*!
* \internal
* \brief Look up a node name in the failed sync table
*
* \param[in] node_name Name of node to look up
* \param[out] join_id Where to store the join ID of when the sync failed
*
* \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
* node name was found, or \p pcmk_rc_node_unknown otherwise.
* \note \p *join_id is set to -1 if the node is not found.
*/
static int
lookup_failed_sync_node(const char *node_name, gint *join_id)
{
*join_id = -1;
if (failed_sync_nodes != NULL) {
gpointer result = g_hash_table_lookup(failed_sync_nodes,
(gchar *) node_name);
if (result != NULL) {
*join_id = GPOINTER_TO_INT(result);
return pcmk_rc_ok;
}
}
return pcmk_rc_node_unknown;
}
void
crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
{
enum crm_join_phase last = 0;
CRM_CHECK(node != NULL, return);
/* Remote nodes do not participate in joins */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return;
}
last = node->join;
if(phase == last) {
crm_trace("Node %s join-%d phase is still %s "
QB_XS " nodeid=%u source=%s",
node->uname, current_join_id, crm_join_phase_str(last),
node->id, source);
} else if ((phase <= crm_join_none) || (phase == (last + 1))) {
node->join = phase;
crm_trace("Node %s join-%d phase is now %s (was %s) "
QB_XS " nodeid=%u source=%s",
node->uname, current_join_id, crm_join_phase_str(phase),
crm_join_phase_str(last), node->id, source);
} else {
crm_warn("Rejecting join-%d phase update for node %s because "
"can't go from %s to %s " QB_XS " nodeid=%u source=%s",
current_join_id, node->uname, crm_join_phase_str(last),
crm_join_phase_str(phase), node->id, source);
}
}
static void
start_join_round(void)
{
GHashTableIter iter;
crm_node_t *peer = NULL;
crm_debug("Starting new join round join-%d", current_join_id);
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
crm_update_peer_join(__func__, peer, crm_join_none);
}
if (max_generation_from != NULL) {
free(max_generation_from);
max_generation_from = NULL;
}
if (max_generation_xml != NULL) {
pcmk__xml_free(max_generation_xml);
max_generation_xml = NULL;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
}
/*!
* \internal
* \brief Create a join message from the DC
*
* \param[in] join_op Join operation name
* \param[in] host_to Recipient of message
*/
static xmlNode *
create_dc_message(const char *join_op, const char *host_to)
{
xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
CRM_SYSTEM_DC, NULL);
/* Identify which election this is a part of */
crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
/* Add a field specifying whether the DC is shutting down. This keeps the
* joining node from fencing the old DC if it becomes the new DC.
*/
pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
pcmk_is_set(controld_globals.fsa_input_register,
R_SHUTDOWN));
return msg;
}
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *offer = NULL;
crm_node_t *member = (crm_node_t *)value;
CRM_ASSERT(member != NULL);
if (!pcmk__cluster_is_node_active(member)) {
crm_info("Not making join-%d offer to inactive node %s",
current_join_id,
(member->uname? member->uname : "with unknown name"));
if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
/* You would think this unsafe, but in fact this plus an
* active resource is what causes it to be fenced.
*
* Yes, this does mean that any node that dies at the same
* time as the old DC and is not running resources (still)
* won't be fenced.
*
* I'm not happy about this either.
*/
pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
}
return;
}
if (member->uname == NULL) {
crm_info("Not making join-%d offer to node uuid %s with unknown name",
current_join_id, member->uuid);
return;
}
if (controld_globals.membership_id != crm_peer_seq) {
controld_globals.membership_id = crm_peer_seq;
crm_info("Making join-%d offers based on membership event %llu",
current_join_id, crm_peer_seq);
}
if(user_data && member->join > crm_join_none) {
crm_info("Not making join-%d offer to already known node %s (%s)",
current_join_id, member->uname,
crm_join_phase_str(member->join));
return;
}
crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
// Advertise our feature set so the joining node can bail if not compatible
crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
pcmk__cluster_send_message(member, crm_msg_crmd, offer);
pcmk__xml_free(offer);
crm_update_peer_join(__func__, member, crm_join_welcomed);
}
/* A_DC_JOIN_OFFER_ALL */
void
do_dc_join_offer_all(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int count;
/* Reset everyone's status back to down or in_ccm in the CIB.
* Any nodes that are active in the CIB but not in the cluster membership
* will be seen as offline by the scheduler anyway.
*/
current_join_id++;
start_join_round();
update_dc(NULL);
if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
crm_info("A new node joined the cluster");
}
g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
count = crmd_join_phase_count(crm_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
/* A_DC_JOIN_OFFER_ONE */
void
do_dc_join_offer_one(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_node_t *member;
ha_msg_input_t *welcome = NULL;
int count;
const char *join_to = NULL;
if (msg_data->data == NULL) {
crm_info("Making join-%d offers to any unconfirmed nodes "
"because an unknown node joined", current_join_id);
g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
check_join_state(cur_state, __func__);
return;
}
welcome = fsa_typed_data(fsa_dt_ha_msg);
if (welcome == NULL) {
// fsa_typed_data() already logged an error
return;
}
join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
if (join_to == NULL) {
crm_err("Can't make join-%d offer to unknown node", current_join_id);
return;
}
member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
/* It is possible that a node will have been sick or starting up when the
* original offer was made. However, it will either re-announce itself in
* due course, or we can re-store the original offer on the client.
*/
crm_update_peer_join(__func__, member, crm_join_none);
join_make_offer(NULL, member, NULL);
/* If the offer isn't to the local node, make an offer to the local node as
* well, to ensure the correct value for max_generation_from.
*/
if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
member = pcmk__get_node(0, controld_globals.our_nodename, NULL,
pcmk__node_search_cluster_member);
join_make_offer(NULL, member, NULL);
}
/* This was a genuine join request; cancel any existing transition and
* invoke the scheduler.
*/
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
NULL);
count = crmd_join_phase_count(crm_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
const char *elem_l = crm_element_value(left, field);
const char *elem_r = crm_element_value(right, field);
long long int_elem_l;
long long int_elem_r;
pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
if (int_elem_l < int_elem_r) {
return -1;
} else if (int_elem_l > int_elem_r) {
return 1;
}
return 0;
}
/* A_DC_JOIN_PROCESS_REQ */
void
do_dc_join_filter_offer(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *generation = NULL;
int cmp = 0;
int join_id = -1;
int count = 0;
gint value = 0;
gboolean ack_nack_bool = TRUE;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
const char *join_version = crm_element_value(join_ack->msg,
PCMK_XA_CRM_FEATURE_SET);
crm_node_t *join_node = NULL;
if (join_from == NULL) {
crm_err("Ignoring invalid join request without node name");
return;
}
join_node = pcmk__get_node(0, join_from, NULL,
pcmk__node_search_cluster_member);
crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
if (join_id != current_join_id) {
crm_debug("Ignoring join-%d request from %s because we are on join-%d",
join_id, join_from, current_join_id);
check_join_state(cur_state, __func__);
return;
}
generation = join_ack->xml;
if (max_generation_xml != NULL && generation != NULL) {
int lpc = 0;
const char *attributes[] = {
PCMK_XA_ADMIN_EPOCH,
PCMK_XA_EPOCH,
PCMK_XA_NUM_UPDATES,
};
/* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
* element from the join client. The "if" guard is for clarity.
*/
if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
cmp = compare_int_fields(max_generation_xml, generation,
attributes[lpc]);
}
} else { // Should always be PCMK__XE_GENERATION_TUPLE
CRM_LOG_ASSERT(false);
}
}
if (ref == NULL) {
ref = "none"; // for logging only
}
if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
crm_err("Rejecting join-%d request from node %s because we failed to "
"sync its CIB in join-%d " QB_XS " ref=%s",
join_id, join_from, value, ref);
ack_nack_bool = FALSE;
} else if (!pcmk__cluster_is_node_active(join_node)) {
if (match_down_event(join_from) != NULL) {
/* The join request was received after the node was fenced or
* otherwise shutdown in a way that we're aware of. No need to log
* an error in this rare occurrence; we know the client was recently
* shut down, and receiving a lingering in-flight request is not
* cause for alarm.
*/
crm_debug("Rejecting join-%d request from inactive node %s "
QB_XS " ref=%s", join_id, join_from, ref);
} else {
crm_err("Rejecting join-%d request from inactive node %s "
QB_XS " ref=%s", join_id, join_from, ref);
}
ack_nack_bool = FALSE;
} else if (generation == NULL) {
crm_err("Rejecting invalid join-%d request from node %s "
"missing CIB generation " QB_XS " ref=%s",
join_id, join_from, ref);
ack_nack_bool = FALSE;
} else if ((join_version == NULL)
|| !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
crm_err("Rejecting join-%d request from node %s because feature set %s"
" is incompatible with ours (%s) " QB_XS " ref=%s",
join_id, join_from, (join_version? join_version : "pre-3.1.0"),
CRM_FEATURE_SET, ref);
ack_nack_bool = FALSE;
} else if (max_generation_xml == NULL) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with first CIB "
"generation) due to unknown schema version %s "
QB_XS " ref=%s",
join_id, join_from, pcmk__s(validation, "(missing)"), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with first CIB "
"generation) " QB_XS " ref=%s",
join_id, join_from, ref);
max_generation_xml = pcmk__xml_copy(NULL, generation);
pcmk__str_update(&max_generation_from, join_from);
}
} else if ((cmp < 0)
|| ((cmp == 0)
&& pcmk__str_eq(join_from, controld_globals.our_nodename,
pcmk__str_casei))) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with better CIB "
"generation than current best from %s) due to unknown "
"schema version %s " QB_XS " ref=%s",
join_id, join_from, max_generation_from,
pcmk__s(validation, "(missing)"), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with better CIB "
"generation than current best from %s) " QB_XS " ref=%s",
join_id, join_from, max_generation_from, ref);
crm_log_xml_debug(max_generation_xml, "Old max generation");
crm_log_xml_debug(generation, "New max generation");
pcmk__xml_free(max_generation_xml);
max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
pcmk__str_update(&max_generation_from, join_from);
}
} else {
crm_debug("Accepting join-%d request from %s " QB_XS " ref=%s",
join_id, join_from, ref);
}
if (!ack_nack_bool) {
if (compare_version(join_version, "3.17.0") < 0) {
/* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely
* after a nack message, so don't send one
*/
crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
} else {
crm_update_peer_join(__func__, join_node, crm_join_nack);
}
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
} else {
crm_update_peer_join(__func__, join_node, crm_join_integrated);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
}
count = crmd_join_phase_count(crm_join_integrated);
crm_debug("%d node%s currently integrated in join-%d",
count, pcmk__plural_s(count), join_id);
if (check_join_state(cur_state, __func__) == FALSE) {
// Don't waste time by invoking the scheduler yet
count = crmd_join_phase_count(crm_join_welcomed);
crm_debug("Waiting on join-%d requests from %d outstanding node%s",
join_id, count, pcmk__plural_s(count));
}
}
/* A_DC_JOIN_FINALIZE */
void
do_dc_join_finalize(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
char *sync_from = NULL;
int rc = pcmk_ok;
int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
int count_finalizable = crmd_join_phase_count(crm_join_integrated)
+ crmd_join_phase_count(crm_join_nack)
+ crmd_join_phase_count(crm_join_nack_quiet);
/* We can do this straight away, to avoid clients timing us out
* while we compute the latest CIB
*/
if (count_welcomed != 0) {
crm_debug("Waiting on join-%d requests from %d outstanding node%s "
"before finalizing join", current_join_id, count_welcomed,
pcmk__plural_s(count_welcomed));
crmd_join_phase_log(LOG_DEBUG);
/* crmd_fsa_stall(FALSE); Needed? */
return;
} else if (count_finalizable == 0) {
crm_debug("Finalization not needed for join-%d at the current time",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
check_join_state(controld_globals.fsa_state, __func__);
return;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
pcmk__str_null_matches|pcmk__str_casei)) {
controld_set_fsa_input_flags(R_HAVE_CIB);
}
if (!controld_globals.transition_graph->complete) {
crm_warn("Delaying join-%d finalization while transition in progress",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
crmd_fsa_stall(FALSE);
return;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
// Send our CIB out to everyone
sync_from = pcmk__str_copy(controld_globals.our_nodename);
crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
current_join_id, count_finalizable,
pcmk__plural_s(count_finalizable));
crm_log_xml_debug(max_generation_xml, "Requested CIB version");
} else {
// Ask for the agreed best CIB
sync_from = pcmk__str_copy(max_generation_from);
crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
current_join_id, count_finalizable,
pcmk__plural_s(count_finalizable), sync_from);
crm_log_xml_notice(max_generation_xml, "Requested CIB version");
}
crmd_join_phase_log(LOG_DEBUG);
rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
sync_from, NULL, cib_none);
fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
}
void
free_max_generation(void)
{
free(max_generation_from);
max_generation_from = NULL;
pcmk__xml_free(max_generation_xml);
max_generation_xml = NULL;
}
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
CRM_LOG_ASSERT(-EPERM != rc);
if (rc != pcmk_ok) {
const char *sync_from = (const char *) user_data;
do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
"Could not sync CIB from %s in join-%d: %s",
sync_from, current_join_id, pcmk_strerror(rc));
if (rc != -pcmk_err_old_data) {
record_failed_sync_node(sync_from, current_join_id);
}
/* restart the whole join process */
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
__func__);
} else if (!AM_I_DC) {
crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
} else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
"(%s)", current_join_id,
fsa_state2string(controld_globals.fsa_state));
} else {
controld_set_fsa_input_flags(R_HAVE_CIB);
/* make sure dc_uuid is re-set to us */
if (!check_join_state(controld_globals.fsa_state, __func__)) {
int count_finalizable = 0;
count_finalizable = crmd_join_phase_count(crm_join_integrated)
+ crmd_join_phase_count(crm_join_nack)
+ crmd_join_phase_count(crm_join_nack_quiet);
crm_debug("Notifying %d node%s of join-%d results",
count_finalizable, pcmk__plural_s(count_finalizable),
current_join_id);
g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
}
}
}
static void
join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data)
{
const char *node = user_data;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL; // for register_fsa_error() macro
crm_crit("join-%d node history update (via CIB call %d) for node %s "
"failed: %s",
current_join_id, call_id, node, pcmk_strerror(rc));
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
crm_debug("join-%d node history update (via CIB call %d) for node %s "
"complete",
current_join_id, call_id, node);
check_join_state(controld_globals.fsa_state, __func__);
}
/* A_DC_JOIN_PROCESS_ACK */
void
do_dc_join_ack(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int join_id = -1;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
crm_node_t *peer = NULL;
enum controld_section_e section = controld_section_lrm;
char *xpath = NULL;
xmlNode *state = join_ack->xml;
xmlNode *execd_state = NULL;
cib_t *cib = controld_globals.cib_conn;
int rc = pcmk_ok;
// Sanity checks
if (join_from == NULL) {
crm_warn("Ignoring message received without node identification");
goto done;
}
if (op == NULL) {
crm_warn("Ignoring message received from %s without task", join_from);
goto done;
}
if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
op, join_from, CRM_OP_JOIN_CONFIRM);
goto done;
}
if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
crm_warn("Ignoring join confirmation from %s without valid join ID",
join_from);
goto done;
}
peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
if (peer->join != crm_join_finalized) {
crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
"(currently %s not %s)",
join_id, join_from, crm_join_phase_str(peer->join),
crm_join_phase_str(crm_join_finalized));
goto done;
}
if (join_id != current_join_id) {
crm_err("Rejecting join-%d confirmation from %s "
"because currently on join-%d",
join_id, join_from, current_join_id);
crm_update_peer_join(__func__, peer, crm_join_nack);
goto done;
}
crm_update_peer_join(__func__, peer, crm_join_confirmed);
/* Update CIB with node's current executor state. A new transition will be
* triggered later, when the CIB manager notifies us of the change.
*
* The delete and modify requests are part of an atomic transaction.
*/
rc = cib->cmds->init_transaction(cib);
if (rc != pcmk_ok) {
goto done;
}
// Delete relevant parts of node's current executor state from CIB
if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
section = controld_section_lrm_unlocked;
}
controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
rc = cib->cmds->remove(cib, xpath, NULL,
cib_xpath|cib_multiple|cib_transaction);
if (rc != pcmk_ok) {
goto done;
}
// Update CIB with node's latest known executor state
if (pcmk__str_eq(join_from, controld_globals.our_nodename,
pcmk__str_casei)) {
// Use the latest possible state if processing our own join ack
execd_state = controld_query_executor_state();
if (execd_state != NULL) {
crm_debug("Updating local node history for join-%d from query "
"result",
current_join_id);
state = execd_state;
} else {
crm_warn("Updating local node history from join-%d confirmation "
"because query failed",
current_join_id);
}
} else {
crm_debug("Updating node history for %s from join-%d confirmation",
join_from, current_join_id);
}
rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
cib_can_create|cib_transaction);
pcmk__xml_free(execd_state);
if (rc != pcmk_ok) {
goto done;
}
// Commit the transaction
rc = cib->cmds->end_transaction(cib, true, cib_none);
fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
if (rc > 0) {
// join_from will be freed after callback
join_from = NULL;
rc = pcmk_ok;
}
done:
if (rc != pcmk_ok) {
crm_crit("join-%d node history update for node %s failed: %s",
current_join_id, join_from, pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free(join_from);
free(xpath);
}
void
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *acknak = NULL;
xmlNode *tmp1 = NULL;
crm_node_t *join_node = value;
const char *join_to = join_node->uname;
bool integrated = false;
switch (join_node->join) {
case crm_join_integrated:
integrated = true;
break;
case crm_join_nack:
case crm_join_nack_quiet:
break;
default:
crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
"for join-%d", join_to,
crm_join_phase_str(join_node->join), current_join_id);
return;
}
/* Update the <node> element with the node's name and UUID, in case they
* weren't known before
*/
crm_trace("Updating node name and UUID in CIB for %s", join_to);
tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node));
crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
pcmk__xml_free(tmp1);
if (join_node->join == crm_join_nack_quiet) {
crm_trace("Not sending nack message to node %s with feature set older "
"than 3.17.0", join_to);
return;
}
join_node = pcmk__get_node(0, join_to, NULL,
pcmk__node_search_cluster_member);
if (!pcmk__cluster_is_node_active(join_node)) {
/*
* NACK'ing nodes that the membership layer doesn't know about yet
* simply creates more churn
*
* Better to leave them waiting and let the join restart when
* the new membership event comes in
*
* All other NACKs (due to versions etc) should still be processed
*/
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
return;
}
// Acknowledge or nack node's join request
crm_debug("%sing join-%d request from %s",
integrated? "Acknowledg" : "Nack", current_join_id, join_to);
acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
if (integrated) {
// No change needed for a nacked node
crm_update_peer_join(__func__, join_node, crm_join_finalized);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
/* Iterate through the remote peer cache and add information on which
* node hosts each to the ACK message. This keeps new controllers in
* sync with what has already happened.
*/
if (pcmk__cluster_num_remote_nodes() > 0) {
GHashTableIter iter;
crm_node_t *node = NULL;
xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
g_hash_table_iter_init(&iter, crm_remote_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *remote = NULL;
if (!node->conn_host) {
continue;
}
remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
pcmk__xe_set_props(remote,
PCMK_XA_ID, node->uname,
PCMK__XA_NODE_STATE, node->state,
PCMK__XA_CONNECTION_HOST, node->conn_host,
NULL);
}
}
}
pcmk__cluster_send_message(join_node, crm_msg_crmd, acknak);
pcmk__xml_free(acknak);
return;
}
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
static unsigned long long highest_seq = 0;
if (controld_globals.membership_id != crm_peer_seq) {
crm_debug("join-%d: Membership changed from %llu to %llu "
QB_XS " highest=%llu state=%s for=%s",
current_join_id, controld_globals.membership_id, crm_peer_seq,
highest_seq, fsa_state2string(cur_state), source);
if(highest_seq < crm_peer_seq) {
/* Don't spam the FSA with duplicates */
highest_seq = crm_peer_seq;
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
} else if (cur_state == S_INTEGRATION) {
if (crmd_join_phase_count(crm_join_welcomed) == 0) {
int count = crmd_join_phase_count(crm_join_integrated);
crm_debug("join-%d: Integration of %d peer%s complete "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
return TRUE;
}
} else if (cur_state == S_FINALIZE_JOIN) {
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
crm_debug("join-%d: Delaying finalization until we have CIB "
QB_XS " state=%s for=%s",
current_join_id, fsa_state2string(cur_state), source);
return TRUE;
} else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
int count = crmd_join_phase_count(crm_join_welcomed);
crm_debug("join-%d: Still waiting on %d welcomed node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(crm_join_integrated) != 0) {
int count = crmd_join_phase_count(crm_join_integrated);
crm_debug("join-%d: Still waiting on %d integrated node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(crm_join_finalized) != 0) {
int count = crmd_join_phase_count(crm_join_finalized);
crm_debug("join-%d: Still waiting on %d finalized node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else {
crm_debug("join-%d: Complete " QB_XS " state=%s for=%s",
current_join_id, fsa_state2string(cur_state), source);
register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
return TRUE;
}
}
return FALSE;
}
void
do_dc_join_final(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
- crm_update_quorum(crm_have_quorum, TRUE);
+ crm_update_quorum(pcmk__cluster_has_quorum(), TRUE);
}
int crmd_join_phase_count(enum crm_join_phase phase)
{
int count = 0;
crm_node_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
if(peer->join == phase) {
count++;
}
}
return count;
}
void crmd_join_phase_log(int level)
{
crm_node_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
crm_join_phase_str(peer->join));
}
}
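The only behavioral change in controld_join_dc.c is in do_dc_join_final(), where the deprecated public global crm_have_quorum is replaced by the internal accessor pcmk__cluster_has_quorum(), declared later in this patch in include/crm/cluster/internal.h. The accessor's definition is not part of this excerpt; a minimal sketch of the intended shape, assuming the quorum flag simply moves behind a function in the membership code:

/* Hypothetical sketch only -- the real definition is not shown in this diff.
 * The idea is that the formerly public flag becomes private state that
 * callers can read but no longer write directly.
 */
static bool cluster_quorate = false;    // assumed private flag

bool
pcmk__cluster_has_quorum(void)
{
    return cluster_quorate;
}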
diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
index 5baca399a2..1834a129bb 100644
--- a/daemons/controld/controld_schedulerd.c
+++ b/daemons/controld/controld_schedulerd.c
@@ -1,510 +1,511 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <unistd.h> /* pid_t, sleep, ssize_t */
#include <crm/cib.h>
#include <crm/cluster.h>
#include <crm/common/xml.h>
#include <crm/crm.h>
#include <crm/common/xml_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_schedulerd.h>
#include <pacemaker-controld.h>
static void handle_disconnect(void);
static pcmk_ipc_api_t *schedulerd_api = NULL;
/*!
* \internal
* \brief Close any scheduler connection and free associated memory
*/
void
controld_shutdown_schedulerd_ipc(void)
{
controld_clear_fsa_input_flags(R_PE_REQUIRED);
pcmk_disconnect_ipc(schedulerd_api);
handle_disconnect();
pcmk_free_ipc_api(schedulerd_api);
schedulerd_api = NULL;
}
/*!
* \internal
* \brief Save CIB query result to file, raising FSA error
*
* \param[in] msg Ignored
* \param[in] call_id Call ID of CIB query
* \param[in] rc Return code of CIB query
* \param[in] output Result of CIB query
* \param[in] user_data Unique identifier for filename
*
* \note This is intended to be called after a scheduler connection fails.
*/
static void
save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
const char *id = user_data;
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
CRM_CHECK(id != NULL, return);
if (rc == pcmk_ok) {
char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
if (pcmk__xml_write_file(output, filename, true) != pcmk_rc_ok) {
crm_err("Could not save Cluster Information Base to %s after scheduler crash",
filename);
} else {
crm_notice("Saved Cluster Information Base to %s after scheduler crash",
filename);
}
free(filename);
}
}
/*!
* \internal
* \brief Respond to scheduler connection failure
*/
static void
handle_disconnect(void)
{
// If we aren't connected to the scheduler, we can't expect a reply
controld_expect_sched_reply(NULL);
if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
int rc = pcmk_ok;
char *uuid_str = crm_generate_uuid();
crm_crit("Lost connection to the scheduler "
QB_XS " CIB will be saved to " PE_STATE_DIR "/pe-core-%s.bz2",
uuid_str);
/*
* The scheduler died...
*
* Save the current CIB so that we have a chance of
* figuring out what killed it.
*
* Delay raising the I_ERROR until the query below completes or
* 5s is up, whichever comes first.
*
*/
rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
NULL, NULL, cib_none);
fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
}
controld_clear_fsa_input_flags(R_PE_CONNECTED);
controld_trigger_fsa();
return;
}
static void
handle_reply(pcmk_schedulerd_api_reply_t *reply)
{
const char *msg_ref = NULL;
if (!AM_I_DC) {
return;
}
msg_ref = reply->data.graph.reference;
if (msg_ref == NULL) {
crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
} else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
pcmk__str_none)) {
ha_msg_input_t fsa_input;
xmlNode *crm_data_node;
controld_stop_sched_timer();
/* do_te_invoke (which will eventually process the fsa_input we are constructing
* here) requires that fsa_input.xml be non-NULL. That will only happen if
* copy_ha_msg_input (which is called by register_fsa_input_adv) sees the
* fsa_input.msg that it is expecting. The scheduler's IPC dispatch function
* gave us the values we need, we just need to put them into XML.
*
* The name of the top level element here is irrelevant. Nothing checks it.
*/
fsa_input.msg = pcmk__xe_create(NULL, "dummy-reply");
crm_xml_add(fsa_input.msg, PCMK_XA_REFERENCE, msg_ref);
crm_xml_add(fsa_input.msg, PCMK__XA_CRM_TGRAPH_IN,
reply->data.graph.input);
crm_data_node = pcmk__xe_create(fsa_input.msg, PCMK__XE_CRM_XML);
pcmk__xml_copy(crm_data_node, reply->data.graph.tgraph);
register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
pcmk__xml_free(fsa_input.msg);
} else {
crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
}
}
static void
scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
crm_exit_t status, void *event_data, void *user_data)
{
pcmk_schedulerd_api_reply_t *reply = event_data;
switch (event_type) {
case pcmk_ipc_event_disconnect:
handle_disconnect();
break;
case pcmk_ipc_event_reply:
handle_reply(reply);
break;
default:
break;
}
}
static bool
new_schedulerd_ipc_connection(void)
{
int rc;
controld_set_fsa_input_flags(R_PE_REQUIRED);
if (schedulerd_api == NULL) {
rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
if (rc != pcmk_rc_ok) {
crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
return false;
}
}
pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3);
if (rc != pcmk_rc_ok) {
crm_err("Error connecting to %s: %s",
pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc));
return false;
}
controld_set_fsa_input_flags(R_PE_CONNECTED);
return true;
}
static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data);
/* A_PE_START, A_PE_STOP, O_PE_RESTART */
void
do_pe_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (pcmk_is_set(action, A_PE_STOP)) {
controld_clear_fsa_input_flags(R_PE_REQUIRED);
pcmk_disconnect_ipc(schedulerd_api);
handle_disconnect();
}
if (pcmk_is_set(action, A_PE_START)
&& !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
if (cur_state == S_STOPPING) {
crm_info("Ignoring request to connect to scheduler while shutting down");
} else if (!new_schedulerd_ipc_connection()) {
crm_warn("Could not connect to scheduler");
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
}
static int fsa_pe_query = 0;
static mainloop_timer_t *controld_sched_timer = NULL;
// @TODO Make this a configurable cluster option if there's demand for it
#define SCHED_TIMEOUT_MS (120000)
/*!
* \internal
* \brief Handle a timeout waiting for scheduler reply
*
* \param[in] user_data Ignored
*
* \return FALSE (indicating that timer should not be restarted)
*/
static gboolean
controld_sched_timeout(gpointer user_data)
{
if (AM_I_DC) {
/* If this node is the DC but can't communicate with the scheduler, just
* exit (and likely get fenced) so this node doesn't interfere with any
* further DC elections.
*
* @TODO We could try something less drastic first, like disconnecting
* and reconnecting to the scheduler, but something is likely going
* seriously wrong, so perhaps it's better to just fail as quickly as
* possible.
*/
crmd_exit(CRM_EX_FATAL);
}
return FALSE;
}
void
controld_stop_sched_timer(void)
{
if ((controld_sched_timer != NULL)
&& (controld_globals.fsa_pe_ref != NULL)) {
crm_trace("Stopping timer for scheduler reply %s",
controld_globals.fsa_pe_ref);
}
mainloop_timer_stop(controld_sched_timer);
}
/*!
* \internal
* \brief Set the scheduler request currently being waited on
*
* \param[in] ref Request to expect reply to (or NULL for none)
*
* \note This function takes ownership of \p ref.
*/
void
controld_expect_sched_reply(char *ref)
{
if (ref) {
if (controld_sched_timer == NULL) {
controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
SCHED_TIMEOUT_MS, FALSE,
controld_sched_timeout,
NULL);
}
mainloop_timer_start(controld_sched_timer);
} else {
controld_stop_sched_timer();
}
free(controld_globals.fsa_pe_ref);
controld_globals.fsa_pe_ref = ref;
}
/*!
* \internal
* \brief Free the scheduler reply timer
*/
void
controld_free_sched_timer(void)
{
if (controld_sched_timer != NULL) {
mainloop_timer_del(controld_sched_timer);
controld_sched_timer = NULL;
}
}
/* A_PE_INVOKE */
void
do_pe_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
cib_t *cib_conn = controld_globals.cib_conn;
if (AM_I_DC == FALSE) {
crm_err("Not invoking scheduler because not DC: %s",
fsa_action2string(action));
return;
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_err("Cannot shut down gracefully without the scheduler");
register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
} else {
crm_info("Waiting for the scheduler to connect");
crmd_fsa_stall(FALSE);
controld_set_fsa_action_flags(A_PE_START);
controld_trigger_fsa();
}
return;
}
if (cur_state != S_POLICY_ENGINE) {
crm_notice("Not invoking scheduler because in state %s",
fsa_state2string(cur_state));
return;
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
/* start the join from scratch */
register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
return;
}
fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_none);
crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
fsa_state2string(controld_globals.fsa_state));
controld_expect_sched_reply(NULL);
fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
}
static void
force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
{
int max = 0;
int lpc = 0;
const char *xpath_base = NULL;
char *xpath_string = NULL;
xmlXPathObjectPtr xpathObj = NULL;
xpath_base = pcmk_cib_xpath_for(PCMK_XE_CRM_CONFIG);
if (xpath_base == NULL) {
crm_err(PCMK_XE_CRM_CONFIG " CIB element not known (bug?)");
return;
}
xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
xpath_base, PCMK_XE_CLUSTER_PROPERTY_SET,
attr_name);
xpathObj = xpath_search(xml, xpath_string);
max = numXpathResults(xpathObj);
free(xpath_string);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
crm_trace("Forcing %s/%s = %s",
pcmk__xe_id(match), attr_name, attr_value);
crm_xml_add(match, PCMK_XA_VALUE, attr_value);
}
if(max == 0) {
xmlNode *configuration = NULL;
xmlNode *crm_config = NULL;
xmlNode *cluster_property_set = NULL;
crm_trace("Creating %s-%s for %s=%s",
PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name, attr_name,
attr_value);
configuration = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL,
NULL);
if (configuration == NULL) {
configuration = pcmk__xe_create(xml, PCMK_XE_CONFIGURATION);
}
crm_config = pcmk__xe_first_child(configuration, PCMK_XE_CRM_CONFIG,
NULL, NULL);
if (crm_config == NULL) {
crm_config = pcmk__xe_create(configuration, PCMK_XE_CRM_CONFIG);
}
cluster_property_set =
pcmk__xe_first_child(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
NULL);
if (cluster_property_set == NULL) {
cluster_property_set =
pcmk__xe_create(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET);
crm_xml_add(cluster_property_set, PCMK_XA_ID,
PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS);
}
xml = pcmk__xe_create(cluster_property_set, PCMK_XE_NVPAIR);
pcmk__xe_set_id(xml, "%s-%s",
PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name);
crm_xml_add(xml, PCMK_XA_NAME, attr_name);
crm_xml_add(xml, PCMK_XA_VALUE, attr_value);
}
freeXpathObject(xpathObj);
}
static void
do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
char *ref = NULL;
pid_t watchdog = pcmk__locate_sbd();
if (rc != pcmk_ok) {
crm_err("Could not retrieve the Cluster Information Base: %s "
QB_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
return;
} else if (call_id != fsa_pe_query) {
crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
return;
} else if (!AM_I_DC
|| !pcmk_is_set(controld_globals.fsa_input_register,
R_PE_CONNECTED)) {
crm_debug("No need to invoke the scheduler anymore");
return;
} else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
crm_debug("Discarding scheduler request in state: %s",
fsa_state2string(controld_globals.fsa_state));
return;
/* this callback counts as 1 */
} else if (num_cib_op_callbacks() > 1) {
crm_debug("Re-asking for the CIB: %d other peer updates still pending",
(num_cib_op_callbacks() - 1));
sleep(1);
controld_set_fsa_action_flags(A_PE_INVOKE);
controld_trigger_fsa();
return;
}
CRM_LOG_ASSERT(output != NULL);
/* Refresh the remote node cache and the known node cache when the
* scheduler is invoked */
pcmk__refresh_node_caches_from_cib(output);
crm_xml_add(output, PCMK_XA_DC_UUID, controld_globals.our_uuid);
pcmk__xe_set_bool_attr(output, PCMK_XA_HAVE_QUORUM,
pcmk_is_set(controld_globals.flags,
controld_has_quorum));
force_local_option(output, PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog));
if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
- && !crm_have_quorum) {
+ && !pcmk__cluster_has_quorum()) {
+
crm_xml_add_int(output, PCMK_XA_NO_QUORUM_PANIC, 1);
}
rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref));
if (rc < 0) {
crm_err("Could not contact the scheduler: %s " QB_XS " rc=%d",
pcmk_strerror(rc), rc);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
} else {
CRM_ASSERT(ref != NULL);
controld_expect_sched_reply(ref);
crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
"quorate=%s",
fsa_pe_query, controld_globals.fsa_pe_ref, crm_peer_seq,
pcmk__flag_text(controld_globals.flags, controld_has_quorum));
}
}
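In controld_schedulerd.c the same substitution happens in do_pe_invoke_callback(), which now asks pcmk__cluster_has_quorum() whether to set PCMK_XA_NO_QUORUM_PANIC. Separately, the doc comment for controld_expect_sched_reply() above notes that the function takes ownership of ref; a short usage sketch of that contract (cib_xml and the wrapper function are illustrative, everything else comes from this file):

/* Usage sketch of the ownership contract documented above */
static void
invoke_scheduler_sketch(xmlNode *cib_xml)
{
    char *ref = NULL;
    int rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, cib_xml,
                                                      &ref));

    if (rc >= 0) {
        controld_expect_sched_reply(ref);   /* starts reply timer; owns ref */
    } else {
        controld_expect_sched_reply(NULL);  /* stops timer; expect no reply */
    }
    /* Do not free(ref) here -- ownership was transferred above */
}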
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index 778c4baa3d..98fbe831c9 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -1,265 +1,261 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER__H
# define PCMK__CRM_CLUSTER__H
# include <stdint.h> // uint32_t, uint64_t
# include <glib.h> // gboolean, GHashTable
# include <libxml/tree.h> // xmlNode
# include <crm/common/xml.h>
# include <crm/common/util.h>
#ifdef __cplusplus
extern "C" {
#endif
# if SUPPORT_COROSYNC
# include <corosync/cpg.h>
# endif
-// @COMPAT Make this internal when we can break API backward compatibility
-//! \deprecated Do not use (public access will be removed in a future release)
-extern gboolean crm_have_quorum;
-
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern GHashTable *crm_peer_cache;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern GHashTable *crm_remote_peer_cache;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern unsigned long long crm_peer_seq;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
#define CRM_NODE_LOST "lost"
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
#define CRM_NODE_MEMBER "member"
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_join_phase {
/* @COMPAT: crm_join_nack_quiet can be replaced by crm_node_t:user_data
* at a compatibility break.
*/
//! Not allowed to join, but don't send a nack message
crm_join_nack_quiet = -2,
crm_join_nack = -1,
crm_join_none = 0,
crm_join_welcomed = 1,
crm_join_integrated = 2,
crm_join_finalized = 3,
crm_join_confirmed = 4,
};
//!@}
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_node_flags {
/* Node is not a cluster node and should not be considered for cluster
* membership
*/
crm_remote_node = (1U << 0),
// Node's cache entry is dirty
crm_node_dirty = (1U << 1),
};
//!@}
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
typedef struct crm_peer_node_s {
char *uname; // Node name as known to cluster
/* @COMPAT This is less than ideal since the value is not a valid XML ID
* (for Corosync, it's the string equivalent of the node's numeric node ID,
* but XML IDs can't start with a number) and the three elements should have
* different IDs.
*
* Ideally, we would use something like node-NODEID, node_state-NODEID, and
* transient_attributes-NODEID as the element IDs. Unfortunately changing it
* would be impractical due to backward compatibility; older nodes in a
* rolling upgrade will always write and expect the value in the old format.
*
* This is also named poorly, since the value is not a UUID, but at least
* that can be changed at an API compatibility break.
*/
/*! Value of the PCMK_XA_ID XML attribute to use with the node's
* PCMK_XE_NODE, PCMK_XE_NODE_STATE, and PCMK_XE_TRANSIENT_ATTRIBUTES
* XML elements in the CIB
*/
char *uuid;
char *state; // @TODO change to enum
uint64_t flags; // Bitmask of crm_node_flags
uint64_t last_seen; // Only needed by cluster nodes
uint32_t processes; // @TODO most not needed, merge into flags
/* @TODO When we can break public API compatibility, we can make the rest of
* these members separate structs and use void *cluster_data and
* void *user_data here instead, to abstract the cluster layer further.
*/
// Currently only needed by corosync stack
uint32_t id; // Node ID
time_t when_lost; // When CPG membership was last lost
// Only used by controller
enum crm_join_phase join;
char *expected;
time_t peer_lost;
char *conn_host;
time_t when_member; // Since when node has been a cluster member
time_t when_online; // Since when peer has been online in CPG
} crm_node_t;
//!@}
// Implementation of pcmk_cluster_t
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
struct crm_cluster_s {
char *uuid;
char *uname;
uint32_t nodeid;
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_cluster_set_destroy_fn() to set this
void (*destroy) (gpointer);
# if SUPPORT_COROSYNC
/* @TODO When we can break public API compatibility, make these members a
* separate struct and use void *cluster_data here instead, to abstract the
* cluster layer further.
*/
struct cpg_name group;
// NOTE: sbd (as of at least 1.5.2) uses this
/*!
* \deprecated Call pcmk_cpg_set_deliver_fn() and pcmk_cpg_set_confchg_fn()
* to set these
*/
cpg_callbacks_t cpg;
cpg_handle_t cpg_handle;
# endif
};
//!@}
//! Connection to a cluster layer
typedef struct crm_cluster_s pcmk_cluster_t;
int pcmk_cluster_connect(pcmk_cluster_t *cluster);
int pcmk_cluster_disconnect(pcmk_cluster_t *cluster);
pcmk_cluster_t *pcmk_cluster_new(void);
void pcmk_cluster_free(pcmk_cluster_t *cluster);
int pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer));
#if SUPPORT_COROSYNC
int pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn);
int pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn);
#endif // SUPPORT_COROSYNC
/* @COMPAT Make this internal when we can break API backward compatibility. Also
* evaluate whether we can drop this entirely. Since 2.0.0, we have sent only
* messages with crm_class_cluster.
*/
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_ais_msg_class {
crm_class_cluster = 0,
};
//!@}
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_ais_msg_types {
crm_msg_none = 0,
crm_msg_ais = 1, // Unused
crm_msg_lrmd = 2,
crm_msg_cib = 3,
crm_msg_crmd = 4,
crm_msg_attrd = 5,
crm_msg_stonithd = 6, // Unused
crm_msg_te = 7, // Unused
crm_msg_pe = 8, // Unused
crm_msg_stonith_ng = 9,
};
//!@}
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_status_type {
crm_status_uname,
crm_status_nstate,
crm_status_processes,
};
//!@}
/*!
* \enum pcmk_cluster_layer
* \brief Types of cluster layer
*/
enum pcmk_cluster_layer {
pcmk_cluster_layer_unknown = 1, //!< Unknown cluster layer
pcmk_cluster_layer_invalid = 2, //!< Invalid cluster layer
pcmk_cluster_layer_corosync = 32, //!< Corosync Cluster Engine
};
enum pcmk_cluster_layer pcmk_get_cluster_layer(void);
const char *pcmk_cluster_layer_text(enum pcmk_cluster_layer layer);
/*
* \brief Get log-friendly string equivalent of a join phase
*
* \param[in] phase Join phase
*
* \return Log-friendly string equivalent of \p phase
*/
//! \deprecated Do not use (public access will be removed in a future release)
static inline const char *
crm_join_phase_str(enum crm_join_phase phase)
{
switch (phase) {
case crm_join_nack_quiet: return "nack_quiet";
case crm_join_nack: return "nack";
case crm_join_none: return "none";
case crm_join_welcomed: return "welcomed";
case crm_join_integrated: return "integrated";
case crm_join_finalized: return "finalized";
case crm_join_confirmed: return "confirmed";
default: return "invalid";
}
}
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/cluster/compat.h>
#endif
#ifdef __cplusplus
}
#endif
#endif
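The hunk above drops the extern declaration of crm_have_quorum from the public header outright, rather than leaving it behind a deprecation notice like the other globals here. Remaining in-tree callers migrate as the controller diffs above do; a before/after sketch:

/* Before (deprecated public global, removed by this patch): */
if (crm_have_quorum) {
    /* quorum-dependent work */
}

/* After (internal accessor declared in crm/cluster/internal.h): */
if (pcmk__cluster_has_quorum()) {
    /* quorum-dependent work */
}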
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index afc6e843c8..39f012bc61 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -1,193 +1,195 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER_INTERNAL__H
# define PCMK__CRM_CLUSTER_INTERNAL__H
# include <stdbool.h>
# include <stdint.h> // uint32_t, uint64_t
# include <glib.h> // gboolean
# include <crm/cluster.h>
enum crm_proc_flag {
/* @COMPAT When crm_node_t:processes is made internal, we can merge this
* into node flags or turn it into a boolean. Until then, in theory
* something could depend on these particular numeric values.
*/
crm_proc_none = 0x00000001,
// Cluster layers
crm_proc_cpg = 0x04000000,
};
// Used with node cache search functions
enum pcmk__node_search_flags {
//! Does not affect search
pcmk__node_search_none = 0,
//! Search for cluster nodes from membership cache
pcmk__node_search_cluster_member = (1 << 0),
//! Search for remote nodes
pcmk__node_search_remote = (1 << 1),
//! Search for cluster member nodes and remote nodes
pcmk__node_search_any = pcmk__node_search_cluster_member
|pcmk__node_search_remote,
//! Search for cluster nodes from CIB (as of last cache refresh)
pcmk__node_search_cluster_cib = (1 << 2),
};
/*!
* \internal
* \brief Return the process bit corresponding to the current cluster stack
*
* \return Process flag if detectable, otherwise 0
*/
static inline uint32_t
crm_get_cluster_proc(void)
{
switch (pcmk_get_cluster_layer()) {
case pcmk_cluster_layer_corosync:
return crm_proc_cpg;
default:
break;
}
return crm_proc_none;
}
/*!
* \internal
* \brief Get log-friendly string description of a Corosync return code
*
* \param[in] error Corosync return code
*
* \return Log-friendly string description corresponding to \p error
*/
static inline const char *
pcmk__cs_err_str(int error)
{
# if SUPPORT_COROSYNC
switch (error) {
case CS_OK: return "OK";
case CS_ERR_LIBRARY: return "Library error";
case CS_ERR_VERSION: return "Version error";
case CS_ERR_INIT: return "Initialization error";
case CS_ERR_TIMEOUT: return "Timeout";
case CS_ERR_TRY_AGAIN: return "Try again";
case CS_ERR_INVALID_PARAM: return "Invalid parameter";
case CS_ERR_NO_MEMORY: return "No memory";
case CS_ERR_BAD_HANDLE: return "Bad handle";
case CS_ERR_BUSY: return "Busy";
case CS_ERR_ACCESS: return "Access error";
case CS_ERR_NOT_EXIST: return "Doesn't exist";
case CS_ERR_NAME_TOO_LONG: return "Name too long";
case CS_ERR_EXIST: return "Exists";
case CS_ERR_NO_SPACE: return "No space";
case CS_ERR_INTERRUPT: return "Interrupt";
case CS_ERR_NAME_NOT_FOUND: return "Name not found";
case CS_ERR_NO_RESOURCES: return "No resources";
case CS_ERR_NOT_SUPPORTED: return "Not supported";
case CS_ERR_BAD_OPERATION: return "Bad operation";
case CS_ERR_FAILED_OPERATION: return "Failed operation";
case CS_ERR_MESSAGE_ERROR: return "Message error";
case CS_ERR_QUEUE_FULL: return "Queue full";
case CS_ERR_QUEUE_NOT_AVAILABLE: return "Queue not available";
case CS_ERR_BAD_FLAGS: return "Bad flags";
case CS_ERR_TOO_BIG: return "Too big";
case CS_ERR_NO_SECTIONS: return "No sections";
}
# endif
return "Corosync error";
}
# if SUPPORT_COROSYNC
#if 0
/* This is the new way to do it, but we still support all Corosync 2 versions,
* and this isn't always available. A better alternative here would be to check
* for support in the configure script and enable this conditionally.
*/
#define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP)
#else
#define pcmk__init_cmap(handle) cmap_initialize(handle)
#endif
char *pcmk__corosync_cluster_name(void);
bool pcmk__corosync_add_nodes(xmlNode *xml_parent);
void pcmk__cpg_confchg_cb(cpg_handle_t handle,
const struct cpg_name *group_name,
const struct cpg_address *member_list,
size_t member_list_entries,
const struct cpg_address *left_list,
size_t left_list_entries,
const struct cpg_address *joined_list,
size_t joined_list_entries);
char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id,
uint32_t pid, void *content, uint32_t *kind,
const char **from);
# endif
const char *pcmk__cluster_node_uuid(crm_node_t *node);
char *pcmk__cluster_node_name(uint32_t nodeid);
const char *pcmk__cluster_local_node_name(void);
const char *pcmk__node_name_from_uuid(const char *uuid);
crm_node_t *crm_update_peer_proc(const char *source, crm_node_t * peer,
uint32_t flag, const char *status);
crm_node_t *pcmk__update_peer_state(const char *source, crm_node_t *node,
const char *state, uint64_t membership);
void pcmk__update_peer_expected(const char *source, crm_node_t *node,
const char *expected);
void pcmk__reap_unseen_nodes(uint64_t ring_id);
void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
gboolean),
void (*destroy) (gpointer));
enum crm_ais_msg_types pcmk__cluster_parse_msg_type(const char *text);
bool pcmk__cluster_send_message(const crm_node_t *node,
enum crm_ais_msg_types service,
const xmlNode *data);
// Membership
+bool pcmk__cluster_has_quorum(void);
+
void pcmk__cluster_init_node_caches(void);
void pcmk__cluster_destroy_node_caches(void);
void pcmk__cluster_set_autoreap(bool enable);
void pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type,
crm_node_t *,
const void *));
bool pcmk__cluster_is_node_active(const crm_node_t *node);
unsigned int pcmk__cluster_num_active_nodes(void);
unsigned int pcmk__cluster_num_remote_nodes(void);
crm_node_t *pcmk__cluster_lookup_remote_node(const char *node_name);
void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name);
void pcmk__cluster_forget_remote_node(const char *node_name);
crm_node_t *pcmk__search_node_caches(unsigned int id, const char *uname,
uint32_t flags);
void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id);
void pcmk__refresh_node_caches_from_cib(xmlNode *cib);
crm_node_t *pcmk__get_node(unsigned int id, const char *uname,
const char *uuid, uint32_t flags);
#endif // PCMK__CRM_CLUSTER_INTERNAL__H
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c
index 833ba1d990..fe2b476129 100644
--- a/lib/cluster/corosync.c
+++ b/lib/cluster/corosync.c
@@ -1,811 +1,815 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <arpa/inet.h>
#include <inttypes.h> // PRIu64, PRIx32
#include <netdb.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <bzlib.h>
#include <corosync/cfg.h>
#include <corosync/cmap.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/quorum.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <crm/cluster/internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h> // PCMK__SPECIAL_PID
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include "crmcluster_private.h"
static quorum_handle_t pcmk_quorum_handle = 0;
static gboolean (*quorum_app_callback)(unsigned long long seq,
gboolean quorate) = NULL;
/*!
* \internal
* \brief Get the Corosync UUID associated with a Pacemaker node
*
* \param[in] node Pacemaker node
*
* \return Newly allocated string with node's Corosync UUID, or NULL if unknown
* \note It is the caller's responsibility to free the result with free().
*/
char *
pcmk__corosync_uuid(const crm_node_t *node)
{
CRM_ASSERT(pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync);
if (node != NULL) {
if (node->id > 0) {
return crm_strdup_printf("%u", node->id);
} else {
crm_info("Node %s is not yet known by Corosync", node->uname);
}
}
return NULL;
}
static bool
node_name_is_valid(const char *key, const char *name)
{
int octet;
if (name == NULL) {
crm_trace("%s is empty", key);
return false;
} else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) {
crm_trace("%s contains an IPv4 address (%s), ignoring", key, name);
return false;
} else if (strstr(name, ":") != NULL) {
crm_trace("%s contains an IPv6 address (%s), ignoring", key, name);
return false;
}
crm_trace("'%s: %s' is valid", key, name);
return true;
}
/*!
* \internal
* \brief Get Corosync node name corresponding to a node ID
*
* \param[in] cmap_handle Connection to Corosync CMAP
* \param[in] nodeid Node ID to check
*
* \return Newly allocated string with name or (if no name) IP address
* associated with first address assigned to a Corosync node ID (or NULL
* if unknown)
* \note It is the caller's responsibility to free the result with free().
*/
char *
pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid)
{
// Originally based on corosync-quorumtool.c:node_name()
int lpc = 0;
cs_error_t rc = CS_OK;
int retries = 0;
char *name = NULL;
cmap_handle_t local_handle = 0;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
if (nodeid == 0) {
nodeid = pcmk__cpg_local_nodeid(0);
}
if (cmap_handle == 0 && local_handle == 0) {
retries = 0;
crm_trace("Initializing CMAP connection");
do {
rc = pcmk__init_cmap(&local_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %s",
cs_strerror(rc));
local_handle = 0;
}
}
if (cmap_handle == 0) {
cmap_handle = local_handle;
rc = cmap_fd_get(cmap_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
}
while (name == NULL && cmap_handle != 0) {
uint32_t id = 0;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &id);
crm_trace("Checking %u vs %u from %s", nodeid, id, key);
free(key);
if (rc != CS_OK) {
break;
}
if (nodeid == id) {
crm_trace("Searching for node name for %u in nodelist.node.%d %s",
nodeid, lpc, pcmk__s(name, "<null>"));
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.name", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, pcmk__s(name, "<null>"));
free(key);
}
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, pcmk__s(name, "<null>"));
if (!node_name_is_valid(key, name)) {
free(name);
name = NULL;
}
free(key);
}
break;
}
lpc++;
}
bail:
if(local_handle) {
cmap_finalize(local_handle);
}
if (name == NULL) {
crm_info("Unable to get node name for nodeid %u", nodeid);
}
return name;
}
/*!
* \internal
* \brief Disconnect from Corosync cluster
*
* \param[in,out] cluster Cluster object to disconnect
*/
void
pcmk__corosync_disconnect(pcmk_cluster_t *cluster)
{
pcmk__cpg_disconnect(cluster);
if (pcmk_quorum_handle != 0) {
quorum_finalize(pcmk_quorum_handle);
pcmk_quorum_handle = 0;
}
crm_notice("Disconnected from Corosync");
}
/*!
* \internal
* \brief Dispatch function for quorum connection file descriptor
*
* \param[in] user_data Ignored
*
* \return 0 on success, -1 on error (per mainloop_io_t interface)
*/
static int
quorum_dispatch_cb(gpointer user_data)
{
int rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL);
if (rc < 0) {
crm_err("Connection to the Quorum API failed: %d", rc);
quorum_finalize(pcmk_quorum_handle);
pcmk_quorum_handle = 0;
return -1;
}
return 0;
}
/*!
* \internal
* \brief Notification callback for Corosync quorum connection
*
* \param[in] handle Corosync quorum connection
* \param[in] quorate Whether cluster is quorate
* \param[in] ring_id Corosync ring ID
* \param[in] view_list_entries Number of entries in \p view_list
* \param[in] view_list Corosync node IDs in membership
*/
static void
quorum_notification_cb(quorum_handle_t handle, uint32_t quorate,
uint64_t ring_id, uint32_t view_list_entries,
uint32_t *view_list)
{
int i;
GHashTableIter iter;
crm_node_t *node = NULL;
static gboolean init_phase = TRUE;
- if (quorate != crm_have_quorum) {
- if (quorate) {
- crm_notice("Quorum acquired " QB_XS " membership=%" PRIu64 " members=%lu",
- ring_id, (long unsigned int)view_list_entries);
- } else {
- crm_warn("Quorum lost " QB_XS " membership=%" PRIu64 " members=%lu",
- ring_id, (long unsigned int)view_list_entries);
- }
- crm_have_quorum = quorate;
+ bool is_quorate = (quorate != 0);
+ bool was_quorate = pcmk__cluster_has_quorum();
+
+ if (is_quorate && !was_quorate) {
+ crm_notice("Quorum acquired " QB_XS " membership=%" PRIu64
+ " members=%" PRIu32,
+ ring_id, view_list_entries);
+ pcmk__cluster_set_quorum(true);
+
+ } else if (!is_quorate && was_quorate) {
+ crm_warn("Quorum lost " QB_XS " membership=%" PRIu64 " members=" PRIu32,
+ ring_id, view_list_entries);
+ pcmk__cluster_set_quorum(false);
} else {
- crm_info("Quorum %s " QB_XS " membership=%" PRIu64 " members=%lu",
- (quorate? "retained" : "still lost"), ring_id,
- (long unsigned int)view_list_entries);
+ crm_info("Quorum %s " QB_XS " membership=%" PRIu64 " members=%" PRIu32,
+ (is_quorate? "retained" : "still lost"), ring_id,
+ view_list_entries);
}
if (view_list_entries == 0 && init_phase) {
crm_info("Corosync membership is still forming, ignoring");
return;
}
init_phase = FALSE;
/* Reset last_seen for all cached nodes so we can tell which ones aren't
* in the view list */
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
node->last_seen = 0;
}
/* Update the peer cache for each node in view list */
for (i = 0; i < view_list_entries; i++) {
uint32_t id = view_list[i];
crm_debug("Member[%d] %u ", i, id);
/* Get this node's peer cache entry (adding one if not already there) */
node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member);
if (node->uname == NULL) {
char *name = pcmk__corosync_name(0, id);
crm_info("Obtaining name for new node %u", id);
node = pcmk__get_node(id, name, NULL,
pcmk__node_search_cluster_member);
free(name);
}
/* Update the node state (including updating last_seen to ring_id) */
pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, ring_id);
}
/* Remove any peer cache entries we didn't update */
pcmk__reap_unseen_nodes(ring_id);
if (quorum_app_callback) {
- quorum_app_callback(ring_id, quorate);
+ quorum_app_callback(ring_id, is_quorate);
}
}
/*!
* \internal
* \brief Connect to Corosync quorum service
*
* \param[in] dispatch Connection dispatch callback
* \param[in] destroy Connection destroy callback
*/
void
pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
gboolean),
void (*destroy)(gpointer))
{
cs_error_t rc;
int fd = 0;
int quorate = 0;
uint32_t quorum_type = 0;
struct mainloop_fd_callbacks quorum_fd_callbacks;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
quorum_fd_callbacks.dispatch = quorum_dispatch_cb;
quorum_fd_callbacks.destroy = destroy;
crm_debug("Configuring Pacemaker to obtain quorum from Corosync");
{
#if 0
// New way but not supported by all Corosync 2 versions
quorum_model_v0_data_t quorum_model_data = {
.model = QUORUM_MODEL_V0,
.quorum_notify_fn = quorum_notification_cb,
};
rc = quorum_model_initialize(&pcmk_quorum_handle, QUORUM_MODEL_V0,
(quorum_model_data_t *) &quorum_model_data,
&quorum_type, NULL);
#else
quorum_callbacks_t quorum_callbacks = {
.quorum_notify_fn = quorum_notification_cb,
};
rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks,
&quorum_type);
#endif
}
if (rc != CS_OK) {
crm_err("Could not connect to the Quorum API: %s (%d)",
cs_strerror(rc), rc);
goto bail;
} else if (quorum_type != QUORUM_SET) {
crm_err("Corosync quorum is not configured");
goto bail;
}
rc = quorum_fd_get(pcmk_quorum_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the Quorum API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* Quorum provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("Quorum provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
rc = CS_ERR_ACCESS;
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of Quorum provider: %s (%d)",
strerror(-rv), -rv);
rc = CS_ERR_ACCESS;
goto bail;
}
rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
if (rc != CS_OK) {
crm_err("Could not obtain the current Quorum API state: %d", rc);
goto bail;
}
if (quorate) {
crm_notice("Quorum acquired");
} else {
crm_warn("No quorum");
}
quorum_app_callback = dispatch;
- crm_have_quorum = quorate;
+ pcmk__cluster_set_quorum(quorate != 0);
rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT);
if (rc != CS_OK) {
crm_err("Could not setup Quorum API notifications: %d", rc);
goto bail;
}
mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks);
pcmk__corosync_add_nodes(NULL);
bail:
if (rc != CS_OK) {
quorum_finalize(pcmk_quorum_handle);
}
}
/*!
* \internal
* \brief Connect to Corosync cluster layer
*
* \param[in,out] cluster Initialized cluster object to connect
*
* \return Standard Pacemaker return code
*/
int
pcmk__corosync_connect(pcmk_cluster_t *cluster)
{
crm_node_t *peer = NULL;
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
int rc = pcmk_rc_ok;
pcmk__cluster_init_node_caches();
if (cluster_layer != pcmk_cluster_layer_corosync) {
crm_err("Invalid cluster layer: %s " QB_XS " cluster_layer=%d",
cluster_layer_s, cluster_layer);
return EINVAL;
}
rc = pcmk__cpg_connect(cluster);
if (rc != pcmk_rc_ok) {
// Error message was logged by pcmk__cpg_connect()
return rc;
}
crm_info("Connection to %s established", cluster_layer_s);
cluster->nodeid = pcmk__cpg_local_nodeid(0);
if (cluster->nodeid == 0) {
crm_err("Could not determine local node ID");
return ENXIO;
}
cluster->uname = pcmk__cluster_node_name(0);
if (cluster->uname == NULL) {
crm_err("Could not determine local node name");
return ENXIO;
}
// Ensure local node always exists in peer cache
peer = pcmk__get_node(cluster->nodeid, cluster->uname, NULL,
pcmk__node_search_cluster_member);
cluster->uuid = pcmk__corosync_uuid(peer);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Check whether a Corosync cluster is active
*
* \return \c true if Corosync is found active, or \c false otherwise
*/
bool
pcmk__corosync_is_active(void)
{
cmap_handle_t handle;
int rc = pcmk__init_cmap(&handle);
if (rc == CS_OK) {
cmap_finalize(handle);
return true;
}
crm_info("Failed to initialize the cmap API: %s (%d)",
pcmk__cs_err_str(rc), rc);
return false;
}
/*!
* \internal
* \brief Check whether a Corosync cluster peer is active
*
* \param[in] node Node to check
*
* \return \c true if \p node is an active Corosync peer, or \c false otherwise
*/
bool
pcmk__corosync_is_peer_active(const crm_node_t *node)
{
if (node == NULL) {
crm_trace("Corosync peer inactive: NULL");
return false;
}
if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_none)) {
crm_trace("Corosync peer %s inactive: state=%s",
node->uname, node->state);
return false;
}
if (!pcmk_is_set(node->processes, crm_proc_cpg)) {
crm_trace("Corosync peer %s inactive " QB_XS " processes=%.16" PRIx32,
node->uname, node->processes);
return false;
}
return true;
}
/*!
* \internal
* \brief Load Corosync node list (via CMAP) into peer cache and optionally XML
*
* \param[in,out] xml_parent If not NULL, add <node> entry here for each node
*
* \return true if any nodes were found, false otherwise
*/
bool
pcmk__corosync_add_nodes(xmlNode *xml_parent)
{
int lpc = 0;
cs_error_t rc = CS_OK;
int retries = 0;
bool any = false;
cmap_handle_t cmap_handle;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
do {
rc = pcmk__init_cmap(&cmap_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
return false;
}
rc = cmap_fd_get(cmap_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
pcmk__cluster_init_node_caches();
crm_trace("Initializing Corosync node list");
for (lpc = 0; TRUE; lpc++) {
uint32_t nodeid = 0;
char *name = NULL;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &nodeid);
free(key);
if (rc != CS_OK) {
break;
}
name = pcmk__corosync_name(cmap_handle, nodeid);
if (name != NULL) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node && node->uname && strcasecmp(node->uname, name) == 0) {
if (node->id && node->id != nodeid) {
crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id,
nodeid, name);
crm_exit(CRM_EX_FATAL);
}
}
}
}
if (nodeid > 0 || name != NULL) {
crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name);
pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster_member);
}
if (nodeid > 0 && name != NULL) {
any = true;
if (xml_parent) {
xmlNode *node = pcmk__xe_create(xml_parent, PCMK_XE_NODE);
pcmk__xe_set_id(node, "%u", nodeid);
crm_xml_add(node, PCMK_XA_UNAME, name);
}
}
free(name);
}
bail:
cmap_finalize(cmap_handle);
return any;
}
/*!
* \internal
* \brief Get cluster name from Corosync configuration (via CMAP)
*
* \return Newly allocated string with cluster name if configured, or NULL
*/
char *
pcmk__corosync_cluster_name(void)
{
cmap_handle_t handle;
char *cluster_name = NULL;
cs_error_t rc = CS_OK;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
rc = pcmk__init_cmap(&handle);
if (rc != CS_OK) {
crm_info("Failed to initialize the cmap API: %s (%d)",
cs_strerror(rc), rc);
return NULL;
}
rc = cmap_fd_get(handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name);
if (rc != CS_OK) {
crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc);
} else {
crm_debug("cmap totem.cluster_name = '%s'", cluster_name);
}
bail:
cmap_finalize(handle);
return cluster_name;
}
/*!
* \internal
* \brief Check (via CMAP) whether Corosync configuration has a node list
*
* \return true if Corosync has node list, otherwise false
*/
bool
pcmk__corosync_has_nodelist(void)
{
cs_error_t cs_rc = CS_OK;
int retries = 0;
cmap_handle_t cmap_handle;
cmap_iter_handle_t iter_handle;
char key_name[CMAP_KEYNAME_MAXLEN + 1];
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rc = pcmk_ok;
static bool got_result = false;
static bool result = false;
if (got_result) {
return result;
}
// Connect to CMAP
do {
cs_rc = pcmk__init_cmap(&cmap_handle);
if (cs_rc != CS_OK) {
retries++;
crm_debug("CMAP connection failed: %s (rc=%d, retrying in %ds)",
cs_strerror(cs_rc), cs_rc, retries);
sleep(retries);
}
} while ((retries < 5) && (cs_rc != CS_OK));
if (cs_rc != CS_OK) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP connection failed (%s) " QB_XS " rc=%d",
cs_strerror(cs_rc), cs_rc);
return false;
}
// Get CMAP connection file descriptor
cs_rc = cmap_fd_get(cmap_handle, &fd);
if (cs_rc != CS_OK) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP unusable (%s) " QB_XS " rc=%d",
cs_strerror(cs_rc), cs_rc);
goto bail;
}
// Check whether CMAP connection is authentic (i.e. provided by root)
rc = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0,
&found_pid, &found_uid, &found_gid);
if (rc == 0) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP provider is inauthentic "
QB_XS " pid=%lld uid=%lld gid=%lld",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rc < 0) {
crm_warn("Assuming Corosync does not have node list: "
"Could not verify CMAP authenticity (%s) " QB_XS " rc=%d",
pcmk_strerror(rc), rc);
goto bail;
}
// Check whether nodelist section is present
cs_rc = cmap_iter_init(cmap_handle, "nodelist", &iter_handle);
if (cs_rc != CS_OK) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP not readable (%s) " QB_XS " rc=%d",
cs_strerror(cs_rc), cs_rc);
goto bail;
}
cs_rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL);
if (cs_rc == CS_OK) {
result = true;
}
cmap_iter_finalize(cmap_handle, iter_handle);
got_result = true;
crm_debug("Corosync %s node list", (result? "has" : "does not have"));
bail:
cmap_finalize(cmap_handle);
return result;
}
diff --git a/lib/cluster/crmcluster_private.h b/lib/cluster/crmcluster_private.h
index 4d06f99cdc..a9675bbb04 100644
--- a/lib/cluster/crmcluster_private.h
+++ b/lib/cluster/crmcluster_private.h
@@ -1,67 +1,70 @@
/*
* Copyright 2020-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRMCLUSTER_PRIVATE__H
# define PCMK__CRMCLUSTER_PRIVATE__H
/* This header is for the sole use of libcrmcluster, so that functions can be
* declared with G_GNUC_INTERNAL for efficiency.
*/
#include <stdint.h> // uint32_t, uint64_t
#include <glib.h> // G_GNUC_INTERNAL, gboolean
#include <libxml/tree.h> // xmlNode
#if SUPPORT_COROSYNC
#include <corosync/cpg.h> // cpg_handle_t
#endif // SUPPORT_COROSYNC
#include <crm/cluster.h> // crm_node_t
+G_GNUC_INTERNAL
+void pcmk__cluster_set_quorum(bool quorate);
+
#if SUPPORT_COROSYNC
G_GNUC_INTERNAL
bool pcmk__corosync_is_active(void);
G_GNUC_INTERNAL
bool pcmk__corosync_has_nodelist(void);
G_GNUC_INTERNAL
char *pcmk__corosync_uuid(const crm_node_t *peer);
G_GNUC_INTERNAL
char *pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle,
uint32_t nodeid);
G_GNUC_INTERNAL
int pcmk__corosync_connect(pcmk_cluster_t *cluster);
G_GNUC_INTERNAL
void pcmk__corosync_disconnect(pcmk_cluster_t *cluster);
G_GNUC_INTERNAL
bool pcmk__corosync_is_peer_active(const crm_node_t *node);
G_GNUC_INTERNAL
int pcmk__cpg_connect(pcmk_cluster_t *cluster);
G_GNUC_INTERNAL
void pcmk__cpg_disconnect(pcmk_cluster_t *cluster);
G_GNUC_INTERNAL
uint32_t pcmk__cpg_local_nodeid(cpg_handle_t handle);
G_GNUC_INTERNAL
bool pcmk__cpg_send_xml(const xmlNode *msg, const crm_node_t *node,
enum crm_ais_msg_types dest);
#endif // SUPPORT_COROSYNC
#endif // PCMK__CRMCLUSTER_PRIVATE__H
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index f792097f6b..2c354f5986 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -1,1454 +1,1481 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <inttypes.h> // PRIu32
+#include <stdbool.h> // bool
+#include <stdio.h>
+#include <string.h>
#include <sys/param.h>
#include <sys/types.h>
-#include <stdio.h>
#include <unistd.h>
-#include <string.h>
+
#include <glib.h>
+
#include <crm/common/ipc.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include "crmcluster_private.h"
/* The peer cache remembers cluster nodes that have been seen.
* This is managed mostly automatically by libcluster, based on
* cluster membership events.
*
* Because cluster nodes can have conflicting names or UUIDs,
* the hash table key is a uniquely generated ID.
*
* @COMPAT When this is internal, rename to cluster_node_member_cache and make
* static.
*/
GHashTable *crm_peer_cache = NULL;
/*
* The remote peer cache tracks pacemaker_remote nodes. While the
* value has the same type as the peer cache's, it is tracked separately for
* three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
* so the name (which is also the UUID) is used as the hash table key; there
* is no equivalent of membership events, so management is not automatic; and
* most users of the peer cache need to exclude pacemaker_remote nodes.
*
* That said, using a single cache would be more logical and less error-prone,
* so it would be a good idea to merge them one day.
*
* libcluster provides two avenues for populating the cache:
* pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
* directly manage it, while refresh_remote_nodes() populates it via the CIB.
*/
GHashTable *crm_remote_peer_cache = NULL;
/*
* The CIB cluster node cache tracks cluster nodes that have been seen in
* the CIB. It is useful mainly when a caller needs to know about a node that
* may no longer be in the membership, but doesn't want to add the node to the
* main peer cache tables.
*/
static GHashTable *cluster_node_cib_cache = NULL;
unsigned long long crm_peer_seq = 0;
-gboolean crm_have_quorum = FALSE;
static bool autoreap = true;
+static bool has_quorum = false;
// Flag setting and clearing for crm_node_t:flags
#define set_peer_flags(peer, flags_to_set) do { \
(peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Peer", (peer)->uname, \
(peer)->flags, (flags_to_set), \
#flags_to_set); \
} while (0)
#define clear_peer_flags(peer, flags_to_clear) do { \
(peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Peer", (peer)->uname, \
(peer)->flags, (flags_to_clear), \
#flags_to_clear); \
} while (0)
static void update_peer_uname(crm_node_t *node, const char *uname);
static crm_node_t *find_cib_cluster_node(const char *id, const char *uname);
+/*!
+ * \internal
+ * \brief Check whether the cluster currently has quorum
+ *
+ * \return \c true if the cluster has quorum, or \c false otherwise
+ */
+bool
+pcmk__cluster_has_quorum(void)
+{
+ return has_quorum;
+}
+
+/*!
+ * \internal
+ * \brief Set whether the cluster currently has quorum
+ *
+ * \param[in] quorate \c true if the cluster has quorum, or \c false otherwise
+ */
+void
+pcmk__cluster_set_quorum(bool quorate)
+{
+ has_quorum = quorate;
+}
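+
+/* A minimal sketch of how the getter and setter pair up (illustrative only;
+ * quorum_notification_cb() in lib/cluster/corosync.c follows this pattern,
+ * where is_quorate is the caller's new quorum value):
+ *
+ *     bool was_quorate = pcmk__cluster_has_quorum();
+ *
+ *     if (is_quorate != was_quorate) {
+ *         pcmk__cluster_set_quorum(is_quorate);
+ *     }
+ */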
+
/*!
* \internal
* \brief Get the number of Pacemaker Remote nodes that have been seen
*
* \return Number of cached Pacemaker Remote nodes
*/
unsigned int
pcmk__cluster_num_remote_nodes(void)
{
if (crm_remote_peer_cache == NULL) {
return 0U;
}
return g_hash_table_size(crm_remote_peer_cache);
}
/*!
* \internal
* \brief Get a remote node cache entry, creating it if necessary
*
* \param[in] node_name Name of remote node
*
* \return Cache entry for node on success, or \c NULL (and set \c errno)
* otherwise
*
* \note When creating a new entry, this will leave the node state undetermined.
* The caller should also call \c pcmk__update_peer_state() if the state
* is known.
* \note Because this can add and remove cache entries, callers should not
* assume any previously obtained cache entry pointers remain valid.
*/
crm_node_t *
pcmk__cluster_lookup_remote_node(const char *node_name)
{
crm_node_t *node;
char *node_name_copy = NULL;
if (node_name == NULL) {
errno = EINVAL;
return NULL;
}
/* It's theoretically possible that the node was added to the cluster peer
* cache before it was known to be a Pacemaker Remote node. Remove that
* entry unless it has a node ID, which means the name actually is
* associated with a cluster node. (@TODO return an error in that case?)
*/
node = pcmk__search_node_caches(0, node_name,
pcmk__node_search_cluster_member);
if ((node != NULL) && (node->uuid == NULL)) {
/* node_name could be a pointer into the cache entry being removed, so
* reassign it to a copy before the original gets freed
*/
node_name_copy = strdup(node_name);
if (node_name_copy == NULL) {
errno = ENOMEM;
return NULL;
}
node_name = node_name_copy;
pcmk__cluster_forget_cluster_node(0, node_name);
}
/* Return existing cache entry if one exists */
node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
if (node) {
free(node_name_copy);
return node;
}
/* Allocate a new entry */
node = calloc(1, sizeof(crm_node_t));
if (node == NULL) {
free(node_name_copy);
return NULL;
}
/* Populate the essential information */
set_peer_flags(node, crm_remote_node);
node->uuid = strdup(node_name);
if (node->uuid == NULL) {
free(node);
errno = ENOMEM;
free(node_name_copy);
return NULL;
}
/* Add the new entry to the cache */
g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
crm_trace("added %s to remote cache", node_name);
/* Update the entry's uname, ensuring peer status callbacks are called */
update_peer_uname(node, node_name);
free(node_name_copy);
return node;
}
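/* Illustrative call pattern (the node name is a placeholder): because this
 * function returns NULL and sets errno on failure, callers can report both:
 *
 *     crm_node_t *remote = pcmk__cluster_lookup_remote_node("remote1");
 *
 *     if (remote == NULL) {
 *         crm_err("Could not get cache entry for remote1: %s",
 *                 strerror(errno));
 *     }
 */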
/*!
* \internal
* \brief Remove a node from the Pacemaker Remote node cache
*
* \param[in] node_name Name of node to remove from cache
*
* \note The caller must be careful not to use \p node_name after calling this
* function if it might be a pointer into the cache entry being removed.
*/
void
pcmk__cluster_forget_remote_node(const char *node_name)
{
/* Do a lookup first, because node_name could be a pointer within the entry
* being removed -- we can't log it *after* removing it.
*/
if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) {
crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
g_hash_table_remove(crm_remote_peer_cache, node_name);
}
}
/*!
* \internal
* \brief Return node status based on a CIB status entry
*
* \param[in] node_state XML of node state
*
* \return \c CRM_NODE_MEMBER if \c PCMK__XA_IN_CCM is true in
* \c PCMK__XE_NODE_STATE, or \c CRM_NODE_LOST otherwise
*/
static const char *
remote_state_from_cib(const xmlNode *node_state)
{
bool in_ccm = false;
if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
&in_ccm) == pcmk_rc_ok) && in_ccm) {
return CRM_NODE_MEMBER;
}
return CRM_NODE_LOST;
}
/* user data for looping through remote node xpath searches */
struct refresh_data {
const char *field; /* XML attribute to check for node name */
gboolean has_state; /* whether to update node state based on XML */
};
/*!
* \internal
* \brief Process one pacemaker_remote node xpath search result
*
* \param[in] result XML search result
* \param[in] user_data what to look for in the XML
*/
static void
remote_cache_refresh_helper(xmlNode *result, void *user_data)
{
const struct refresh_data *data = user_data;
const char *remote = crm_element_value(result, data->field);
const char *state = NULL;
crm_node_t *node;
CRM_CHECK(remote != NULL, return);
/* Determine node's state, if the result has it */
if (data->has_state) {
state = remote_state_from_cib(result);
}
/* Check whether cache already has entry for node */
node = g_hash_table_lookup(crm_remote_peer_cache, remote);
if (node == NULL) {
/* Node is not in cache, so add a new entry for it */
node = pcmk__cluster_lookup_remote_node(remote);
CRM_ASSERT(node);
if (state) {
pcmk__update_peer_state(__func__, node, state, 0);
}
} else if (pcmk_is_set(node->flags, crm_node_dirty)) {
/* Node is in cache and hasn't been updated already, so mark it clean */
clear_peer_flags(node, crm_node_dirty);
if (state) {
pcmk__update_peer_state(__func__, node, state, 0);
}
}
}
static void
mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
set_peer_flags((crm_node_t *) value, crm_node_dirty);
}
static gboolean
is_dirty(gpointer key, gpointer value, gpointer user_data)
{
return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
}
/*!
* \internal
* \brief Repopulate the remote node cache based on CIB XML
*
* \param[in] cib CIB XML to parse
*/
static void
refresh_remote_nodes(xmlNode *cib)
{
struct refresh_data data;
pcmk__cluster_init_node_caches();
/* First, we mark all existing cache entries as dirty,
* so that later we can remove any that weren't in the CIB.
* We don't empty the cache, because we need to detect changes in state.
*/
g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
/* Look for guest nodes and remote nodes in the status section */
data.field = PCMK_XA_ID;
data.has_state = TRUE;
crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS,
remote_cache_refresh_helper, &data);
/* Look for guest nodes and remote nodes in the configuration section,
* because they may have just been added and not have a status entry yet.
* In that case, the cached node state will be left NULL, so that the
* peer status callback isn't called until we're sure the node started
* successfully.
*/
data.field = PCMK_XA_VALUE;
data.has_state = FALSE;
crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG,
remote_cache_refresh_helper, &data);
data.field = PCMK_XA_ID;
data.has_state = FALSE;
crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG,
remote_cache_refresh_helper, &data);
/* Remove all old cache entries that weren't seen in the CIB */
g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
}
/*!
* \internal
* \brief Check whether a node is an active cluster node
*
* Remote nodes are never considered active. This guarantees that they can never
* become DC.
*
* \param[in] node Node to check
*
* \return \c true if the node is an active cluster node, or \c false otherwise
*/
bool
pcmk__cluster_is_node_active(const crm_node_t *node)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) {
return false;
}
switch (cluster_layer) {
case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
return pcmk__corosync_is_peer_active(node);
#else
break;
#endif // SUPPORT_COROSYNC
default:
break;
}
crm_err("Unhandled cluster layer: %s",
pcmk_cluster_layer_text(cluster_layer));
return false;
}
/*!
* \internal
* \brief Check if a node's entry should be removed from the cluster node cache
*
* A node should be removed from the cache if it's inactive and matches another
* \c crm_node_t (the search object). The node is considered a mismatch if any
* of the following are true:
* * The search object is \c NULL.
* * The search object has an ID set and the cached node's ID does not match it.
* * The search object does not have an ID set, and the cached node's name does
* not match the search node's name. (If both names are \c NULL, it's a
* match.)
*
* Otherwise, the node is considered a match.
*
* Note that if the search object has both an ID and a name set, the name is
* ignored for matching purposes.
*
* \param[in] key Ignored
* \param[in] value \c crm_node_t object from cluster node cache
* \param[in] user_data \c crm_node_t object to match against (search object)
*
* \return \c TRUE if the node entry should be removed from \c crm_peer_cache,
* or \c FALSE otherwise
*/
static gboolean
should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
{
crm_node_t *node = value;
crm_node_t *search = user_data;
if (search == NULL) {
return FALSE;
}
if ((search->id != 0) && (node->id != search->id)) {
return FALSE;
}
if ((search->id == 0)
&& !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
// @TODO Consider name even if ID is set?
return FALSE;
}
if (pcmk__cluster_is_node_active(value)) {
return FALSE;
}
crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership "
"cache",
pcmk__s(node->uname, "(unknown)"), node->id);
return TRUE;
}
/*!
* \internal
* \brief Remove one or more inactive nodes from the cluster node cache
*
* All inactive nodes matching \p id and \p node_name as described in
* \c should_forget_cluster_node documentation are removed from the cache.
*
* If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed
* from the cache regardless of ID and name. This differs from clearing the
* cache, in that entries for active nodes are preserved.
*
* \param[in] id ID of node to remove from cache (0 to ignore)
* \param[in] node_name Name of node to remove from cache (ignored if \p id is
* nonzero)
*
* \note \p node_name is not modified directly, but it will be freed if it's a
* pointer into a cache entry that is removed.
*/
void
pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
{
crm_node_t search = { 0, };
char *criterion = NULL; // For logging
guint matches = 0;
if (crm_peer_cache == NULL) {
crm_trace("Membership cache not initialized, ignoring removal request");
return;
}
search.id = id;
search.uname = pcmk__str_copy(node_name); // May log after original freed
if (id > 0) {
criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id);
} else if (node_name != NULL) {
criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name);
}
matches = g_hash_table_foreach_remove(crm_peer_cache,
should_forget_cluster_node, &search);
if (matches > 0) {
if (criterion != NULL) {
crm_notice("Removed %u inactive node%s with %s from the membership "
"cache",
matches, pcmk__plural_s(matches), criterion);
} else {
crm_notice("Removed all (%u) inactive cluster nodes from the "
"membership cache",
matches);
}
} else {
crm_info("No inactive cluster nodes%s%s to remove from the membership "
"cache",
((criterion != NULL)? " with " : ""), pcmk__s(criterion, ""));
}
free(search.uname);
free(criterion);
}
static void
count_peer(gpointer key, gpointer value, gpointer user_data)
{
unsigned int *count = user_data;
crm_node_t *node = value;
if (pcmk__cluster_is_node_active(node)) {
*count = *count + 1;
}
}
/*!
* \internal
* \brief Get the number of active cluster nodes that have been seen
*
* Remote nodes are never considered active. This guarantees that they can never
* become DC.
*
* \return Number of active nodes in the cluster node cache
*/
unsigned int
pcmk__cluster_num_active_nodes(void)
{
unsigned int count = 0;
if (crm_peer_cache != NULL) {
g_hash_table_foreach(crm_peer_cache, count_peer, &count);
}
return count;
}
static void
destroy_crm_node(gpointer data)
{
crm_node_t *node = data;
crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
free(node->uname);
free(node->state);
free(node->uuid);
free(node->expected);
free(node->conn_host);
free(node);
}
/*!
* \internal
* \brief Initialize node caches
*/
void
pcmk__cluster_init_node_caches(void)
{
if (crm_peer_cache == NULL) {
crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
}
if (crm_remote_peer_cache == NULL) {
crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
}
if (cluster_node_cib_cache == NULL) {
cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
}
}
/*!
* \internal
* \brief Destroy node caches
*/
void
pcmk__cluster_destroy_node_caches(void)
{
if (crm_peer_cache != NULL) {
crm_trace("Destroying peer cache with %d members",
g_hash_table_size(crm_peer_cache));
g_hash_table_destroy(crm_peer_cache);
crm_peer_cache = NULL;
}
if (crm_remote_peer_cache != NULL) {
crm_trace("Destroying remote peer cache with %d members",
pcmk__cluster_num_remote_nodes());
g_hash_table_destroy(crm_remote_peer_cache);
crm_remote_peer_cache = NULL;
}
if (cluster_node_cib_cache != NULL) {
crm_trace("Destroying configured cluster node cache with %d members",
g_hash_table_size(cluster_node_cib_cache));
g_hash_table_destroy(cluster_node_cib_cache);
cluster_node_cib_cache = NULL;
}
}
static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
const void *) = NULL;
/*!
* \internal
* \brief Set a client function that will be called after peer status changes
*
* \param[in] dispatch Pointer to function to use as callback
*
* \note Client callbacks should do only client-specific handling. Callbacks
* must not add or remove entries in the peer caches.
*/
void
pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type,
crm_node_t *, const void *))
{
// @TODO Improve documentation of peer_status_callback
peer_status_callback = dispatch;
}
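/* A hedged sketch of a client callback (all names are placeholders):
 *
 *     static void
 *     my_status_cb(enum crm_status_type type, crm_node_t *node,
 *                  const void *data)
 *     {
 *         // react to crm_status_uname, crm_status_nstate, or
 *         // crm_status_processes without touching the peer caches
 *     }
 *
 *     pcmk__cluster_set_status_callback(my_status_cb);
 */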
/*!
* \internal
* \brief Tell the library whether to automatically reap lost nodes
*
* If \c true (the default), calling \c crm_update_peer_proc() will also update
* the peer state to \c CRM_NODE_MEMBER or \c CRM_NODE_LOST, and updating the
* peer state will reap peers whose state changes to anything other than
* \c CRM_NODE_MEMBER.
*
* Callers should leave this enabled unless they plan to manage the cache
* separately on their own.
*
* \param[in] enable \c true to enable automatic reaping, \c false to disable
*/
void
pcmk__cluster_set_autoreap(bool enable)
{
autoreap = enable;
}
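/* Illustrative only: a daemon that manages cache membership itself might
 * disable automatic reaping up front, e.g.
 *
 *     pcmk__cluster_init_node_caches();
 *     pcmk__cluster_set_autoreap(false);
 *
 * and later purge entries explicitly with
 * pcmk__cluster_forget_cluster_node().
 */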
static void
dump_peer_hash(int level, const char *caller)
{
GHashTableIter iter;
const char *id = NULL;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
}
}
static gboolean
hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
{
return value == user_data;
}
/*!
* \internal
* \brief Search cluster member node cache
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
* node ID to search for
*
* \return Cluster node cache entry if found, otherwise NULL
*/
static crm_node_t *
search_cluster_member_cache(unsigned int id, const char *uname,
const char *uuid)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_node_t *by_id = NULL;
crm_node_t *by_name = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
pcmk__cluster_init_node_caches();
if (uname != NULL) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->uname && strcasecmp(node->uname, uname) == 0) {
crm_trace("Name match: %s = %p", node->uname, node);
by_name = node;
break;
}
}
}
if (id > 0) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->id == id) {
crm_trace("ID match: %u = %p", node->id, node);
by_id = node;
break;
}
}
} else if (uuid != NULL) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
crm_trace("UUID match: %s = %p", node->uuid, node);
by_id = node;
break;
}
}
}
node = by_id; /* Good default */
if(by_id == by_name) {
/* Nothing to do if they match (both NULL counts) */
crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
} else if(by_id == NULL && by_name) {
crm_trace("Only one: %p for %u/%s", by_name, id, uname);
if(id && by_name->id) {
dump_peer_hash(LOG_WARNING, __func__);
crm_crit("Node %u and %u share the same name '%s'",
id, by_name->id, uname);
node = NULL; /* Create a new one */
} else {
node = by_name;
}
} else if(by_name == NULL && by_id) {
crm_trace("Only one: %p for %u/%s", by_id, id, uname);
if(uname && by_id->uname) {
dump_peer_hash(LOG_WARNING, __func__);
crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
uname, by_id->uname, id, uname);
}
} else if(uname && by_id->uname) {
if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
} else {
crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
dump_peer_hash(LOG_INFO, __func__);
crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
TRUE);
}
} else if(id && by_name->id) {
crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
} else {
/* Simple merge */
/* Only corosync-based clusters use node IDs. The functions that call
* pcmk__update_peer_state() and crm_update_peer_proc() only know
* nodeid, so 'by_id' is authoritative when merging.
*/
dump_peer_hash(LOG_DEBUG, __func__);
crm_info("Merging %p into %p", by_name, by_id);
g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
}
return node;
}
/*!
* \internal
* \brief Search caches for a node (cluster or Pacemaker Remote)
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] flags Group of enum pcmk__node_search_flags
*
* \return Node cache entry if found, otherwise NULL
*/
crm_node_t *
pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
{
crm_node_t *node = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
pcmk__cluster_init_node_caches();
if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) {
node = g_hash_table_lookup(crm_remote_peer_cache, uname);
}
if ((node == NULL)
&& pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
node = search_cluster_member_cache(id, uname, NULL);
}
if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
node = find_cib_cluster_node(id_str, uname);
free(id_str);
}
return node;
}
/*!
* \internal
* \brief Purge a node from cache (both cluster and Pacemaker Remote)
*
* \param[in] node_name If not NULL, purge only nodes with this name
* \param[in] node_id If not 0, purge cluster nodes only if they have this ID
*
* \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
* If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
* nodes that match \p node_name will be purged, and cluster nodes that
* match both \p node_name and \p node_id will be purged.
* \note The caller must be careful not to use \p node_name after calling this
* function if it might be a pointer into a cache entry being removed.
*/
void
pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
{
char *node_name_copy = NULL;
if ((node_name == NULL) && (node_id == 0U)) {
return;
}
// Purge from Pacemaker Remote node cache
if ((node_name != NULL)
&& (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) {
/* node_name could be a pointer into the cache entry being purged,
* so reassign it to a copy before the original gets freed
*/
node_name_copy = pcmk__str_copy(node_name);
node_name = node_name_copy;
crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
g_hash_table_remove(crm_remote_peer_cache, node_name);
}
pcmk__cluster_forget_cluster_node(node_id, node_name);
free(node_name_copy);
}
#if SUPPORT_COROSYNC
static guint
remove_conflicting_peer(crm_node_t *node)
{
int matches = 0;
GHashTableIter iter;
crm_node_t *existing_node = NULL;
if (node->id == 0 || node->uname == NULL) {
return 0;
}
if (!pcmk__corosync_has_nodelist()) {
return 0;
}
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
if (existing_node->id > 0
&& existing_node->id != node->id
&& existing_node->uname != NULL
&& strcasecmp(existing_node->uname, node->uname) == 0) {
if (pcmk__cluster_is_node_active(existing_node)) {
continue;
}
crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
existing_node->id, existing_node->uname, node->id);
g_hash_table_iter_remove(&iter);
matches++;
}
}
return matches;
}
#endif
/*!
* \internal
* \brief Get a cluster node cache entry, possibly creating one if not found
*
* If \c pcmk__node_search_cluster_member is set in \p flags, the return value
* is guaranteed not to be \c NULL. A new cache entry is created if one does not
* already exist.
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
* node ID to search for
* \param[in] flags Group of enum pcmk__node_search_flags
*
* \return (Possibly newly created) cluster node cache entry
*/
/* coverity[-alloc] Memory is referenced in one or both hashtables */
crm_node_t *
pcmk__get_node(unsigned int id, const char *uname, const char *uuid,
uint32_t flags)
{
crm_node_t *node = NULL;
char *uname_lookup = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
pcmk__cluster_init_node_caches();
// Check the Pacemaker Remote node cache first
if (pcmk_is_set(flags, pcmk__node_search_remote)) {
node = g_hash_table_lookup(crm_remote_peer_cache, uname);
if (node != NULL) {
return node;
}
}
if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
return NULL;
}
node = search_cluster_member_cache(id, uname, uuid);
/* If uname wasn't provided, and the cache search did not turn up a name
 * based on the ID, look up the node name using the ID in the cluster
 * membership. */
if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
uname_lookup = pcmk__cluster_node_name(id);
}
if (uname_lookup) {
uname = uname_lookup;
crm_trace("Inferred a name of '%s' for node %u", uname, id);
/* try to turn up the node one more time now that we know the uname. */
if (node == NULL) {
node = search_cluster_member_cache(id, uname, uuid);
}
}
if (node == NULL) {
char *uniqueid = crm_generate_uuid();
node = pcmk__assert_alloc(1, sizeof(crm_node_t));
crm_info("Created entry %s/%p for node %s/%u (%d total)",
uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
g_hash_table_replace(crm_peer_cache, uniqueid, node);
}
if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
crm_info("Node %u is now known as %s", id, uname);
}
if(id > 0 && node->id == 0) {
node->id = id;
}
if (uname && (node->uname == NULL)) {
update_peer_uname(node, uname);
}
if(node->uuid == NULL) {
if (uuid == NULL) {
uuid = pcmk__cluster_node_uuid(node);
}
if (uuid) {
crm_info("Node %u has uuid %s", id, uuid);
} else {
crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
}
}
free(uname_lookup);
return node;
}
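/* For illustration: the common pattern in this patch is to look up (or
 * create) a cluster node entry by ID, e.g.
 *
 *     crm_node_t *peer = pcmk__get_node(nodeid, NULL, NULL,
 *                                       pcmk__node_search_cluster_member);
 *
 * which is guaranteed to return non-NULL because the cluster-member flag is
 * set (nodeid is assumed to be a known Corosync node ID).
 */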
/*!
* \internal
* \brief Update a node's uname
*
* \param[in,out] node Node object to update
* \param[in] uname New name to set
*
* \note This function should not be called within a peer cache iteration,
* because in some cases it can remove conflicting cache entries,
* which would invalidate the iterator.
*/
static void
update_peer_uname(crm_node_t *node, const char *uname)
{
CRM_CHECK(uname != NULL,
crm_err("Bug: can't update node name without name"); return);
CRM_CHECK(node != NULL,
crm_err("Bug: can't update node name to %s without node", uname);
return);
if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
crm_debug("Node uname '%s' did not change", uname);
return;
}
for (const char *c = uname; *c; ++c) {
if ((*c >= 'A') && (*c <= 'Z')) {
crm_warn("Node names with capitals are discouraged, consider changing '%s'",
uname);
break;
}
}
pcmk__str_update(&node->uname, uname);
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_uname, node, NULL);
}
#if SUPPORT_COROSYNC
if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
&& !pcmk_is_set(node->flags, crm_remote_node)) {
remove_conflicting_peer(node);
}
#endif
}
/*!
* \internal
* \brief Get log-friendly string equivalent of a process flag
*
* \param[in] proc Process flag
*
* \return Log-friendly string equivalent of \p proc
*/
static inline const char *
proc2text(enum crm_proc_flag proc)
{
const char *text = "unknown";
switch (proc) {
case crm_proc_none:
text = "none";
break;
case crm_proc_cpg:
text = "corosync-cpg";
break;
}
return text;
}
/*!
* \internal
* \brief Update a node's process information (and potentially state)
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] flag Bitmask of new process information
* \param[in] status node status (online, offline, etc.)
*
* \return NULL if any node was reaped from peer caches, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function should not be
* called within a cache iteration if reaping is possible, otherwise
* reaping could invalidate the iterator.
*/
crm_node_t *
crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
{
uint32_t last = 0;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
source, proc2text(flag), status);
return NULL);
/* Pacemaker doesn't spawn processes on remote nodes */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return node;
}
last = node->processes;
if (status == NULL) {
node->processes = flag;
if (node->processes != last) {
changed = TRUE;
}
} else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
if ((node->processes & flag) != flag) {
node->processes = pcmk__set_flags_as(__func__, __LINE__,
LOG_TRACE, "Peer process",
node->uname, node->processes,
flag, "processes");
changed = TRUE;
}
} else if (node->processes & flag) {
node->processes = pcmk__clear_flags_as(__func__, __LINE__,
LOG_TRACE, "Peer process",
node->uname, node->processes,
flag, "processes");
changed = TRUE;
}
if (changed) {
if (status == NULL && flag <= crm_proc_none) {
crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
node->id);
} else {
crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
proc2text(flag), status);
}
if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
node->when_online = time(NULL);
} else {
node->when_online = 0;
}
/* Call the client callback first, then update the peer state,
* in case the node will be reaped
*/
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_processes, node, &last);
}
/* The client callback shouldn't touch the peer caches,
* but as a safety net, bail if the peer cache was destroyed.
*/
if (crm_peer_cache == NULL) {
return NULL;
}
if (autoreap) {
const char *peer_state = NULL;
if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
peer_state = CRM_NODE_MEMBER;
} else {
peer_state = CRM_NODE_LOST;
}
node = pcmk__update_peer_state(__func__, node, peer_state, 0);
}
} else {
crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
proc2text(flag), status);
}
return node;
}
/*!
* \internal
* \brief Update a cluster node cache entry's expected join state
*
* \param[in] source Caller's function name (for logging)
* \param[in,out] node Node to update
* \param[in] expected Node's new join state
*/
void
pcmk__update_peer_expected(const char *source, crm_node_t *node,
const char *expected)
{
char *last = NULL;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
return);
/* Remote nodes don't participate in joins */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return;
}
last = node->expected;
if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
node->expected = strdup(expected);
changed = TRUE;
}
if (changed) {
crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
expected, last);
free(last);
} else {
crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
node->id, expected);
}
}
/*!
* \internal
* \brief Update a node's state and membership information
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] state Node's new state
* \param[in] membership Node's new membership ID
* \param[in,out] iter If not NULL, pointer to node's peer cache iterator
*
* \return NULL if any node was reaped, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function may be called from
* within a peer cache iteration if the iterator is supplied.
*/
static crm_node_t *
update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
uint64_t membership, GHashTableIter *iter)
{
gboolean is_member;
CRM_CHECK(node != NULL,
crm_err("Could not set state for unknown host to %s"
QB_XS " source=%s", state, source);
return NULL);
is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
if (is_member) {
node->when_lost = 0;
if (membership) {
node->last_seen = membership;
}
}
if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
char *last = node->state;
if (is_member) {
node->when_member = time(NULL);
} else {
node->when_member = 0;
}
node->state = strdup(state);
crm_notice("Node %s state is now %s " QB_XS
" nodeid=%u previous=%s source=%s", node->uname, state,
node->id, (last? last : "unknown"), source);
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_nstate, node, last);
}
free(last);
if (autoreap && !is_member
&& !pcmk_is_set(node->flags, crm_remote_node)) {
/* We only autoreap from the peer cache, not the remote peer cache,
* because the latter should be managed only by
* refresh_remote_nodes().
*/
if (iter) {
crm_notice("Purged 1 peer with " PCMK_XA_ID
"=%u and/or uname=%s from the membership cache",
node->id, node->uname);
g_hash_table_iter_remove(iter);
} else {
pcmk__cluster_forget_cluster_node(node->id, node->uname);
}
node = NULL;
}
} else {
crm_trace("Node %s state is unchanged (%s) " QB_XS
" nodeid=%u source=%s", node->uname, state, node->id, source);
}
return node;
}
/*!
* \brief Update a node's state and membership information
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] state Node's new state
* \param[in] membership Node's new membership ID
*
* \return NULL if the node was reaped, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function should not be
* called within a cache iteration if reaping is possible,
* otherwise reaping could invalidate the iterator.
*/
crm_node_t *
pcmk__update_peer_state(const char *source, crm_node_t *node,
const char *state, uint64_t membership)
{
return update_peer_state_iter(source, node, state, membership, NULL);
}
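/* Example usage of pcmk__update_peer_state() above (an illustrative sketch):
 * outside a cache iteration, callers go through this wrapper and must treat a
 * NULL return as "node freed". Here, ring_id stands in for a membership ID
 * obtained from the cluster layer.
 *
 *     node = pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, ring_id);
 *     if (node == NULL) {
 *         return; // node was reaped; the old pointer must not be dereferenced
 *     }
 */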
/*!
* \internal
* \brief Reap all nodes from cache whose membership information does not match
*
* \param[in] membership Membership ID of nodes to keep
*/
void
pcmk__reap_unseen_nodes(uint64_t membership)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_trace("Reaping unseen nodes...");
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
if (node->last_seen != membership) {
if (node->state) {
/*
* Calling update_peer_state_iter() allows us to
* remove the node from crm_peer_cache without
* invalidating our iterator
*/
update_peer_state_iter(__func__, node, CRM_NODE_LOST,
membership, &iter);
} else {
crm_info("State of node %s[%u] is still unknown",
node->uname, node->id);
}
}
}
}
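/* Example usage of pcmk__reap_unseen_nodes() above (a sketch, with an assumed
 * caller): after a membership event has updated last_seen for every node in
 * the new ring, the same ring ID can be passed here to mark everything else
 * lost:
 *
 *     pcmk__reap_unseen_nodes(ring_id);
 *
 * Nodes whose state is still unknown are only logged, not marked lost.
 */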
static crm_node_t *
find_cib_cluster_node(const char *id, const char *uname)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_node_t *by_id = NULL;
crm_node_t *by_name = NULL;
if (uname) {
g_hash_table_iter_init(&iter, cluster_node_cib_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (node->uname && strcasecmp(node->uname, uname) == 0) {
crm_trace("Name match: %s = %p", node->uname, node);
by_name = node;
break;
}
}
}
if (id) {
g_hash_table_iter_init(&iter, cluster_node_cib_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (node->uuid && strcasecmp(node->uuid, id) == 0) {
crm_trace("ID match: %s = %p", id, node);
by_id = node;
break;
}
}
}
node = by_id; /* Good default */
if (by_id == by_name) {
/* Nothing to do if they match (both being NULL counts as a match) */
crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
} else if (by_id == NULL && by_name) {
crm_trace("Only one: %p for %s/%s", by_name, id, uname);
if (id) {
node = NULL;
} else {
node = by_name;
}
} else if (by_name == NULL && by_id) {
crm_trace("Only one: %p for %s/%s", by_id, id, uname);
if (uname) {
node = NULL;
}
} else if (uname && by_id->uname
&& pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
/* Multiple nodes have the same uname in the CIB.
* Return by_id. */
} else if (id && by_name->uuid
&& pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
/* Multiple nodes have the same id in the CIB.
* Return by_name. */
node = by_name;
} else {
node = NULL;
}
if (node == NULL) {
crm_debug("Couldn't find node%s%s%s%s",
id? " " : "",
id? id : "",
uname? " with name " : "",
uname? uname : "");
}
return node;
}
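/* find_cib_cluster_node() above resolves four situations: both lookups agree
 * (including both NULL), only one key matched, or the two keys matched
 * different entries. A sketch of the conflicting case (hypothetical CIB
 * content, for illustration only):
 *
 *     // CIB has <node id="1" uname="node1"/> and <node id="2" uname="node2"/>
 *     find_cib_cluster_node("1", "node2"); // conflicting matches: NULL
 *     find_cib_cluster_node("1", "node1"); // consistent: returns node1 entry
 */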
static void
cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
{
const char *id = crm_element_value(xml_node, PCMK_XA_ID);
const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
crm_node_t * node = NULL;
CRM_CHECK(id != NULL && uname != NULL, return);
node = find_cib_cluster_node(id, uname);
if (node == NULL) {
char *uniqueid = crm_generate_uuid();
node = pcmk__assert_alloc(1, sizeof(crm_node_t));
node->uname = pcmk__str_copy(uname);
node->uuid = pcmk__str_copy(id);
g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);
} else if (pcmk_is_set(node->flags, crm_node_dirty)) {
pcmk__str_update(&node->uname, uname);
/* Node is in cache and hasn't been updated already, so mark it clean */
clear_peer_flags(node, crm_node_dirty);
}
}
static void
refresh_cluster_node_cib_cache(xmlNode *cib)
{
pcmk__cluster_init_node_caches();
g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);
crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
cluster_node_cib_cache_refresh_helper, NULL);
// Remove all old cache entries that weren't seen in the CIB
g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
}
void
pcmk__refresh_node_caches_from_cib(xmlNode *cib)
{
refresh_remote_nodes(cib);
refresh_cluster_node_cib_cache(cib);
}
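/* Example usage of pcmk__refresh_node_caches_from_cib() above (an
 * illustrative sketch): a CIB notification handler might refresh both caches
 * whenever the configuration changes; current_cib is assumed to be the
 * caller's copy of the <cib> element.
 *
 *     pcmk__refresh_node_caches_from_cib(current_cib);
 *
 * The cluster node cache refresh is a mark-and-sweep: existing entries are
 * marked dirty, entries found in the CIB are refreshed and marked clean, and
 * anything still dirty afterward is removed.
 */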
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/cluster/compat.h>
void
crm_peer_init(void)
{
pcmk__cluster_init_node_caches();
}
// LCOV_EXCL_STOP
// End deprecated API