diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index 400061bd3e..7a6577f074 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -1,550 +1,551 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <dlfcn.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/utsname.h> // uname()
#include <glib.h> // gboolean
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include "crmcluster_private.h"
CRM_TRACE_INIT_DATA(cluster);
/*!
* \internal
* \brief Get the message type equivalent of a string
*
* \param[in] text String of message type
*
* \return Message type equivalent of \p text
*/
enum crm_ais_msg_types
pcmk__cluster_parse_msg_type(const char *text)
{
CRM_CHECK(text != NULL, return crm_msg_none);
text = pcmk__message_name(text);
if (pcmk__str_eq(text, "ais", pcmk__str_none)) {
return crm_msg_ais;
}
if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_none)) {
return crm_msg_cib;
}
if (pcmk__str_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) {
return crm_msg_crmd;
}
if (pcmk__str_eq(text, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
return crm_msg_te;
}
if (pcmk__str_eq(text, CRM_SYSTEM_PENGINE, pcmk__str_none)) {
return crm_msg_pe;
}
if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_none)) {
return crm_msg_lrmd;
}
if (pcmk__str_eq(text, CRM_SYSTEM_STONITHD, pcmk__str_none)) {
return crm_msg_stonithd;
}
if (pcmk__str_eq(text, "stonith-ng", pcmk__str_none)) {
return crm_msg_stonith_ng;
}
if (pcmk__str_eq(text, "attrd", pcmk__str_none)) {
return crm_msg_attrd;
}
return crm_msg_none;
}
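/* Illustrative sketch (not part of this patch): a message router might map an
 * incoming request's target subsystem to a message type before dispatching it.
 * The "msg" XML node and the PCMK__XA_CRM_SYS_TO attribute name are
 * assumptions for this example.
 *
 *   const char *sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
 *
 *   switch (pcmk__cluster_parse_msg_type(sys_to)) {
 *       case crm_msg_cib:
 *           // route to the CIB manager
 *           break;
 *       case crm_msg_none:
 *           crm_warn("Unrecognized message target");
 *           break;
 *       default:
 *           break;
 *   }
 */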
/*!
* \internal
* \brief Get a node's XML ID in the CIB, setting it if not already set
*
* \param[in,out] node Node to check
*
* \return CIB XML ID of \p node if known, otherwise \c NULL
*/
const char *
pcmk__cluster_get_xml_id(crm_node_t *node)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
if (node == NULL) {
return NULL;
}
if (node->uuid != NULL) {
return node->uuid;
}
// xml_id is always set when a Pacemaker Remote node entry is created
CRM_CHECK(!pcmk_is_set(node->flags, crm_remote_node), return NULL);
switch (cluster_layer) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
node->uuid = pcmk__corosync_uuid(node);
return node->uuid;
#endif // SUPPORT_COROSYNC
default:
crm_err("Unsupported cluster layer %s",
pcmk_cluster_layer_text(cluster_layer));
return NULL;
}
}
/*!
* \internal
* \brief Connect to the cluster layer
*
* \param[in,out] cluster Initialized cluster object to connect
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_connect(pcmk_cluster_t *cluster)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
// cts-lab looks for this message
crm_notice("Connecting to %s cluster layer", cluster_layer_s);
switch (cluster_layer) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
return pcmk__corosync_connect(cluster);
#endif // SUPPORT_COROSYNC
default:
break;
}
crm_err("Failed to connect to unsupported cluster layer %s",
cluster_layer_s);
return EPROTONOSUPPORT;
}
/*!
* \brief Disconnect from the cluster layer
*
* \param[in,out] cluster Cluster object to disconnect
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_disconnect(pcmk_cluster_t *cluster)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
crm_info("Disconnecting from %s cluster layer", cluster_layer_s);
switch (cluster_layer) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
pcmk__corosync_disconnect(cluster);
pcmk__cluster_destroy_node_caches();
return pcmk_rc_ok;
#endif // SUPPORT_COROSYNC
default:
break;
}
crm_err("Failed to disconnect from unsupported cluster layer %s",
cluster_layer_s);
return EPROTONOSUPPORT;
}
/*!
* \brief Allocate a new \p pcmk_cluster_t object
*
* \return A newly allocated \p pcmk_cluster_t object (guaranteed not \c NULL)
* \note The caller is responsible for freeing the return value using
* \p pcmk_cluster_free().
*/
pcmk_cluster_t *
pcmk_cluster_new(void)
{
return (pcmk_cluster_t *) pcmk__assert_alloc(1, sizeof(pcmk_cluster_t));
}
/*!
* \brief Free a \p pcmk_cluster_t object and its dynamically allocated members
*
* \param[in,out] cluster Cluster object to free
*/
void
pcmk_cluster_free(pcmk_cluster_t *cluster)
{
if (cluster == NULL) {
return;
}
free(cluster->uuid);
free(cluster->uname);
free(cluster);
}
/*!
* \brief Set the destroy function for a cluster object
*
* \param[in,out] cluster Cluster object
* \param[in] fn Destroy function to set
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer))
{
if (cluster == NULL) {
return EINVAL;
}
cluster->destroy = fn;
return pcmk_rc_ok;
}
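/* Illustrative lifecycle sketch (not part of this patch): a daemon would
 * typically allocate a cluster object, register a destroy callback, connect,
 * and disconnect/free at shutdown. my_cluster_destroy() is a hypothetical
 * callback.
 *
 *   pcmk_cluster_t *cluster = pcmk_cluster_new();
 *
 *   pcmk_cluster_set_destroy_fn(cluster, my_cluster_destroy);
 *   if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) {
 *       // handle EPROTONOSUPPORT or a layer-specific connection failure
 *   }
 *   // ... main loop ...
 *   pcmk_cluster_disconnect(cluster);
 *   pcmk_cluster_free(cluster);
 */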
/*!
* \internal
* \brief Send an XML message via the cluster messaging layer
*
* \param[in] node Cluster node to send message to
* \param[in] service Message type to use in message host info
* \param[in] data XML message to send
*
* \return \c true on success, or \c false otherwise
*/
bool
pcmk__cluster_send_message(const crm_node_t *node,
enum crm_ais_msg_types service, const xmlNode *data)
{
// @TODO Return standard Pacemaker return code
switch (pcmk_get_cluster_layer()) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
return pcmk__cpg_send_xml(data, node, service);
#endif // SUPPORT_COROSYNC
default:
break;
}
return false;
}
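/* Illustrative sketch (not part of this patch): passing NULL as the node
 * broadcasts to all cluster nodes, as election_vote() does below. The "reply"
 * XML node is a hypothetical message built with create_request().
 *
 *   if (!pcmk__cluster_send_message(NULL, crm_msg_crmd, reply)) {
 *       crm_warn("Could not broadcast reply");
 *   }
 */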
/*!
* \internal
* \brief Get the node name corresponding to a cluster-layer node ID
*
* Get the node name from the cluster layer if possible. Otherwise, if for the
* local node, call \c uname() and get the \c nodename member from the
* <tt>struct utsname</tt> object.
*
* \param[in] nodeid Node ID to check (or 0 for the local node)
*
* \return Node name corresponding to \p nodeid
*
* \note This will fatally exit if \c uname() fails to get the local node name
* or we run out of memory.
* \note The caller is responsible for freeing the return value using \c free().
*/
char *
pcmk__cluster_node_name(uint32_t nodeid)
{
char *name = NULL;
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
switch (cluster_layer) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
name = pcmk__corosync_name(0, nodeid);
if (name != NULL) {
return name;
}
break;
#endif // SUPPORT_COROSYNC
default:
crm_err("Unsupported cluster layer: %s", cluster_layer_s);
break;
}
if (nodeid == 0) {
struct utsname hostinfo;
crm_notice("Could not get local node name from %s cluster layer, "
"defaulting to local hostname",
cluster_layer_s);
if (uname(&hostinfo) < 0) {
// @TODO Maybe let the caller decide what to do
crm_err("Failed to get the local hostname");
crm_exit(CRM_EX_FATAL);
}
return pcmk__str_copy(hostinfo.nodename);
}
crm_notice("Could not obtain a node name for node with "
PCMK_XA_ID "=" PRIu32,
nodeid);
return NULL;
}
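/* Illustrative sketch (not part of this patch): the returned name is
 * dynamically allocated, so the caller must free it.
 *
 *   char *name = pcmk__cluster_node_name(0);  // 0 means the local node
 *
 *   crm_info("Local node is %s", name);
 *   free(name);
 */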
/*!
* \internal
* \brief Get the local node's cluster-layer node name
*
* If getting the node name from the cluster layer is impossible, call
* \c uname() and get the \c nodename member from the <tt>struct utsname</tt>
* object.
*
* \return Local node's name
*
* \note This will fatally exit if \c uname() fails to get the local node name
* or we run out of memory.
*/
const char *
pcmk__cluster_local_node_name(void)
{
// @TODO Refactor to avoid trivially leaking name at exit
static char *name = NULL;
if (name == NULL) {
name = pcmk__cluster_node_name(0);
}
return name;
}
/*!
* \internal
* \brief Get the node name corresponding to a node UUID
*
* Look for the UUID in both the remote node cache and the cluster member cache.
*
* \param[in] uuid UUID to search for
*
* \return Node name corresponding to \p uuid if found, or \c NULL otherwise
*/
const char *
pcmk__node_name_from_uuid(const char *uuid)
{
/* @TODO There are too many functions in libcrmcluster that look up a node
* from the node caches (possibly creating a cache entry if none exists).
* There are at least the following:
* * pcmk__cluster_lookup_remote_node()
* * pcmk__get_node()
* * pcmk__node_name_from_uuid()
* * pcmk__search_node_caches()
*
* There's a lot of duplication among them, but they all do slightly
* different things. We should try to clean them up and consolidate them to
* the extent possible, likely with new helper functions.
*/
GHashTableIter iter;
crm_node_t *node = NULL;
CRM_CHECK(uuid != NULL, return NULL);
// Remote nodes have the same uname and uuid
if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
return uuid;
}
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
- if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
+ if (pcmk__str_eq(uuid, pcmk__cluster_get_xml_id(node),
+ pcmk__str_none)) {
return node->uname;
}
}
return NULL;
}
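/* Illustrative sketch (not part of this patch): mapping a CIB XML ID back to
 * a node name, e.g. while processing a PCMK__XE_NODE_STATE entry. The xml_id
 * variable is hypothetical.
 *
 *   const char *name = pcmk__node_name_from_uuid(xml_id);
 *
 *   crm_debug("Node with " PCMK_XA_ID " %s is %s",
 *             xml_id, pcmk__s(name, "unknown"));
 */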
/*!
* \brief Get a log-friendly string equivalent of a cluster layer
*
* \param[in] layer Cluster layer
*
* \return Log-friendly string corresponding to \p layer
*/
const char *
pcmk_cluster_layer_text(enum pcmk_cluster_layer layer)
{
switch (layer) {
case pcmk_cluster_layer_corosync:
return "corosync";
case pcmk_cluster_layer_unknown:
return "unknown";
case pcmk_cluster_layer_invalid:
return "invalid";
default:
crm_err("Invalid cluster layer: %d", layer);
return "invalid";
}
}
/*!
* \brief Get and validate the local cluster layer
*
* If a cluster layer is not configured via the \c PCMK__ENV_CLUSTER_TYPE local
* option, this will try to detect an active cluster from among the supported
* cluster layers.
*
* \return Local cluster layer
*
* \note This will fatally exit if the configured cluster layer is invalid.
*/
enum pcmk_cluster_layer
pcmk_get_cluster_layer(void)
{
static enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown;
const char *cluster = NULL;
// Cluster layer is stable once set
if (cluster_layer != pcmk_cluster_layer_unknown) {
return cluster_layer;
}
cluster = pcmk__env_option(PCMK__ENV_CLUSTER_TYPE);
if (cluster != NULL) {
crm_info("Verifying configured cluster layer '%s'", cluster);
cluster_layer = pcmk_cluster_layer_invalid;
#if SUPPORT_COROSYNC
if (pcmk__str_eq(cluster, PCMK_VALUE_COROSYNC, pcmk__str_casei)) {
cluster_layer = pcmk_cluster_layer_corosync;
}
#endif // SUPPORT_COROSYNC
if (cluster_layer == pcmk_cluster_layer_invalid) {
crm_notice("This installation does not support the '%s' cluster "
"infrastructure: terminating",
cluster);
crm_exit(CRM_EX_FATAL);
}
crm_info("Assuming an active '%s' cluster", cluster);
} else {
// Nothing configured, so test supported cluster layers
#if SUPPORT_COROSYNC
crm_debug("Testing with Corosync");
if (pcmk__corosync_is_active()) {
cluster_layer = pcmk_cluster_layer_corosync;
}
#endif // SUPPORT_COROSYNC
if (cluster_layer == pcmk_cluster_layer_unknown) {
crm_notice("Could not determine the current cluster layer");
} else {
crm_info("Detected an active '%s' cluster",
pcmk_cluster_layer_text(cluster_layer));
}
}
return cluster_layer;
}
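/* Illustrative sketch (not part of this patch): callers that support only
 * Corosync can validate the detected layer up front.
 *
 *   enum pcmk_cluster_layer layer = pcmk_get_cluster_layer();
 *
 *   if (layer != pcmk_cluster_layer_corosync) {
 *       crm_err("Unsupported cluster layer %s",
 *               pcmk_cluster_layer_text(layer));
 *   }
 */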
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/cluster/compat.h>
void
set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
{
crm_xml_add(xml, attr, pcmk__cluster_get_xml_id(node));
}
gboolean
crm_cluster_connect(pcmk_cluster_t *cluster)
{
return pcmk_cluster_connect(cluster) == pcmk_rc_ok;
}
void
crm_cluster_disconnect(pcmk_cluster_t *cluster)
{
pcmk_cluster_disconnect(cluster);
}
const char *
name_for_cluster_type(enum cluster_type_e type)
{
switch (type) {
case pcmk_cluster_corosync:
return "corosync";
case pcmk_cluster_unknown:
return "unknown";
case pcmk_cluster_invalid:
return "invalid";
}
crm_err("Invalid cluster type: %d", type);
return "invalid";
}
enum cluster_type_e
get_cluster_type(void)
{
return (enum cluster_type_e) pcmk_get_cluster_layer();
}
gboolean
is_corosync_cluster(void)
{
return pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync;
}
gboolean
send_cluster_message(const crm_node_t *node, enum crm_ais_msg_types service,
const xmlNode *data, gboolean ordered)
{
return pcmk__cluster_send_message(node, service, data);
}
const char *
crm_peer_uuid(crm_node_t *peer)
{
return pcmk__cluster_get_xml_id(peer);
}
char *
get_node_name(uint32_t nodeid)
{
return pcmk__cluster_node_name(nodeid);
}
const char *
get_local_node_name(void)
{
return pcmk__cluster_local_node_name();
}
const char *
crm_peer_uname(const char *uuid)
{
return pcmk__node_name_from_uuid(uuid);
}
// LCOV_EXCL_STOP
// End deprecated API
diff --git a/lib/cluster/election.c b/lib/cluster/election.c
index 98cd716826..c3e4cd126c 100644
--- a/lib/cluster/election.c
+++ b/lib/cluster/election.c
@@ -1,726 +1,727 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/crm.h>
#define STORM_INTERVAL 2 /* in seconds */
struct election_s {
enum election_result state;
guint count; // How many times local node has voted
char *name; // Descriptive name for this election
char *uname; // Local node's name
GSourceFunc cb; // Function to call if election is won
GHashTable *voted; // Key = node name, value = how node voted
mainloop_timer_t *timeout; // When to abort if all votes not received
int election_wins; // Track wins, for storm detection
bool wrote_blackbox; // Write a storm blackbox at most once
time_t expires; // When storm detection period ends
time_t last_election_loss; // When dampening period ends
};
static void
election_complete(election_t *e)
{
e->state = election_won;
if (e->cb != NULL) {
e->cb(e);
}
election_reset(e);
}
static gboolean
election_timer_cb(gpointer user_data)
{
election_t *e = user_data;
crm_info("%s timed out, declaring local node as winner", e->name);
election_complete(e);
return FALSE;
}
/*!
* \brief Get current state of an election
*
* \param[in] e Election object
*
* \return Current state of \p e
*/
enum election_result
election_state(const election_t *e)
{
return (e == NULL)? election_error : e->state;
}
/*!
* \brief Create a new election object
*
* Every node that wishes to participate in an election must create an election
* object. Typically, this should be done once, at start-up. A caller should
* only create a single election object.
*
* \param[in] name Label for election (for logging)
* \param[in] uname Local node's name
* \param[in] period_ms How long to wait for all peers to vote
* \param[in] cb Function to call if local node wins election
*
* \return Newly allocated election object on success, NULL on error
* \note The caller is responsible for freeing the returned value using
* election_fini().
*/
election_t *
election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
{
election_t *e = NULL;
static guint count = 0;
CRM_CHECK(uname != NULL, return NULL);
e = calloc(1, sizeof(election_t));
if (e == NULL) {
crm_perror(LOG_CRIT, "Cannot create election");
return NULL;
}
e->uname = strdup(uname);
if (e->uname == NULL) {
crm_perror(LOG_CRIT, "Cannot create election");
free(e);
return NULL;
}
e->name = name? crm_strdup_printf("election-%s", name)
: crm_strdup_printf("election-%u", count++);
e->cb = cb;
e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
election_timer_cb, e);
crm_trace("Created %s", e->name);
return e;
}
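/* Illustrative sketch (not part of this patch): typical one-time creation of
 * an election object at start-up. my_election_won_cb and node_name are
 * hypothetical, and the 120000-ms timeout is an arbitrary example value.
 *
 *   election_t *e = election_init("DC", node_name, 120000,
 *                                 my_election_won_cb);
 *
 *   if (e == NULL) {
 *       crm_err("Cannot participate in elections");
 *   }
 */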
/*!
* \brief Disregard any previous vote by specified peer
*
* This discards any recorded vote from a specified peer. Election users should
* call this whenever a voting peer becomes inactive.
*
* \param[in,out] e Election object
* \param[in] uname Name of peer to disregard
*/
void
election_remove(election_t *e, const char *uname)
{
if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
g_hash_table_remove(e->voted, uname);
}
}
/*!
* \brief Stop election timer and disregard all votes
*
* \param[in,out] e Election object
*/
void
election_reset(election_t *e)
{
if (e != NULL) {
crm_trace("Resetting election %s", e->name);
mainloop_timer_stop(e->timeout);
if (e->voted) {
crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
g_hash_table_destroy(e->voted);
e->voted = NULL;
}
}
}
/*!
* \brief Free an election object
*
* Free all memory associated with an election object, stopping its
* election timer (if running).
*
* \param[in,out] e Election object
*/
void
election_fini(election_t *e)
{
if (e != NULL) {
election_reset(e);
crm_trace("Destroying %s", e->name);
mainloop_timer_del(e->timeout);
free(e->uname);
free(e->name);
free(e);
}
}
static void
election_timeout_start(election_t *e)
{
if (e != NULL) {
mainloop_timer_start(e->timeout);
}
}
/*!
* \brief Stop an election's timer, if running
*
* \param[in,out] e Election object
*/
void
election_timeout_stop(election_t *e)
{
if (e != NULL) {
mainloop_timer_stop(e->timeout);
}
}
/*!
* \brief Change an election's timeout (restarting timer if running)
*
* \param[in,out] e Election object
* \param[in] period New timeout
*/
void
election_timeout_set_period(election_t *e, guint period)
{
if (e != NULL) {
mainloop_timer_set_period(e->timeout, period);
} else {
crm_err("No election defined");
}
}
static int
get_uptime(struct timeval *output)
{
static time_t expires = 0;
static struct rusage info;
time_t tm_now = time(NULL);
if (expires < tm_now) {
int rc = 0;
info.ru_utime.tv_sec = 0;
info.ru_utime.tv_usec = 0;
rc = getrusage(RUSAGE_SELF, &info);
output->tv_sec = 0;
output->tv_usec = 0;
if (rc < 0) {
crm_perror(LOG_ERR, "Could not calculate the current uptime");
expires = 0;
return -1;
}
crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
(long)info.ru_utime.tv_usec);
}
expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */
output->tv_sec = info.ru_utime.tv_sec;
output->tv_usec = info.ru_utime.tv_usec;
return 1;
}
static int
compare_age(struct timeval your_age)
{
struct timeval our_age;
get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */
if (our_age.tv_sec > your_age.tv_sec) {
crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
return 1;
} else if (our_age.tv_sec < your_age.tv_sec) {
crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
return -1;
} else if (our_age.tv_usec > your_age.tv_usec) {
crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
(long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
return 1;
} else if (our_age.tv_usec < your_age.tv_usec) {
crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
(long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
return -1;
}
return 0;
}
/*!
* \brief Start a new election by offering local node's candidacy
*
* Broadcast a "vote" election message containing the local node's ID,
* (incremented) election counter, and uptime, and start the election timer.
*
* \param[in,out] e Election object
*
* \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
* all active peers do so, or if the election times out, the local node
* wins the election. (If we lose to any peer vote, we will stop the
* timer, so a timeout means we did not lose -- either some peer did not
* vote, or we did not call election_check() in time.)
*/
void
election_vote(election_t *e)
{
struct timeval age;
xmlNode *vote = NULL;
crm_node_t *our_node;
if (e == NULL) {
crm_trace("Election vote requested, but no election available");
return;
}
our_node = pcmk__get_node(0, e->uname, NULL,
pcmk__node_search_cluster_member);
if (!pcmk__cluster_is_node_active(our_node)) {
crm_trace("Cannot vote in %s yet: local node not connected to cluster",
e->name);
return;
}
election_reset(e);
e->state = election_in_progress;
vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
e->count++;
- crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->uuid);
+ crm_xml_add(vote, PCMK__XA_ELECTION_OWNER,
+ pcmk__cluster_get_xml_id(our_node));
crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count);
// Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
get_uptime(&age);
crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
pcmk__cluster_send_message(NULL, crm_msg_crmd, vote);
free_xml(vote);
crm_debug("Started %s round %d", e->name, e->count);
election_timeout_start(e);
return;
}
/*!
* \brief Check whether local node has won an election
*
* If all known peers have sent no-vote messages, stop the election timer, set
* the election state to won, and call any registered win callback.
*
* \param[in,out] e Election object
*
* \return TRUE if local node has won, FALSE otherwise
* \note If all known peers have sent no-vote messages, but the election owner
* does not call this function, the election will not be won (and the
* callback will not be called) until the election times out.
* \note This should be called when election_count_vote() returns
* \c election_in_progress.
*/
bool
election_check(election_t *e)
{
int voted_size = 0;
int num_members = 0;
if (e == NULL) {
crm_trace("Election check requested, but no election available");
return FALSE;
}
if (e->voted == NULL) {
crm_trace("%s check requested, but no votes received yet", e->name);
return FALSE;
}
voted_size = g_hash_table_size(e->voted);
num_members = pcmk__cluster_num_active_nodes();
/* in the case of #voted > #members, it is better to
* wait for the timeout and give the cluster time to
* stabilize
*/
if (voted_size >= num_members) {
/* we won and everyone has voted */
election_timeout_stop(e);
if (voted_size > num_members) {
GHashTableIter gIter;
const crm_node_t *node;
char *key = NULL;
crm_warn("Received too many votes in %s", e->name);
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
if (pcmk__cluster_is_node_active(node)) {
crm_warn("* expected vote: %s", node->uname);
}
}
g_hash_table_iter_init(&gIter, e->voted);
while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
crm_warn("* actual vote: %s", key);
}
}
crm_info("%s won by local node", e->name);
election_complete(e);
return TRUE;
} else {
crm_debug("%s still waiting on %d of %d votes",
e->name, num_members - voted_size, num_members);
}
return FALSE;
}
#define LOSS_DAMPEN 2 /* in seconds */
struct vote {
const char *op;
const char *from;
const char *version;
const char *election_owner;
int election_id;
struct timeval age;
};
/*!
* \brief Unpack an election message
*
* \param[in] e Election object (for logging only)
* \param[in] message Election message XML
* \param[out] vote Parsed fields from message
*
* \return TRUE if election message and election are valid, FALSE otherwise
* \note The parsed struct's pointer members are valid only for the lifetime of
* the message argument.
*/
static bool
parse_election_message(const election_t *e, const xmlNode *message,
struct vote *vote)
{
CRM_CHECK(message && vote, return FALSE);
vote->election_id = -1;
vote->age.tv_sec = -1;
vote->age.tv_usec = -1;
vote->op = crm_element_value(message, PCMK__XA_CRM_TASK);
vote->from = crm_element_value(message, PCMK__XA_SRC);
vote->version = crm_element_value(message, PCMK_XA_VERSION);
vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER);
crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
|| (vote->election_owner == NULL) || (vote->election_id < 0)) {
crm_warn("Invalid %s message from %s in %s ",
(vote->op? vote->op : "election"),
(vote->from? vote->from : "unspecified node"),
(e? e->name : "election"));
return FALSE;
}
// Op-specific validation
if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
/* Only vote ops have uptime.
Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
*/
crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age));
if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
crm_warn("Cannot count %s %s from %s because it is missing uptime",
(e? e->name : "election"), vote->op, vote->from);
return FALSE;
}
} else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
crm_info("Cannot process %s message from %s because %s is not a known election op",
(e? e->name : "election"), vote->from, vote->op);
return FALSE;
}
// Election validation
if (e == NULL) {
crm_info("Cannot count %s from %s because no election available",
vote->op, vote->from);
return FALSE;
}
/* If the membership cache is NULL, we REALLY shouldn't be voting --
* the question is how we managed to get here.
*/
if (crm_peer_cache == NULL) {
crm_info("Cannot count %s %s from %s because no peer information available",
e->name, vote->op, vote->from);
return FALSE;
}
return TRUE;
}
static void
record_vote(election_t *e, struct vote *vote)
{
pcmk__assert(e && vote && vote->from && vote->op);
if (e->voted == NULL) {
e->voted = pcmk__strkey_table(free, free);
}
pcmk__insert_dup(e->voted, vote->from, vote->op);
}
static void
send_no_vote(crm_node_t *peer, struct vote *vote)
{
// @TODO probably shouldn't hardcode CRM_SYSTEM_CRMD and crm_msg_crmd
xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
pcmk__cluster_send_message(peer, crm_msg_crmd, novote);
free_xml(novote);
}
/*!
* \brief Process an election message (vote or no-vote) from a peer
*
* \param[in,out] e Election object
* \param[in] message Election message XML from peer
* \param[in] can_win Whether local node is eligible to win
*
* \return Election state after new vote is considered
* \note If the peer message is a vote, and we prefer the peer to win, this will
* send a no-vote reply to the peer.
* \note The situations "we lost to this vote" from "this is a late no-vote
* after we've already lost" both return election_lost. If a caller needs
* to distinguish them, it should save the current state before calling
* this function, and then compare the result.
*/
enum election_result
election_count_vote(election_t *e, const xmlNode *message, bool can_win)
{
int log_level = LOG_INFO;
gboolean done = FALSE;
gboolean we_lose = FALSE;
const char *reason = "unknown";
bool we_are_owner = FALSE;
crm_node_t *our_node = NULL, *your_node = NULL;
time_t tm_now = time(NULL);
struct vote vote;
CRM_CHECK(message != NULL, return election_error);
if (parse_election_message(e, message, &vote) == FALSE) {
return election_error;
}
your_node = pcmk__get_node(0, vote.from, NULL,
pcmk__node_search_cluster_member);
our_node = pcmk__get_node(0, e->uname, NULL,
pcmk__node_search_cluster_member);
we_are_owner = (our_node != NULL)
- && pcmk__str_eq(our_node->uuid, vote.election_owner,
- pcmk__str_none);
+ && pcmk__str_eq(pcmk__cluster_get_xml_id(our_node),
+ vote.election_owner, pcmk__str_none);
if (!can_win) {
reason = "Not eligible";
we_lose = TRUE;
} else if (!pcmk__cluster_is_node_active(our_node)) {
reason = "We are not part of the cluster";
log_level = LOG_ERR;
we_lose = TRUE;
} else if (we_are_owner && (vote.election_id != e->count)) {
log_level = LOG_TRACE;
reason = "Superseded";
done = TRUE;
} else if (!pcmk__cluster_is_node_active(your_node)) {
/* Possibly we cached the message in the FSA queue at a point when the peer
 * was still active */
reason = "Peer is not part of our cluster";
log_level = LOG_WARNING;
done = TRUE;
} else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
|| pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
/* Receiving our own broadcast vote, or a no-vote from peer, is a vote
* for us to win
*/
if (!we_are_owner) {
crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
e->name, vote.election_id, vote.op, vote.from,
vote.election_owner);
return election_error;
}
if (e->state != election_in_progress) {
// Should only happen if we already lost
crm_debug("Not counting %s round %d %s from %s because no election in progress",
e->name, vote.election_id, vote.op, vote.from);
return e->state;
}
record_vote(e, &vote);
reason = "Recorded";
done = TRUE;
} else {
// A peer vote requires a comparison to determine which node is better
int age_result = compare_age(vote.age);
int version_result = compare_version(vote.version, CRM_FEATURE_SET);
if (version_result < 0) {
reason = "Version";
we_lose = TRUE;
} else if (version_result > 0) {
reason = "Version";
} else if (age_result < 0) {
reason = "Uptime";
we_lose = TRUE;
} else if (age_result > 0) {
reason = "Uptime";
} else if (strcasecmp(e->uname, vote.from) > 0) {
reason = "Host name";
we_lose = TRUE;
} else {
reason = "Host name";
}
}
if (e->expires < tm_now) {
e->election_wins = 0;
e->expires = tm_now + STORM_INTERVAL;
} else if (done == FALSE && we_lose == FALSE) {
int peers = 1 + g_hash_table_size(crm_peer_cache);
/* If every node has to vote down every other node, that's N*(N-1) total elections
* Allow some leeway before _really_ complaining
*/
e->election_wins++;
if (e->election_wins > (peers * peers)) {
crm_warn("%s election storm detected: %d wins in %d seconds",
e->name, e->election_wins, STORM_INTERVAL);
e->election_wins = 0;
e->expires = tm_now + STORM_INTERVAL;
if (e->wrote_blackbox == FALSE) {
/* It's questionable whether a black box (from every node in the
* cluster) would be truly helpful in diagnosing an election
* storm. It's also highly doubtful a production environment
* would get multiple election storms from distinct causes, so
* saving one blackbox per process lifetime should be
* sufficient. Alternatives would be to save a timestamp of the
* last blackbox write instead of a boolean, and write a new one
* if some amount of time has passed; or to save a storm count,
* write a blackbox on every Nth occurrence.
*/
crm_write_blackbox(0, NULL);
e->wrote_blackbox = TRUE;
}
}
}
if (done) {
do_crm_log(log_level + 1,
"Processed %s round %d %s (current round %d) from %s (%s)",
e->name, vote.election_id, vote.op, e->count, vote.from,
reason);
return e->state;
} else if (we_lose == FALSE) {
/* We track the time of the last election loss to implement an election
* dampening period, reducing the likelihood of an election storm. If
* this node has lost within the dampening period, don't start a new
* election, even if we win against a peer's vote -- the peer we lost to
* should win again.
*
* @TODO This has a problem case: if an election winner immediately
* leaves the cluster, and a new election is immediately called, all
* nodes could lose, with no new winner elected. The ideal solution
* would be to tie the election structure with the peer caches, which
* would allow us to clear the dampening when the previous winner
* leaves (and would allow other improvements as well).
*/
if ((e->last_election_loss == 0)
|| ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {
do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
e->name, vote.election_id, vote.election_owner, vote.op,
vote.from, reason);
e->last_election_loss = 0;
election_timeout_stop(e);
/* Start a new election by voting down this, and other, peers */
e->state = election_start;
return e->state;
} else {
char *loss_time = ctime(&e->last_election_loss);
if (loss_time) {
// Show only HH:MM:SS
loss_time += 11;
loss_time[8] = '\0';
}
crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
e->name, vote.election_id, vote.election_owner, vote.from,
LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
}
}
e->last_election_loss = tm_now;
do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
e->name, vote.election_id, vote.election_owner, vote.op,
vote.from, reason);
election_reset(e);
send_no_vote(your_node, &vote);
e->state = election_lost;
return e->state;
}
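/* Illustrative sketch (not part of this patch): a caller's message handler
 * might act on the returned state like this. The "msg" argument is a
 * hypothetical vote or no-vote message received from a peer.
 *
 *   switch (election_count_vote(e, msg, can_win)) {
 *       case election_start:
 *           election_vote(e);   // we beat the peer's vote, so run again
 *           break;
 *       case election_in_progress:
 *           election_check(e);  // all no-votes received? then we won
 *           break;
 *       default:
 *           break;              // lost, superseded, or error
 *   }
 */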
/*!
* \brief Reset any election dampening currently in effect
*
* \param[in,out] e Election object to clear
*/
void
election_clear_dampening(election_t *e)
{
e->last_election_loss = 0;
}
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index d10d127c21..3195f374a5 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -1,1581 +1,1585 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <inttypes.h> // PRIu32
#include <sys/param.h>
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <glib.h>
#include <crm/common/ipc.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include "crmcluster_private.h"
/* The peer cache remembers cluster nodes that have been seen.
* This is managed mostly automatically by libcluster, based on
* cluster membership events.
*
* Because cluster nodes can have conflicting names or UUIDs,
* the hash table key is a uniquely generated ID.
*
* @COMPAT When this is internal, rename to cluster_node_member_cache and make
* static.
*/
GHashTable *crm_peer_cache = NULL;
/*
* The remote peer cache tracks pacemaker_remote nodes. While the
* value has the same type as the peer cache's, it is tracked separately for
* three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
* so the name (which is also the UUID) is used as the hash table key; there
* is no equivalent of membership events, so management is not automatic; and
* most users of the peer cache need to exclude pacemaker_remote nodes.
*
* That said, using a single cache would be more logical and less error-prone,
* so it would be a good idea to merge them one day.
*
* libcluster provides two avenues for populating the cache:
* pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
* directly manage it, while refresh_remote_nodes() populates it via the CIB.
*/
GHashTable *crm_remote_peer_cache = NULL;
/*
* The CIB cluster node cache tracks cluster nodes that have been seen in
* the CIB. It is useful mainly when a caller needs to know about a node that
* may no longer be in the membership, but doesn't want to add the node to the
* main peer cache tables.
*/
static GHashTable *cluster_node_cib_cache = NULL;
unsigned long long crm_peer_seq = 0;
gboolean crm_have_quorum = FALSE;
static bool autoreap = true;
// Flag setting and clearing for crm_node_t:flags
#define set_peer_flags(peer, flags_to_set) do { \
(peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Peer", (peer)->uname, \
(peer)->flags, (flags_to_set), \
#flags_to_set); \
} while (0)
#define clear_peer_flags(peer, flags_to_clear) do { \
(peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Peer", (peer)->uname, \
(peer)->flags, (flags_to_clear), \
#flags_to_clear); \
} while (0)
static void update_peer_uname(crm_node_t *node, const char *uname);
static crm_node_t *find_cib_cluster_node(const char *id, const char *uname);
/*!
* \internal
* \brief Get the number of Pacemaker Remote nodes that have been seen
*
* \return Number of cached Pacemaker Remote nodes
*/
unsigned int
pcmk__cluster_num_remote_nodes(void)
{
if (crm_remote_peer_cache == NULL) {
return 0U;
}
return g_hash_table_size(crm_remote_peer_cache);
}
/*!
* \internal
* \brief Get a remote node cache entry, creating it if necessary
*
* \param[in] node_name Name of remote node
*
* \return Cache entry for node on success, or \c NULL (and set \c errno)
* otherwise
*
* \note When creating a new entry, this will leave the node state undetermined.
* The caller should also call \c pcmk__update_peer_state() if the state
* is known.
* \note Because this can add and remove cache entries, callers should not
* assume any previously obtained cache entry pointers remain valid.
*/
crm_node_t *
pcmk__cluster_lookup_remote_node(const char *node_name)
{
crm_node_t *node;
char *node_name_copy = NULL;
if (node_name == NULL) {
errno = EINVAL;
return NULL;
}
/* It's theoretically possible that the node was added to the cluster peer
* cache before it was known to be a Pacemaker Remote node. Remove that
- * entry unless it has a node ID, which means the name actually is
+ * entry unless it has an XML ID, which means the name actually is
* associated with a cluster node. (@TODO return an error in that case?)
*/
node = pcmk__search_node_caches(0, node_name, NULL,
pcmk__node_search_cluster_member);
if ((node != NULL) && (node->uuid == NULL)) {
/* node_name could be a pointer into the cache entry being removed, so
* reassign it to a copy before the original gets freed
*/
node_name_copy = strdup(node_name);
if (node_name_copy == NULL) {
errno = ENOMEM;
return NULL;
}
node_name = node_name_copy;
pcmk__cluster_forget_cluster_node(0, node_name);
}
/* Return existing cache entry if one exists */
node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
if (node) {
free(node_name_copy);
return node;
}
/* Allocate a new entry */
node = calloc(1, sizeof(crm_node_t));
if (node == NULL) {
free(node_name_copy);
return NULL;
}
/* Populate the essential information */
set_peer_flags(node, crm_remote_node);
node->uuid = strdup(node_name);
if (node->uuid == NULL) {
free(node);
errno = ENOMEM;
free(node_name_copy);
return NULL;
}
/* Add the new entry to the cache */
g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
crm_trace("added %s to remote cache", node_name);
/* Update the entry's uname, ensuring peer status callbacks are called */
update_peer_uname(node, node_name);
free(node_name_copy);
return node;
}
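/* Illustrative sketch (not part of this patch): a new entry's state is left
 * undetermined, so a caller that knows the state sets it afterward, as
 * remote_cache_refresh_helper() does below. The node name "remote1" is made
 * up for this example.
 *
 *   crm_node_t *node = pcmk__cluster_lookup_remote_node("remote1");
 *
 *   if (node == NULL) {
 *       crm_err("Could not cache remote node: %s", strerror(errno));
 *   } else {
 *       pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
 *   }
 */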
/*!
* \internal
* \brief Remove a node from the Pacemaker Remote node cache
*
* \param[in] node_name Name of node to remove from cache
*
* \note The caller must be careful not to use \p node_name after calling this
* function if it might be a pointer into the cache entry being removed.
*/
void
pcmk__cluster_forget_remote_node(const char *node_name)
{
/* Do a lookup first, because node_name could be a pointer within the entry
* being removed -- we can't log it *after* removing it.
*/
if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) {
crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
g_hash_table_remove(crm_remote_peer_cache, node_name);
}
}
/*!
* \internal
* \brief Return node status based on a CIB status entry
*
* \param[in] node_state XML of node state
*
* \return \c CRM_NODE_LOST if \c PCMK__XA_IN_CCM is false in
* \c PCMK__XE_NODE_STATE, \c CRM_NODE_MEMBER otherwise
* \note Unlike most boolean XML attributes, this one defaults to true, for
* backward compatibility with older controllers that don't set it.
*/
static const char *
remote_state_from_cib(const xmlNode *node_state)
{
bool status = false;
if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
&status) == pcmk_rc_ok) && !status) {
return CRM_NODE_LOST;
} else {
return CRM_NODE_MEMBER;
}
}
/* user data for looping through remote node xpath searches */
struct refresh_data {
const char *field; /* XML attribute to check for node name */
gboolean has_state; /* whether to update node state based on XML */
};
/*!
* \internal
* \brief Process one pacemaker_remote node xpath search result
*
* \param[in] result XML search result
* \param[in] user_data what to look for in the XML
*/
static void
remote_cache_refresh_helper(xmlNode *result, void *user_data)
{
const struct refresh_data *data = user_data;
const char *remote = crm_element_value(result, data->field);
const char *state = NULL;
crm_node_t *node;
CRM_CHECK(remote != NULL, return);
/* Determine node's state, if the result has it */
if (data->has_state) {
state = remote_state_from_cib(result);
}
/* Check whether cache already has entry for node */
node = g_hash_table_lookup(crm_remote_peer_cache, remote);
if (node == NULL) {
/* Node is not in cache, so add a new entry for it */
node = pcmk__cluster_lookup_remote_node(remote);
pcmk__assert(node != NULL);
if (state) {
pcmk__update_peer_state(__func__, node, state, 0);
}
} else if (pcmk_is_set(node->flags, crm_node_dirty)) {
/* Node is in cache and hasn't been updated already, so mark it clean */
clear_peer_flags(node, crm_node_dirty);
if (state) {
pcmk__update_peer_state(__func__, node, state, 0);
}
}
}
static void
mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
set_peer_flags((crm_node_t *) value, crm_node_dirty);
}
static gboolean
is_dirty(gpointer key, gpointer value, gpointer user_data)
{
return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
}
/*!
* \internal
* \brief Repopulate the remote node cache based on CIB XML
*
* \param[in] cib CIB XML to parse
*/
static void
refresh_remote_nodes(xmlNode *cib)
{
struct refresh_data data;
pcmk__cluster_init_node_caches();
/* First, we mark all existing cache entries as dirty,
* so that later we can remove any that weren't in the CIB.
* We don't empty the cache, because we need to detect changes in state.
*/
g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
/* Look for guest nodes and remote nodes in the status section */
data.field = PCMK_XA_ID;
data.has_state = TRUE;
crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS,
remote_cache_refresh_helper, &data);
/* Look for guest nodes and remote nodes in the configuration section,
* because they may have just been added and not have a status entry yet.
* In that case, the cached node state will be left NULL, so that the
* peer status callback isn't called until we're sure the node started
* successfully.
*/
data.field = PCMK_XA_VALUE;
data.has_state = FALSE;
crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG,
remote_cache_refresh_helper, &data);
data.field = PCMK_XA_ID;
data.has_state = FALSE;
crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG,
remote_cache_refresh_helper, &data);
/* Remove all old cache entries that weren't seen in the CIB */
g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
}
/*!
* \internal
* \brief Check whether a node is an active cluster node
*
* Remote nodes are never considered active. This guarantees that they can never
* become DC.
*
* \param[in] node Node to check
*
* \return \c true if the node is an active cluster node, or \c false otherwise
*/
bool
pcmk__cluster_is_node_active(const crm_node_t *node)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) {
return false;
}
switch (cluster_layer) {
case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
return pcmk__corosync_is_peer_active(node);
#else
break;
#endif // SUPPORT_COROSYNC
default:
break;
}
crm_err("Unhandled cluster layer: %s",
pcmk_cluster_layer_text(cluster_layer));
return false;
}
/*!
* \internal
* \brief Check if a node's entry should be removed from the cluster node cache
*
* A node should be removed from the cache if it's inactive and matches another
* \c crm_node_t (the search object). The node is considered a mismatch if any
* of the following are true:
* * The search object is \c NULL.
* * The search object has an ID set and the cached node's ID does not match it.
* * The search object does not have an ID set, and the cached node's name does
* not match the search node's name. (If both names are \c NULL, it's a
* match.)
*
* Otherwise, the node is considered a match.
*
* Note that if the search object has both an ID and a name set, the name is
* ignored for matching purposes.
*
* \param[in] key Ignored
* \param[in] value \c crm_node_t object from cluster node cache
* \param[in] user_data \c crm_node_t object to match against (search object)
*
* \return \c TRUE if the node entry should be removed from \c crm_peer_cache,
* or \c FALSE otherwise
*/
static gboolean
should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
{
crm_node_t *node = value;
crm_node_t *search = user_data;
if (search == NULL) {
return FALSE;
}
if ((search->id != 0) && (node->id != search->id)) {
return FALSE;
}
if ((search->id == 0)
&& !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
// @TODO Consider name even if ID is set?
return FALSE;
}
if (pcmk__cluster_is_node_active(value)) {
return FALSE;
}
crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership "
"cache",
pcmk__s(node->uname, "(unknown)"), node->id);
return TRUE;
}
/*!
* \internal
* \brief Remove one or more inactive nodes from the cluster node cache
*
* All inactive nodes matching \p id and \p node_name as described in
* \c should_forget_cluster_node documentation are removed from the cache.
*
* If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed
* from the cache regardless of ID and name. This differs from clearing the
* cache, in that entries for active nodes are preserved.
*
* \param[in] id ID of node to remove from cache (0 to ignore)
* \param[in] node_name Name of node to remove from cache (ignored if \p id is
* nonzero)
*
* \note \p node_name is not modified directly, but it will be freed if it's a
* pointer into a cache entry that is removed.
*/
void
pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
{
crm_node_t search = { 0, };
char *criterion = NULL; // For logging
guint matches = 0;
if (crm_peer_cache == NULL) {
crm_trace("Membership cache not initialized, ignoring removal request");
return;
}
search.id = id;
search.uname = pcmk__str_copy(node_name); // May log after original freed
if (id > 0) {
criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id);
} else if (node_name != NULL) {
criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name);
}
matches = g_hash_table_foreach_remove(crm_peer_cache,
should_forget_cluster_node, &search);
if (matches > 0) {
if (criterion != NULL) {
crm_notice("Removed %u inactive node%s with %s from the membership "
"cache",
matches, pcmk__plural_s(matches), criterion);
} else {
crm_notice("Removed all (%u) inactive cluster nodes from the "
"membership cache",
matches);
}
} else {
crm_info("No inactive cluster nodes%s%s to remove from the membership "
"cache",
((criterion != NULL)? " with " : ""), pcmk__s(criterion, ""));
}
free(search.uname);
free(criterion);
}
static void
count_peer(gpointer key, gpointer value, gpointer user_data)
{
unsigned int *count = user_data;
crm_node_t *node = value;
if (pcmk__cluster_is_node_active(node)) {
*count = *count + 1;
}
}
/*!
* \internal
* \brief Get the number of active cluster nodes that have been seen
*
* Remote nodes are never considered active. This guarantees that they can never
* become DC.
*
* \return Number of active nodes in the cluster node cache
*/
unsigned int
pcmk__cluster_num_active_nodes(void)
{
unsigned int count = 0;
if (crm_peer_cache != NULL) {
g_hash_table_foreach(crm_peer_cache, count_peer, &count);
}
return count;
}
static void
destroy_crm_node(gpointer data)
{
crm_node_t *node = data;
crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
free(node->uname);
free(node->state);
free(node->uuid);
free(node->expected);
free(node->conn_host);
free(node);
}
/*!
* \internal
* \brief Initialize node caches
*/
void
pcmk__cluster_init_node_caches(void)
{
if (crm_peer_cache == NULL) {
crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
}
if (crm_remote_peer_cache == NULL) {
crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
}
if (cluster_node_cib_cache == NULL) {
cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
}
}
/*!
* \internal
* \brief Destroy node caches
*/
void
pcmk__cluster_destroy_node_caches(void)
{
if (crm_peer_cache != NULL) {
crm_trace("Destroying peer cache with %d members",
g_hash_table_size(crm_peer_cache));
g_hash_table_destroy(crm_peer_cache);
crm_peer_cache = NULL;
}
if (crm_remote_peer_cache != NULL) {
crm_trace("Destroying remote peer cache with %d members",
pcmk__cluster_num_remote_nodes());
g_hash_table_destroy(crm_remote_peer_cache);
crm_remote_peer_cache = NULL;
}
if (cluster_node_cib_cache != NULL) {
crm_trace("Destroying configured cluster node cache with %d members",
g_hash_table_size(cluster_node_cib_cache));
g_hash_table_destroy(cluster_node_cib_cache);
cluster_node_cib_cache = NULL;
}
}
static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
const void *) = NULL;
/*!
* \internal
* \brief Set a client function that will be called after peer status changes
*
* \param[in] dispatch Pointer to function to use as callback
*
* \note Client callbacks should do only client-specific handling. Callbacks
* must not add or remove entries in the peer caches.
*/
void
pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type,
crm_node_t *, const void *))
{
// @TODO Improve documentation of peer_status_callback
peer_status_callback = dispatch;
}
/*!
* \internal
* \brief Tell the library whether to automatically reap lost nodes
*
* If \c true (the default), calling \c crm_update_peer_proc() will also update
* the peer state to \c CRM_NODE_MEMBER or \c CRM_NODE_LOST, and updating the
* peer state will reap peers whose state changes to anything other than
* \c CRM_NODE_MEMBER.
*
* Callers should leave this enabled unless they plan to manage the cache
* separately on their own.
*
* \param[in] enable \c true to enable automatic reaping, \c false to disable
*/
void
pcmk__cluster_set_autoreap(bool enable)
{
autoreap = enable;
}
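/* Illustrative sketch (not part of this patch): a daemon that plans to manage
 * the cache on its own disables automatic reaping once, at start-up.
 *
 *   pcmk__cluster_set_autoreap(false);
 */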
static void
dump_peer_hash(int level, const char *caller)
{
GHashTableIter iter;
const char *id = NULL;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
}
}
static gboolean
hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
{
return value == user_data;
}
/*!
* \internal
* \brief Search cluster member node cache
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
* node ID to search for
*
* \return Cluster node cache entry if found, otherwise NULL
*/
static crm_node_t *
search_cluster_member_cache(unsigned int id, const char *uname,
const char *uuid)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_node_t *by_id = NULL;
crm_node_t *by_name = NULL;
pcmk__assert((id > 0) || (uname != NULL));
pcmk__cluster_init_node_caches();
if (uname != NULL) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->uname && strcasecmp(node->uname, uname) == 0) {
crm_trace("Name match: %s = %p", node->uname, node);
by_name = node;
break;
}
}
}
if (id > 0) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->id == id) {
crm_trace("ID match: %u = %p", node->id, node);
by_id = node;
break;
}
}
} else if (uuid != NULL) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
- if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
- crm_trace("UUID match: %s = %p", node->uuid, node);
+ const char *this_xml_id = pcmk__cluster_get_xml_id(node);
+
+ if (pcmk__str_eq(uuid, this_xml_id, pcmk__str_none)) {
+ crm_trace("Found cluster node cache entry by XML ID %s",
+ this_xml_id);
by_id = node;
break;
}
}
}
node = by_id; /* Good default */
if(by_id == by_name) {
/* Nothing to do if they match (both NULL counts) */
crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
} else if(by_id == NULL && by_name) {
crm_trace("Only one: %p for %u/%s", by_name, id, uname);
if(id && by_name->id) {
dump_peer_hash(LOG_WARNING, __func__);
crm_crit("Node %u and %u share the same name '%s'",
id, by_name->id, uname);
node = NULL; /* Create a new one */
} else {
node = by_name;
}
} else if(by_name == NULL && by_id) {
crm_trace("Only one: %p for %u/%s", by_id, id, uname);
if(uname && by_id->uname) {
dump_peer_hash(LOG_WARNING, __func__);
crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
uname, by_id->uname, id, uname);
}
} else if(uname && by_id->uname) {
if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
} else {
crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
dump_peer_hash(LOG_INFO, __func__);
crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
TRUE);
}
} else if(id && by_name->id) {
crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
} else {
/* Simple merge */
/* Only corosync-based clusters use node IDs. The functions that call
* pcmk__update_peer_state() and crm_update_peer_proc() only know
* nodeid, so 'by_id' is authoritative when merging.
*/
dump_peer_hash(LOG_DEBUG, __func__);
crm_info("Merging %p into %p", by_name, by_id);
g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
}
return node;
}
/*!
* \internal
* \brief Search caches for a node (cluster or Pacemaker Remote)
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] xml_id If not NULL, CIB XML ID of node to search for
* \param[in] flags Group of enum pcmk__node_search_flags
*
* \return Node cache entry if found, otherwise NULL
*/
crm_node_t *
pcmk__search_node_caches(unsigned int id, const char *uname,
const char *xml_id, uint32_t flags)
{
crm_node_t *node = NULL;
pcmk__assert((id > 0) || (uname != NULL) || (xml_id != NULL));
pcmk__cluster_init_node_caches();
if (pcmk_is_set(flags, pcmk__node_search_remote)) {
if (uname != NULL) {
node = g_hash_table_lookup(crm_remote_peer_cache, uname);
} else if (xml_id != NULL) {
node = g_hash_table_lookup(crm_remote_peer_cache, xml_id);
}
}
if ((node == NULL)
&& pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
node = search_cluster_member_cache(id, uname, xml_id);
}
if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
if (xml_id != NULL) {
node = find_cib_cluster_node(xml_id, uname);
} else {
// Assumes XML ID is node ID as string (as with Corosync)
char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
node = find_cib_cluster_node(id_str, uname);
free(id_str);
}
}
return node;
}
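/* Illustrative sketch (not part of this patch): searching both caches by name
 * without creating a new entry. The node_name variable is hypothetical.
 *
 *   crm_node_t *node = pcmk__search_node_caches(0, node_name, NULL,
 *                                               pcmk__node_search_remote
 *                                               |pcmk__node_search_cluster_member);
 *
 *   if (node == NULL) {
 *       crm_trace("Node %s not found in any cache", node_name);
 *   }
 */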
/*!
* \internal
* \brief Purge a node from cache (both cluster and Pacemaker Remote)
*
* \param[in] node_name If not NULL, purge only nodes with this name
* \param[in] node_id If not 0, purge cluster nodes only if they have this ID
*
* \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
* If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
* nodes that match \p node_name will be purged, and cluster nodes that
* match both \p node_name and \p node_id will be purged.
* \note The caller must be careful not to use \p node_name after calling this
* function if it might be a pointer into a cache entry being removed.
*/
void
pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
{
char *node_name_copy = NULL;
if ((node_name == NULL) && (node_id == 0U)) {
return;
}
// Purge from Pacemaker Remote node cache
if ((node_name != NULL)
&& (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) {
/* node_name could be a pointer into the cache entry being purged,
* so reassign it to a copy before the original gets freed
*/
node_name_copy = pcmk__str_copy(node_name);
node_name = node_name_copy;
crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
g_hash_table_remove(crm_remote_peer_cache, node_name);
}
pcmk__cluster_forget_cluster_node(node_id, node_name);
free(node_name_copy);
}
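/* Usage sketch (editorial illustration): purging by name and ID together.
* Per the note above, if the name pointer came from a cache entry, it must
* not be used after the call; the literal values here are hypothetical.
*
*     // Remove any Pacemaker Remote node named "node1", and any cluster
*     // node matching both name "node1" and cluster node ID 1
*     pcmk__purge_node_from_cache("node1", 1);
*/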
#if SUPPORT_COROSYNC
static guint
remove_conflicting_peer(crm_node_t *node)
{
int matches = 0;
GHashTableIter iter;
crm_node_t *existing_node = NULL;
if (node->id == 0 || node->uname == NULL) {
return 0;
}
if (!pcmk__corosync_has_nodelist()) {
return 0;
}
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
if (existing_node->id > 0
&& existing_node->id != node->id
&& existing_node->uname != NULL
&& strcasecmp(existing_node->uname, node->uname) == 0) {
if (pcmk__cluster_is_node_active(existing_node)) {
continue;
}
crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
existing_node->id, existing_node->uname, node->id);
g_hash_table_iter_remove(&iter);
matches++;
}
}
return matches;
}
#endif
/*!
* \internal
* \brief Get a cluster node cache entry, possibly creating one if not found
*
* If \c pcmk__node_search_cluster_member is set in \p flags, the return value
* is guaranteed not to be \c NULL. A new cache entry is created if one does not
* already exist.
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
* node ID to search for
* \param[in] flags Group of enum pcmk__node_search_flags
*
* \return (Possibly newly created) cluster node cache entry
*/
/* coverity[-alloc] Memory is referenced in one or both hashtables */
crm_node_t *
pcmk__get_node(unsigned int id, const char *uname, const char *uuid,
uint32_t flags)
{
crm_node_t *node = NULL;
char *uname_lookup = NULL;
pcmk__assert((id > 0) || (uname != NULL));
pcmk__cluster_init_node_caches();
// Check the Pacemaker Remote node cache first
if (pcmk_is_set(flags, pcmk__node_search_remote)) {
node = g_hash_table_lookup(crm_remote_peer_cache, uname);
if (node != NULL) {
return node;
}
}
if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
return NULL;
}
node = search_cluster_member_cache(id, uname, uuid);
/* If uname wasn't provided, and the cache search did not turn up a name
* based on the ID, look the node name up via the cluster membership layer. */
if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
uname_lookup = pcmk__cluster_node_name(id);
}
if (uname_lookup) {
uname = uname_lookup;
crm_trace("Inferred a name of '%s' for node %u", uname, id);
/* Search the cache once more, now that we know the node name. */
if (node == NULL) {
node = search_cluster_member_cache(id, uname, uuid);
}
}
if (node == NULL) {
char *uniqueid = crm_generate_uuid();
node = pcmk__assert_alloc(1, sizeof(crm_node_t));
crm_info("Created entry %s/%p for node %s/%u (%d total)",
uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
g_hash_table_replace(crm_peer_cache, uniqueid, node);
}
if (id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
crm_info("Node %u is now known as %s", id, uname);
}
if (id > 0 && node->id == 0) {
node->id = id;
}
if (uname && (node->uname == NULL)) {
update_peer_uname(node, uname);
}
if (node->uuid == NULL) {
if (uuid == NULL) {
uuid = pcmk__cluster_get_xml_id(node);
}
if (uuid) {
crm_info("Node %u has uuid %s", id, uuid);
} else {
crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
}
}
free(uname_lookup);
return node;
}
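/* Usage sketch (editorial illustration): with
* pcmk__node_search_cluster_member set in flags, the return value is
* guaranteed to be non-NULL, so a membership handler can find-or-create an
* entry for a Corosync node ID without a NULL check. The node ID 1 is
* hypothetical.
*
*     crm_node_t *peer = pcmk__get_node(1, NULL, NULL,
*                                       pcmk__node_search_cluster_member);
*
*     crm_debug("Cache entry for nodeid 1 has name %s",
*               (peer->uname? peer->uname : "unknown"));
*/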
/*!
* \internal
* \brief Update a node's uname
*
* \param[in,out] node Node object to update
* \param[in] uname New name to set
*
* \note This function should not be called within a peer cache iteration,
* because in some cases it can remove conflicting cache entries,
* which would invalidate the iterator.
*/
static void
update_peer_uname(crm_node_t *node, const char *uname)
{
CRM_CHECK(uname != NULL,
crm_err("Bug: can't update node name without name"); return);
CRM_CHECK(node != NULL,
crm_err("Bug: can't update node name to %s without node", uname);
return);
if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
crm_debug("Node uname '%s' did not change", uname);
return;
}
for (const char *c = uname; *c; ++c) {
if ((*c >= 'A') && (*c <= 'Z')) {
crm_warn("Node names with capitals are discouraged, consider changing '%s'",
uname);
break;
}
}
pcmk__str_update(&node->uname, uname);
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_uname, node, NULL);
}
#if SUPPORT_COROSYNC
if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
&& !pcmk_is_set(node->flags, crm_remote_node)) {
remove_conflicting_peer(node);
}
#endif
}
/*!
* \internal
* \brief Get log-friendly string equivalent of a process flag
*
* \param[in] proc Process flag
*
* \return Log-friendly string equivalent of \p proc
*/
static inline const char *
proc2text(enum crm_proc_flag proc)
{
const char *text = "unknown";
switch (proc) {
case crm_proc_none:
text = "none";
break;
case crm_proc_cpg:
text = "corosync-cpg";
break;
}
return text;
}
/*!
* \internal
* \brief Update a node's process information (and potentially state)
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] flag Bitmask of new process information
* \param[in] status Node status (online, offline, etc.)
*
* \return NULL if any node was reaped from peer caches, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function should not be
* called within a cache iteration if reaping is possible, otherwise
* reaping could invalidate the iterator.
*/
crm_node_t *
crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
{
uint32_t last = 0;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
source, proc2text(flag), status);
return NULL);
/* Pacemaker doesn't spawn processes on remote nodes */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return node;
}
last = node->processes;
if (status == NULL) {
node->processes = flag;
if (node->processes != last) {
changed = TRUE;
}
} else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
if ((node->processes & flag) != flag) {
node->processes = pcmk__set_flags_as(__func__, __LINE__,
LOG_TRACE, "Peer process",
node->uname, node->processes,
flag, "processes");
changed = TRUE;
}
} else if (node->processes & flag) {
node->processes = pcmk__clear_flags_as(__func__, __LINE__,
LOG_TRACE, "Peer process",
node->uname, node->processes,
flag, "processes");
changed = TRUE;
}
if (changed) {
if (status == NULL && flag <= crm_proc_none) {
crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
node->id);
} else {
crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
proc2text(flag), status);
}
if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
node->when_online = time(NULL);
} else {
node->when_online = 0;
}
/* Call the client callback first, then update the peer state,
* in case the node will be reaped
*/
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_processes, node, &last);
}
/* The client callback shouldn't touch the peer caches,
* but as a safety net, bail if the peer cache was destroyed.
*/
if (crm_peer_cache == NULL) {
return NULL;
}
if (autoreap) {
const char *peer_state = NULL;
if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
peer_state = CRM_NODE_MEMBER;
} else {
peer_state = CRM_NODE_LOST;
}
node = pcmk__update_peer_state(__func__, node, peer_state, 0);
}
} else {
crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
proc2text(flag), status);
}
return node;
}
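/* Usage sketch (editorial illustration): a CPG membership callback might
* record that a peer's corosync-cpg process is online, then honor the
* reaping contract by checking for a NULL return. 'peer' is a hypothetical
* variable.
*
*     peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg,
*                                 PCMK_VALUE_ONLINE);
*     if (peer == NULL) {
*         return; // entry was reaped; do not dereference it again
*     }
*/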
/*!
* \internal
* \brief Update a cluster node cache entry's expected join state
*
* \param[in] source Caller's function name (for logging)
* \param[in,out] node Node to update
* \param[in] expected Node's new join state
*/
void
pcmk__update_peer_expected(const char *source, crm_node_t *node,
const char *expected)
{
char *last = NULL;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
return);
/* Remote nodes don't participate in joins */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return;
}
last = node->expected;
if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
node->expected = strdup(expected);
changed = TRUE;
}
if (changed) {
crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
expected, last);
free(last);
} else {
crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
node->id, expected);
}
}
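/* Usage sketch (editorial illustration): the controller records the join
* state it expects from a peer. CRM_NODE_MEMBER is used here purely as an
* example value, and 'peer' is a hypothetical variable.
*
*     pcmk__update_peer_expected(__func__, peer, CRM_NODE_MEMBER);
*/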
/*!
* \internal
* \brief Update a node's state and membership information
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] state Node's new state
* \param[in] membership Node's new membership ID
* \param[in,out] iter If not NULL, pointer to node's peer cache iterator
*
* \return NULL if any node was reaped, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function may be called from
* within a peer cache iteration if the iterator is supplied.
*/
static crm_node_t *
update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
uint64_t membership, GHashTableIter *iter)
{
gboolean is_member;
CRM_CHECK(node != NULL,
crm_err("Could not set state for unknown host to %s "
CRM_XS " source=%s", state, source);
return NULL);
is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
if (is_member) {
node->when_lost = 0;
if (membership) {
node->last_seen = membership;
}
}
if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
char *last = node->state;
if (is_member) {
node->when_member = time(NULL);
} else {
node->when_member = 0;
}
node->state = strdup(state);
crm_notice("Node %s state is now %s " CRM_XS
" nodeid=%u previous=%s source=%s", node->uname, state,
node->id, (last? last : "unknown"), source);
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_nstate, node, last);
}
free(last);
if (autoreap && !is_member
&& !pcmk_is_set(node->flags, crm_remote_node)) {
/* We only autoreap from the peer cache, not the remote peer cache,
* because the latter should be managed only by
* refresh_remote_nodes().
*/
if (iter) {
crm_notice("Purged 1 peer with " PCMK_XA_ID
"=%u and/or uname=%s from the membership cache",
node->id, node->uname);
g_hash_table_iter_remove(iter);
} else {
pcmk__cluster_forget_cluster_node(node->id, node->uname);
}
node = NULL;
}
} else {
crm_trace("Node %s state is unchanged (%s) " CRM_XS
" nodeid=%u source=%s", node->uname, state, node->id, source);
}
return node;
}
/*!
* \brief Update a node's state and membership information
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] state Node's new state
* \param[in] membership Node's new membership ID
*
* \return NULL if any node was reaped, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function should not be
* called within a cache iteration if reaping is possible,
* otherwise reaping could invalidate the iterator.
*/
crm_node_t *
pcmk__update_peer_state(const char *source, crm_node_t *node,
const char *state, uint64_t membership)
{
return update_peer_state_iter(source, node, state, membership, NULL);
}
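/* Usage sketch (editorial illustration): marking a peer lost outside of any
* cache iteration. A NULL return means the entry may have been reaped and
* must not be touched afterward. 'peer' is a hypothetical variable.
*
*     peer = pcmk__update_peer_state(__func__, peer, CRM_NODE_LOST, 0);
*     if (peer == NULL) {
*         return; // reaped from the cluster node cache
*     }
*/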
/*!
* \internal
* \brief Reap all nodes from cache whose membership information does not match
*
* \param[in] membership Membership ID of nodes to keep
*/
void
pcmk__reap_unseen_nodes(uint64_t membership)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_trace("Reaping unseen nodes...");
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
if (node->last_seen != membership) {
if (node->state) {
/*
* Calling update_peer_state_iter() allows us to
* remove the node from crm_peer_cache without
* invalidating our iterator
*/
update_peer_state_iter(__func__, node, CRM_NODE_LOST,
membership, &iter);
} else {
crm_info("State of node %s[%u] is still unknown",
node->uname, node->id);
}
}
}
}
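/* Usage sketch (editorial illustration): after handling a membership event,
* a caller can drop every cached member that was not seen in the new
* membership. The ring ID value here is hypothetical.
*
*     uint64_t ring_id = 42; // membership ID from the cluster layer
*
*     pcmk__reap_unseen_nodes(ring_id);
*/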
static crm_node_t *
find_cib_cluster_node(const char *id, const char *uname)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_node_t *by_id = NULL;
crm_node_t *by_name = NULL;
if (uname) {
g_hash_table_iter_init(&iter, cluster_node_cib_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (node->uname && strcasecmp(node->uname, uname) == 0) {
crm_trace("Name match: %s = %p", node->uname, node);
by_name = node;
break;
}
}
}
if (id) {
g_hash_table_iter_init(&iter, cluster_node_cib_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
- if(strcasecmp(node->uuid, id) == 0) {
+ if (pcmk__str_eq(id, pcmk__cluster_get_xml_id(node),
+ pcmk__str_none)) {
crm_trace("ID match: %s= %p", id, node);
by_id = node;
break;
}
}
}
node = by_id; /* Good default */
if (by_id == by_name) {
/* Nothing to do if they match (both being NULL counts as a match) */
crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
} else if (by_id == NULL && by_name) {
crm_trace("Only one: %p for %s/%s", by_name, id, uname);
if (id) {
node = NULL;
} else {
node = by_name;
}
} else if (by_name == NULL && by_id) {
crm_trace("Only one: %p for %s/%s", by_id, id, uname);
if (uname) {
node = NULL;
}
} else if (uname && by_id->uname
&& pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
/* Multiple nodes have the same uname in the CIB.
* Return by_id. */
} else if (id && by_name->uuid
- && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
+ && pcmk__str_eq(id, by_name->uuid, pcmk__str_none)) {
/* Multiple nodes have the same id in the CIB.
* Return by_name. */
node = by_name;
} else {
node = NULL;
}
if (node == NULL) {
crm_debug("Couldn't find node%s%s%s%s",
id? " " : "",
id? id : "",
uname? " with name " : "",
uname? uname : "");
}
return node;
}
static void
cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
{
const char *id = crm_element_value(xml_node, PCMK_XA_ID);
const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
crm_node_t *node = NULL;
CRM_CHECK(id != NULL && uname != NULL, return);
node = find_cib_cluster_node(id, uname);
if (node == NULL) {
char *uniqueid = crm_generate_uuid();
node = pcmk__assert_alloc(1, sizeof(crm_node_t));
node->uname = pcmk__str_copy(uname);
node->uuid = pcmk__str_copy(id);
g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);
} else if (pcmk_is_set(node->flags, crm_node_dirty)) {
pcmk__str_update(&node->uname, uname);
/* Node is in cache and hasn't been updated already, so mark it clean */
clear_peer_flags(node, crm_node_dirty);
}
}
static void
refresh_cluster_node_cib_cache(xmlNode *cib)
{
pcmk__cluster_init_node_caches();
g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);
crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
cluster_node_cib_cache_refresh_helper, NULL);
// Remove all old cache entries that weren't seen in the CIB
g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
}
void
pcmk__refresh_node_caches_from_cib(xmlNode *cib)
{
refresh_remote_nodes(cib);
refresh_cluster_node_cib_cache(cib);
}
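/* Usage sketch (editorial illustration): daemons that track node
* configuration can rebuild both CIB-based caches whenever a new CIB copy
* arrives; 'cib_xml' is a hypothetical variable holding that copy.
*
*     pcmk__refresh_node_caches_from_cib(cib_xml);
*/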
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/cluster/compat.h>
int
crm_terminate_member(int nodeid, const char *uname, void *unused)
{
return stonith_api_kick(nodeid, uname, 120, TRUE);
}
int
crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
{
return stonith_api_kick(nodeid, uname, 120, TRUE);
}
crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
{
return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster_member);
}
crm_node_t *
crm_get_peer_full(unsigned int id, const char *uname, int flags)
{
return pcmk__get_node(id, uname, NULL, flags);
}
int
crm_remote_peer_cache_size(void)
{
unsigned int count = pcmk__cluster_num_remote_nodes();
return QB_MIN(count, INT_MAX);
}
void
crm_remote_peer_cache_refresh(xmlNode *cib)
{
refresh_remote_nodes(cib);
}
crm_node_t *
crm_remote_peer_get(const char *node_name)
{
return pcmk__cluster_lookup_remote_node(node_name);
}
void
crm_remote_peer_cache_remove(const char *node_name)
{
pcmk__cluster_forget_remote_node(node_name);
}
gboolean
crm_is_peer_active(const crm_node_t * node)
{
return pcmk__cluster_is_node_active(node);
}
guint
crm_active_peers(void)
{
return pcmk__cluster_num_active_nodes();
}
guint
reap_crm_member(uint32_t id, const char *name)
{
int matches = 0;
crm_node_t search = { 0, };
if (crm_peer_cache == NULL) {
crm_trace("Membership cache not initialized, ignoring purge request");
return 0;
}
search.id = id;
search.uname = pcmk__str_copy(name);
matches = g_hash_table_foreach_remove(crm_peer_cache,
should_forget_cluster_node, &search);
if (matches) {
crm_notice("Purged %d peer%s with " PCMK_XA_ID
"=%u%s%s from the membership cache",
matches, pcmk__plural_s(matches), search.id,
(search.uname? " and/or uname=" : ""),
(search.uname? search.uname : ""));
} else {
crm_info("No peers with " PCMK_XA_ID
"=%u%s%s to purge from the membership cache",
search.id, (search.uname? " and/or uname=" : ""),
(search.uname? search.uname : ""));
}
free(search.uname);
return matches;
}
void
crm_peer_init(void)
{
pcmk__cluster_init_node_caches();
}
void
crm_peer_destroy(void)
{
pcmk__cluster_destroy_node_caches();
}
void
crm_set_autoreap(gboolean enable)
{
pcmk__cluster_set_autoreap(enable);
}
void
crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
{
pcmk__cluster_set_status_callback(dispatch);
}
// LCOV_EXCL_STOP
// End deprecated API