diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index f868cea739..9abaf0535d 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -1,1044 +1,1044 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-controld.h>
static char *max_generation_from = NULL;
static xmlNodePtr max_generation_xml = NULL;
/*!
* \internal
* \brief Nodes from which a CIB sync has failed since the peer joined
*
* This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
* the name of a client node from which a CIB \p sync_from() call has failed in
* \p do_dc_join_finalize() since the client joined the cluster as a peer.
* \p join_id is the ID of the join round in which the \p sync_from() failed,
* and is intended for use in nack log messages.
*/
static GHashTable *failed_sync_nodes = NULL;
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/* Numeric counter used to identify join rounds (an unsigned int would be
* appropriate, except we get and set it in XML as int)
*/
static int current_join_id = 0;
/*!
* \internal
* \brief Destroy the hash table containing failed sync nodes
*/
void
controld_destroy_failed_sync_table(void)
{
if (failed_sync_nodes != NULL) {
g_hash_table_destroy(failed_sync_nodes);
failed_sync_nodes = NULL;
}
}
/*!
* \internal
* \brief Remove a node from the failed sync nodes table if present
*
* \param[in] node_name Node name to remove
*/
void
controld_remove_failed_sync_node(const char *node_name)
{
if (failed_sync_nodes != NULL) {
g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
}
}
/*!
* \internal
* \brief Add to a hash table a node whose CIB failed to sync
*
* \param[in] node_name Name of node whose CIB failed to sync
* \param[in] join_id Join round when the failure occurred
*/
static void
record_failed_sync_node(const char *node_name, gint join_id)
{
if (failed_sync_nodes == NULL) {
failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
}
/* If the node is already in the table then we failed to nack it during the
* filter offer step
*/
CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
GINT_TO_POINTER(join_id)));
}
/*!
* \internal
* \brief Look up a node name in the failed sync table
*
* \param[in] node_name Name of node to look up
* \param[out] join_id Where to store the join ID of when the sync failed
*
* \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
* node name was found, or \p pcmk_rc_node_unknown otherwise.
* \note \p *join_id is set to -1 if the node is not found.
*/
static int
lookup_failed_sync_node(const char *node_name, gint *join_id)
{
*join_id = -1;
if (failed_sync_nodes != NULL) {
gpointer result = g_hash_table_lookup(failed_sync_nodes,
(gchar *) node_name);
if (result != NULL) {
*join_id = GPOINTER_TO_INT(result);
return pcmk_rc_ok;
}
}
return pcmk_rc_node_unknown;
}
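/* For illustration: the failed-sync table packs the integer join ID directly
 * into the GHashTable value slot via GINT_TO_POINTER(), so the values need no
 * allocation or destructor. A minimal self-contained equivalent in plain GLib
 * (with case-sensitive keys for brevity, whereas pcmk__strikey_table() hashes
 * node names case-insensitively):
 */
static gint
example_failed_sync_lookup(void)
{
    GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal,
                                              g_free, NULL);
    gint join_id = -1;
    g_hash_table_insert(table, g_strdup("node1"), GINT_TO_POINTER(3));
    join_id = GPOINTER_TO_INT(g_hash_table_lookup(table, "node1")); // now 3
    g_hash_table_destroy(table);
    return join_id;
}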
void
crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
{
enum crm_join_phase last = 0;
CRM_CHECK(node != NULL, return);
/* Remote nodes do not participate in joins */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return;
}
last = node->join;
if(phase == last) {
crm_trace("Node %s join-%d phase is still %s "
QB_XS " nodeid=%u source=%s",
node->uname, current_join_id, crm_join_phase_str(last),
node->id, source);
} else if ((phase <= crm_join_none) || (phase == (last + 1))) {
node->join = phase;
crm_trace("Node %s join-%d phase is now %s (was %s) "
QB_XS " nodeid=%u source=%s",
node->uname, current_join_id, crm_join_phase_str(phase),
crm_join_phase_str(last), node->id, source);
} else {
crm_warn("Rejecting join-%d phase update for node %s because "
"can't go from %s to %s " QB_XS " nodeid=%u source=%s",
current_join_id, node->uname, crm_join_phase_str(last),
crm_join_phase_str(phase), node->id, source);
}
}
static void
start_join_round(void)
{
GHashTableIter iter;
crm_node_t *peer = NULL;
crm_debug("Starting new join round join-%d", current_join_id);
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
crm_update_peer_join(__func__, peer, crm_join_none);
}
if (max_generation_from != NULL) {
free(max_generation_from);
max_generation_from = NULL;
}
if (max_generation_xml != NULL) {
pcmk__xml_free(max_generation_xml);
max_generation_xml = NULL;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
}
/*!
* \internal
* \brief Create a join message from the DC
*
* \param[in] join_op Join operation name
* \param[in] host_to Recipient of message
*/
static xmlNode *
create_dc_message(const char *join_op, const char *host_to)
{
xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
CRM_SYSTEM_DC, NULL);
/* Identify which election this is a part of */
crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
/* Add a field specifying whether the DC is shutting down. This keeps the
* joining node from fencing the old DC if it becomes the new DC.
*/
pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
pcmk_is_set(controld_globals.fsa_input_register,
R_SHUTDOWN));
return msg;
}
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *offer = NULL;
crm_node_t *member = (crm_node_t *)value;
CRM_ASSERT(member != NULL);
if (!pcmk__cluster_is_node_active(member)) {
crm_info("Not making join-%d offer to inactive node %s",
current_join_id,
(member->uname? member->uname : "with unknown name"));
if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
/* You would think this unsafe, but in fact this plus an
* active resource is what causes it to be fenced.
*
* Yes, this does mean that any node that dies at the same
* time as the old DC and is not (still) running any resources
* won't be fenced.
*
* I'm not happy about this either.
*/
pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
}
return;
}
if (member->uname == NULL) {
crm_info("Not making join-%d offer to node uuid %s with unknown name",
current_join_id, member->uuid);
return;
}
if (controld_globals.membership_id != crm_peer_seq) {
controld_globals.membership_id = crm_peer_seq;
crm_info("Making join-%d offers based on membership event %llu",
current_join_id, crm_peer_seq);
}
if(user_data && member->join > crm_join_none) {
crm_info("Not making join-%d offer to already known node %s (%s)",
current_join_id, member->uname,
crm_join_phase_str(member->join));
return;
}
crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
// Advertise our feature set so the joining node can bail if not compatible
crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
pcmk__cluster_send_message(member, crm_msg_crmd, offer);
pcmk__xml_free(offer);
crm_update_peer_join(__func__, member, crm_join_welcomed);
}
/* A_DC_JOIN_OFFER_ALL */
void
do_dc_join_offer_all(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int count;
/* Reset everyone's status back to down or in_ccm in the CIB.
* Any nodes that are active in the CIB but not in the cluster membership
* will be seen as offline by the scheduler anyway.
*/
current_join_id++;
start_join_round();
update_dc(NULL);
if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
crm_info("A new node joined the cluster");
}
g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
count = crmd_join_phase_count(crm_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
/* A_DC_JOIN_OFFER_ONE */
void
do_dc_join_offer_one(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_node_t *member;
ha_msg_input_t *welcome = NULL;
int count;
const char *join_to = NULL;
if (msg_data->data == NULL) {
crm_info("Making join-%d offers to any unconfirmed nodes "
"because an unknown node joined", current_join_id);
g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
check_join_state(cur_state, __func__);
return;
}
welcome = fsa_typed_data(fsa_dt_ha_msg);
if (welcome == NULL) {
// fsa_typed_data() already logged an error
return;
}
join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
if (join_to == NULL) {
crm_err("Can't make join-%d offer to unknown node", current_join_id);
return;
}
member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
/* It is possible that a node will have been sick or starting up when the
* original offer was made. However, it will either re-announce itself in
* due course, or we can re-store the original offer on the client.
*/
crm_update_peer_join(__func__, member, crm_join_none);
join_make_offer(NULL, member, NULL);
/* If the offer isn't to the local node, make an offer to the local node as
* well, to ensure the correct value for max_generation_from.
*/
if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
member = pcmk__get_node(0, controld_globals.our_nodename, NULL,
pcmk__node_search_cluster_member);
join_make_offer(NULL, member, NULL);
}
/* This was a genuine join request; cancel any existing transition and
* invoke the scheduler.
*/
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
NULL);
count = crmd_join_phase_count(crm_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
const char *elem_l = crm_element_value(left, field);
const char *elem_r = crm_element_value(right, field);
long long int_elem_l;
long long int_elem_r;
pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
if (int_elem_l < int_elem_r) {
return -1;
} else if (int_elem_l > int_elem_r) {
return 1;
}
return 0;
}
/* A_DC_JOIN_PROCESS_REQ */
void
do_dc_join_filter_offer(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *generation = NULL;
int cmp = 0;
int join_id = -1;
int count = 0;
gint value = 0;
gboolean ack_nack_bool = TRUE;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
const char *join_version = crm_element_value(join_ack->msg,
PCMK_XA_CRM_FEATURE_SET);
crm_node_t *join_node = NULL;
if (join_from == NULL) {
crm_err("Ignoring invalid join request without node name");
return;
}
join_node = pcmk__get_node(0, join_from, NULL,
pcmk__node_search_cluster_member);
crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
if (join_id != current_join_id) {
crm_debug("Ignoring join-%d request from %s because we are on join-%d",
join_id, join_from, current_join_id);
check_join_state(cur_state, __func__);
return;
}
generation = join_ack->xml;
if (max_generation_xml != NULL && generation != NULL) {
int lpc = 0;
const char *attributes[] = {
PCMK_XA_ADMIN_EPOCH,
PCMK_XA_EPOCH,
PCMK_XA_NUM_UPDATES,
};
/* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
* element from the join client. The "if" guard is for clarity.
*/
if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
cmp = compare_int_fields(max_generation_xml, generation,
attributes[lpc]);
}
} else { // Should always be PCMK__XE_GENERATION_TUPLE
CRM_LOG_ASSERT(false);
}
}
if (ref == NULL) {
ref = "none"; // for logging only
}
if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
crm_err("Rejecting join-%d request from node %s because we failed to "
"sync its CIB in join-%d " QB_XS " ref=%s",
join_id, join_from, value, ref);
ack_nack_bool = FALSE;
} else if (!pcmk__cluster_is_node_active(join_node)) {
if (match_down_event(join_from) != NULL) {
/* The join request was received after the node was fenced or
* otherwise shutdown in a way that we're aware of. No need to log
* an error in this rare occurrence; we know the client was recently
* shut down, and receiving a lingering in-flight request is not
* cause for alarm.
*/
crm_debug("Rejecting join-%d request from inactive node %s "
QB_XS " ref=%s", join_id, join_from, ref);
} else {
crm_err("Rejecting join-%d request from inactive node %s "
QB_XS " ref=%s", join_id, join_from, ref);
}
ack_nack_bool = FALSE;
} else if (generation == NULL) {
crm_err("Rejecting invalid join-%d request from node %s "
"missing CIB generation " QB_XS " ref=%s",
join_id, join_from, ref);
ack_nack_bool = FALSE;
} else if ((join_version == NULL)
|| !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
crm_err("Rejecting join-%d request from node %s because feature set %s"
" is incompatible with ours (%s) " QB_XS " ref=%s",
join_id, join_from, (join_version? join_version : "pre-3.1.0"),
CRM_FEATURE_SET, ref);
ack_nack_bool = FALSE;
} else if (max_generation_xml == NULL) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with first CIB "
"generation) due to unknown schema version %s "
QB_XS " ref=%s",
join_id, join_from, pcmk__s(validation, "(missing)"), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with first CIB "
"generation) " QB_XS " ref=%s",
join_id, join_from, ref);
max_generation_xml = pcmk__xml_copy(NULL, generation);
pcmk__str_update(&max_generation_from, join_from);
}
} else if ((cmp < 0)
|| ((cmp == 0)
&& pcmk__str_eq(join_from, controld_globals.our_nodename,
pcmk__str_casei))) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with better CIB "
"generation than current best from %s) due to unknown "
"schema version %s " QB_XS " ref=%s",
join_id, join_from, max_generation_from,
pcmk__s(validation, "(missing)"), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with better CIB "
"generation than current best from %s) " QB_XS " ref=%s",
join_id, join_from, max_generation_from, ref);
crm_log_xml_debug(max_generation_xml, "Old max generation");
crm_log_xml_debug(generation, "New max generation");
pcmk__xml_free(max_generation_xml);
max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
pcmk__str_update(&max_generation_from, join_from);
}
} else {
crm_debug("Accepting join-%d request from %s " QB_XS " ref=%s",
join_id, join_from, ref);
}
if (!ack_nack_bool) {
if (compare_version(join_version, "3.17.0") < 0) {
/* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely
* after a nack message, so don't send one
*/
crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
} else {
crm_update_peer_join(__func__, join_node, crm_join_nack);
}
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
} else {
crm_update_peer_join(__func__, join_node, crm_join_integrated);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
}
count = crmd_join_phase_count(crm_join_integrated);
crm_debug("%d node%s currently integrated in join-%d",
count, pcmk__plural_s(count), join_id);
if (check_join_state(cur_state, __func__) == FALSE) {
// Don't waste time by invoking the scheduler yet
count = crmd_join_phase_count(crm_join_welcomed);
crm_debug("Waiting on join-%d requests from %d outstanding node%s",
join_id, count, pcmk__plural_s(count));
}
}
/* A_DC_JOIN_FINALIZE */
void
do_dc_join_finalize(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
char *sync_from = NULL;
int rc = pcmk_ok;
int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
int count_finalizable = crmd_join_phase_count(crm_join_integrated)
+ crmd_join_phase_count(crm_join_nack)
+ crmd_join_phase_count(crm_join_nack_quiet);
/* We can do this straight away, to avoid clients timing us out
* while we compute the latest CIB
*/
if (count_welcomed != 0) {
crm_debug("Waiting on join-%d requests from %d outstanding node%s "
"before finalizing join", current_join_id, count_welcomed,
pcmk__plural_s(count_welcomed));
crmd_join_phase_log(LOG_DEBUG);
/* crmd_fsa_stall(FALSE); Needed? */
return;
} else if (count_finalizable == 0) {
crm_debug("Finalization not needed for join-%d at the current time",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
check_join_state(controld_globals.fsa_state, __func__);
return;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
pcmk__str_null_matches|pcmk__str_casei)) {
controld_set_fsa_input_flags(R_HAVE_CIB);
}
if (!controld_globals.transition_graph->complete) {
crm_warn("Delaying join-%d finalization while transition in progress",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
crmd_fsa_stall(FALSE);
return;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
// Send our CIB out to everyone
sync_from = pcmk__str_copy(controld_globals.our_nodename);
crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
current_join_id, count_finalizable,
pcmk__plural_s(count_finalizable));
crm_log_xml_debug(max_generation_xml, "Requested CIB version");
} else {
// Ask for the agreed best CIB
sync_from = pcmk__str_copy(max_generation_from);
crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
current_join_id, count_finalizable,
pcmk__plural_s(count_finalizable), sync_from);
crm_log_xml_notice(max_generation_xml, "Requested CIB version");
}
crmd_join_phase_log(LOG_DEBUG);
rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
sync_from, NULL, cib_none);
fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
}
void
free_max_generation(void)
{
free(max_generation_from);
max_generation_from = NULL;
pcmk__xml_free(max_generation_xml);
max_generation_xml = NULL;
}
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
CRM_LOG_ASSERT(-EPERM != rc);
if (rc != pcmk_ok) {
const char *sync_from = (const char *) user_data;
do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
"Could not sync CIB from %s in join-%d: %s",
sync_from, current_join_id, pcmk_strerror(rc));
if (rc != -pcmk_err_old_data) {
record_failed_sync_node(sync_from, current_join_id);
}
/* restart the whole join process */
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
__func__);
} else if (!AM_I_DC) {
crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
} else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
"(%s)", current_join_id,
fsa_state2string(controld_globals.fsa_state));
} else {
controld_set_fsa_input_flags(R_HAVE_CIB);
/* make sure dc_uuid is re-set to us */
if (!check_join_state(controld_globals.fsa_state, __func__)) {
int count_finalizable = 0;
count_finalizable = crmd_join_phase_count(crm_join_integrated)
+ crmd_join_phase_count(crm_join_nack)
+ crmd_join_phase_count(crm_join_nack_quiet);
crm_debug("Notifying %d node%s of join-%d results",
count_finalizable, pcmk__plural_s(count_finalizable),
current_join_id);
g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
}
}
}
static void
join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data)
{
const char *node = user_data;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL; // for register_fsa_error() macro
crm_crit("join-%d node history update (via CIB call %d) for node %s "
"failed: %s",
current_join_id, call_id, node, pcmk_strerror(rc));
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
crm_debug("join-%d node history update (via CIB call %d) for node %s "
"complete",
current_join_id, call_id, node);
check_join_state(controld_globals.fsa_state, __func__);
}
/* A_DC_JOIN_PROCESS_ACK */
void
do_dc_join_ack(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int join_id = -1;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
crm_node_t *peer = NULL;
enum controld_section_e section = controld_section_lrm;
char *xpath = NULL;
xmlNode *state = join_ack->xml;
xmlNode *execd_state = NULL;
cib_t *cib = controld_globals.cib_conn;
int rc = pcmk_ok;
// Sanity checks
if (join_from == NULL) {
crm_warn("Ignoring message received without node identification");
goto done;
}
if (op == NULL) {
crm_warn("Ignoring message received from %s without task", join_from);
goto done;
}
if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
op, join_from, CRM_OP_JOIN_CONFIRM);
goto done;
}
if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
crm_warn("Ignoring join confirmation from %s without valid join ID",
join_from);
goto done;
}
peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
if (peer->join != crm_join_finalized) {
crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
"(currently %s not %s)",
join_id, join_from, crm_join_phase_str(peer->join),
crm_join_phase_str(crm_join_finalized));
goto done;
}
if (join_id != current_join_id) {
crm_err("Rejecting join-%d confirmation from %s "
"because currently on join-%d",
join_id, join_from, current_join_id);
crm_update_peer_join(__func__, peer, crm_join_nack);
goto done;
}
crm_update_peer_join(__func__, peer, crm_join_confirmed);
/* Update CIB with node's current executor state. A new transition will be
* triggered later, when the CIB manager notifies us of the change.
*
* The delete and modify requests are part of an atomic transaction.
*/
rc = cib->cmds->init_transaction(cib);
if (rc != pcmk_ok) {
goto done;
}
// Delete relevant parts of node's current executor state from CIB
if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
section = controld_section_lrm_unlocked;
}
controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
rc = cib->cmds->remove(cib, xpath, NULL,
cib_xpath|cib_multiple|cib_transaction);
if (rc != pcmk_ok) {
goto done;
}
// Update CIB with node's latest known executor state
if (pcmk__str_eq(join_from, controld_globals.our_nodename,
pcmk__str_casei)) {
// Use the latest possible state if processing our own join ack
execd_state = controld_query_executor_state();
if (execd_state != NULL) {
crm_debug("Updating local node history for join-%d from query "
"result",
current_join_id);
state = execd_state;
} else {
crm_warn("Updating local node history from join-%d confirmation "
"because query failed",
current_join_id);
}
} else {
crm_debug("Updating node history for %s from join-%d confirmation",
join_from, current_join_id);
}
rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
cib_can_create|cib_transaction);
pcmk__xml_free(execd_state);
if (rc != pcmk_ok) {
goto done;
}
// Commit the transaction
rc = cib->cmds->end_transaction(cib, true, cib_none);
fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
if (rc > 0) {
// join_from will be freed after callback
join_from = NULL;
rc = pcmk_ok;
}
done:
if (rc != pcmk_ok) {
crm_crit("join-%d node history update for node %s failed: %s",
current_join_id, join_from, pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free(join_from);
free(xpath);
}
void
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *acknak = NULL;
xmlNode *tmp1 = NULL;
crm_node_t *join_node = value;
const char *join_to = join_node->uname;
bool integrated = false;
switch (join_node->join) {
case crm_join_integrated:
integrated = true;
break;
case crm_join_nack:
case crm_join_nack_quiet:
break;
default:
crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
"for join-%d", join_to,
crm_join_phase_str(join_node->join), current_join_id);
return;
}
/* Update the <node> element with the node's name and UUID, in case they
* weren't known before
*/
crm_trace("Updating node name and UUID in CIB for %s", join_to);
tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node));
crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
pcmk__xml_free(tmp1);
if (join_node->join == crm_join_nack_quiet) {
crm_trace("Not sending nack message to node %s with feature set older "
"than 3.17.0", join_to);
return;
}
join_node = pcmk__get_node(0, join_to, NULL,
pcmk__node_search_cluster_member);
if (!pcmk__cluster_is_node_active(join_node)) {
/*
* NACK'ing nodes that the membership layer doesn't know about yet
* simply creates more churn
*
* Better to leave them waiting and let the join restart when
* the new membership event comes in
*
* All other NACKs (due to versions etc) should still be processed
*/
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
return;
}
// Acknowledge or nack node's join request
crm_debug("%sing join-%d request from %s",
integrated? "Acknowledg" : "Nack", current_join_id, join_to);
acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
if (integrated) {
// No change needed for a nacked node
crm_update_peer_join(__func__, join_node, crm_join_finalized);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
/* Iterate through the remote peer cache and add information on which
* node hosts each to the ACK message. This keeps new controllers in
* sync with what has already happened.
*/
if (pcmk__cluster_num_remote_nodes() > 0) {
GHashTableIter iter;
crm_node_t *node = NULL;
xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
g_hash_table_iter_init(&iter, crm_remote_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *remote = NULL;
if (!node->conn_host) {
continue;
}
remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
pcmk__xe_set_props(remote,
PCMK_XA_ID, node->uname,
PCMK__XA_NODE_STATE, node->state,
PCMK__XA_CONNECTION_HOST, node->conn_host,
NULL);
}
}
}
pcmk__cluster_send_message(join_node, crm_msg_crmd, acknak);
pcmk__xml_free(acknak);
return;
}
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
static unsigned long long highest_seq = 0;
if (controld_globals.membership_id != crm_peer_seq) {
crm_debug("join-%d: Membership changed from %llu to %llu "
QB_XS " highest=%llu state=%s for=%s",
current_join_id, controld_globals.membership_id, crm_peer_seq,
highest_seq, fsa_state2string(cur_state), source);
if(highest_seq < crm_peer_seq) {
/* Don't spam the FSA with duplicates */
highest_seq = crm_peer_seq;
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
} else if (cur_state == S_INTEGRATION) {
if (crmd_join_phase_count(crm_join_welcomed) == 0) {
int count = crmd_join_phase_count(crm_join_integrated);
crm_debug("join-%d: Integration of %d peer%s complete "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
return TRUE;
}
} else if (cur_state == S_FINALIZE_JOIN) {
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
crm_debug("join-%d: Delaying finalization until we have CIB "
QB_XS " state=%s for=%s",
current_join_id, fsa_state2string(cur_state), source);
return TRUE;
} else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
int count = crmd_join_phase_count(crm_join_welcomed);
crm_debug("join-%d: Still waiting on %d welcomed node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(crm_join_integrated) != 0) {
int count = crmd_join_phase_count(crm_join_integrated);
crm_debug("join-%d: Still waiting on %d integrated node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(crm_join_finalized) != 0) {
int count = crmd_join_phase_count(crm_join_finalized);
crm_debug("join-%d: Still waiting on %d finalized node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else {
crm_debug("join-%d: Complete " QB_XS " state=%s for=%s",
current_join_id, fsa_state2string(cur_state), source);
register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
return TRUE;
}
}
return FALSE;
}
void
do_dc_join_final(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
- crm_update_quorum(crm_have_quorum, TRUE);
+ crm_update_quorum(pcmk__cluster_has_quorum(), TRUE);
}
int crmd_join_phase_count(enum crm_join_phase phase)
{
int count = 0;
crm_node_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
if(peer->join == phase) {
count++;
}
}
return count;
}
void crmd_join_phase_log(int level)
{
crm_node_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
crm_join_phase_str(peer->join));
}
}
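The guard in crm_update_peer_join() above allows a peer's join phase only to be reset (to crm_join_none or one of the negative nack phases) or to advance by exactly one step per update. A minimal standalone sketch of that transition rule, reusing the public enum crm_join_phase from include/crm/cluster.h (illustrative only, not the controller's actual code):

#include <stdbool.h>
#include <crm/cluster.h>    // enum crm_join_phase

/* Accept a phase change only if it resets or nacks (<= crm_join_none),
 * or advances exactly one step beyond the last known phase
 */
static bool
join_phase_change_allowed(enum crm_join_phase last, enum crm_join_phase phase)
{
    return (phase <= crm_join_none) || (phase == (last + 1));
}

A peer therefore normally walks none -> welcomed -> integrated -> finalized -> confirmed, and any attempt to skip a step is rejected with the "Rejecting join-%d phase update" warning.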
diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
index 5baca399a2..1834a129bb 100644
--- a/daemons/controld/controld_schedulerd.c
+++ b/daemons/controld/controld_schedulerd.c
@@ -1,510 +1,511 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <unistd.h> /* pid_t, sleep, ssize_t */
#include <crm/cib.h>
#include <crm/cluster.h>
#include <crm/common/xml.h>
#include <crm/crm.h>
#include <crm/common/xml_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_schedulerd.h>
#include <pacemaker-controld.h>
static void handle_disconnect(void);
static pcmk_ipc_api_t *schedulerd_api = NULL;
/*!
* \internal
* \brief Close any scheduler connection and free associated memory
*/
void
controld_shutdown_schedulerd_ipc(void)
{
controld_clear_fsa_input_flags(R_PE_REQUIRED);
pcmk_disconnect_ipc(schedulerd_api);
handle_disconnect();
pcmk_free_ipc_api(schedulerd_api);
schedulerd_api = NULL;
}
/*!
* \internal
* \brief Save CIB query result to file, raising FSA error
*
* \param[in] msg Ignored
* \param[in] call_id Call ID of CIB query
* \param[in] rc Return code of CIB query
* \param[in] output Result of CIB query
* \param[in] user_data Unique identifier for filename
*
* \note This is intended to be called after a scheduler connection fails.
*/
static void
save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
const char *id = user_data;
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
CRM_CHECK(id != NULL, return);
if (rc == pcmk_ok) {
char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
if (pcmk__xml_write_file(output, filename, true) != pcmk_rc_ok) {
crm_err("Could not save Cluster Information Base to %s after scheduler crash",
filename);
} else {
crm_notice("Saved Cluster Information Base to %s after scheduler crash",
filename);
}
free(filename);
}
}
/*!
* \internal
* \brief Respond to scheduler connection failure
*/
static void
handle_disconnect(void)
{
// If we aren't connected to the scheduler, we can't expect a reply
controld_expect_sched_reply(NULL);
if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
int rc = pcmk_ok;
char *uuid_str = crm_generate_uuid();
crm_crit("Lost connection to the scheduler "
QB_XS " CIB will be saved to " PE_STATE_DIR "/pe-core-%s.bz2",
uuid_str);
/*
* The scheduler died...
*
* Save the current CIB so that we have a chance of
* figuring out what killed it.
*
* Delay raising the I_ERROR until the query below completes or
* 5s is up, whichever comes first.
*
*/
rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
NULL, NULL, cib_none);
fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
}
controld_clear_fsa_input_flags(R_PE_CONNECTED);
controld_trigger_fsa();
return;
}
static void
handle_reply(pcmk_schedulerd_api_reply_t *reply)
{
const char *msg_ref = NULL;
if (!AM_I_DC) {
return;
}
msg_ref = reply->data.graph.reference;
if (msg_ref == NULL) {
crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
} else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
pcmk__str_none)) {
ha_msg_input_t fsa_input;
xmlNode *crm_data_node;
controld_stop_sched_timer();
/* do_te_invoke (which will eventually process the fsa_input we are constructing
* here) requires that fsa_input.xml be non-NULL. That will only happen if
* copy_ha_msg_input (which is called by register_fsa_input_adv) sees the
* fsa_input.msg that it is expecting. The scheduler's IPC dispatch function
* gave us the values we need, we just need to put them into XML.
*
* The name of the top level element here is irrelevant. Nothing checks it.
*/
fsa_input.msg = pcmk__xe_create(NULL, "dummy-reply");
crm_xml_add(fsa_input.msg, PCMK_XA_REFERENCE, msg_ref);
crm_xml_add(fsa_input.msg, PCMK__XA_CRM_TGRAPH_IN,
reply->data.graph.input);
crm_data_node = pcmk__xe_create(fsa_input.msg, PCMK__XE_CRM_XML);
pcmk__xml_copy(crm_data_node, reply->data.graph.tgraph);
register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
pcmk__xml_free(fsa_input.msg);
} else {
crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
}
}
static void
scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
crm_exit_t status, void *event_data, void *user_data)
{
pcmk_schedulerd_api_reply_t *reply = event_data;
switch (event_type) {
case pcmk_ipc_event_disconnect:
handle_disconnect();
break;
case pcmk_ipc_event_reply:
handle_reply(reply);
break;
default:
break;
}
}
static bool
new_schedulerd_ipc_connection(void)
{
int rc;
controld_set_fsa_input_flags(R_PE_REQUIRED);
if (schedulerd_api == NULL) {
rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
if (rc != pcmk_rc_ok) {
crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
return false;
}
}
pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3);
if (rc != pcmk_rc_ok) {
crm_err("Error connecting to %s: %s",
pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc));
return false;
}
controld_set_fsa_input_flags(R_PE_CONNECTED);
return true;
}
static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data);
/* A_PE_START, A_PE_STOP, O_PE_RESTART */
void
do_pe_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (pcmk_is_set(action, A_PE_STOP)) {
controld_clear_fsa_input_flags(R_PE_REQUIRED);
pcmk_disconnect_ipc(schedulerd_api);
handle_disconnect();
}
if (pcmk_is_set(action, A_PE_START)
&& !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
if (cur_state == S_STOPPING) {
crm_info("Ignoring request to connect to scheduler while shutting down");
} else if (!new_schedulerd_ipc_connection()) {
crm_warn("Could not connect to scheduler");
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
}
static int fsa_pe_query = 0;
static mainloop_timer_t *controld_sched_timer = NULL;
// @TODO Make this a configurable cluster option if there's demand for it
#define SCHED_TIMEOUT_MS (120000)
/*!
* \internal
* \brief Handle a timeout waiting for scheduler reply
*
* \param[in] user_data Ignored
*
* \return FALSE (indicating that timer should not be restarted)
*/
static gboolean
controld_sched_timeout(gpointer user_data)
{
if (AM_I_DC) {
/* If this node is the DC but can't communicate with the scheduler, just
* exit (and likely get fenced) so this node doesn't interfere with any
* further DC elections.
*
* @TODO We could try something less drastic first, like disconnecting
* and reconnecting to the scheduler, but something is likely going
* seriously wrong, so perhaps it's better to just fail as quickly as
* possible.
*/
crmd_exit(CRM_EX_FATAL);
}
return FALSE;
}
void
controld_stop_sched_timer(void)
{
if ((controld_sched_timer != NULL)
&& (controld_globals.fsa_pe_ref != NULL)) {
crm_trace("Stopping timer for scheduler reply %s",
controld_globals.fsa_pe_ref);
}
mainloop_timer_stop(controld_sched_timer);
}
/*!
* \internal
* \brief Set the scheduler request currently being waited on
*
* \param[in] ref Request to expect reply to (or NULL for none)
*
* \note This function takes ownership of \p ref.
*/
void
controld_expect_sched_reply(char *ref)
{
if (ref) {
if (controld_sched_timer == NULL) {
controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
SCHED_TIMEOUT_MS, FALSE,
controld_sched_timeout,
NULL);
}
mainloop_timer_start(controld_sched_timer);
} else {
controld_stop_sched_timer();
}
free(controld_globals.fsa_pe_ref);
controld_globals.fsa_pe_ref = ref;
}
/*!
* \internal
* \brief Free the scheduler reply timer
*/
void
controld_free_sched_timer(void)
{
if (controld_sched_timer != NULL) {
mainloop_timer_del(controld_sched_timer);
controld_sched_timer = NULL;
}
}
/* A_PE_INVOKE */
void
do_pe_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
cib_t *cib_conn = controld_globals.cib_conn;
if (AM_I_DC == FALSE) {
crm_err("Not invoking scheduler because not DC: %s",
fsa_action2string(action));
return;
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_err("Cannot shut down gracefully without the scheduler");
register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
} else {
crm_info("Waiting for the scheduler to connect");
crmd_fsa_stall(FALSE);
controld_set_fsa_action_flags(A_PE_START);
controld_trigger_fsa();
}
return;
}
if (cur_state != S_POLICY_ENGINE) {
crm_notice("Not invoking scheduler because in state %s",
fsa_state2string(cur_state));
return;
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
/* start the join from scratch */
register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
return;
}
fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_none);
crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
fsa_state2string(controld_globals.fsa_state));
controld_expect_sched_reply(NULL);
fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
}
static void
force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
{
int max = 0;
int lpc = 0;
const char *xpath_base = NULL;
char *xpath_string = NULL;
xmlXPathObjectPtr xpathObj = NULL;
xpath_base = pcmk_cib_xpath_for(PCMK_XE_CRM_CONFIG);
if (xpath_base == NULL) {
crm_err(PCMK_XE_CRM_CONFIG " CIB element not known (bug?)");
return;
}
xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
xpath_base, PCMK_XE_CLUSTER_PROPERTY_SET,
attr_name);
xpathObj = xpath_search(xml, xpath_string);
max = numXpathResults(xpathObj);
free(xpath_string);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
crm_trace("Forcing %s/%s = %s",
pcmk__xe_id(match), attr_name, attr_value);
crm_xml_add(match, PCMK_XA_VALUE, attr_value);
}
if(max == 0) {
xmlNode *configuration = NULL;
xmlNode *crm_config = NULL;
xmlNode *cluster_property_set = NULL;
crm_trace("Creating %s-%s for %s=%s",
PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name, attr_name,
attr_value);
configuration = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL,
NULL);
if (configuration == NULL) {
configuration = pcmk__xe_create(xml, PCMK_XE_CONFIGURATION);
}
crm_config = pcmk__xe_first_child(configuration, PCMK_XE_CRM_CONFIG,
NULL, NULL);
if (crm_config == NULL) {
crm_config = pcmk__xe_create(configuration, PCMK_XE_CRM_CONFIG);
}
cluster_property_set =
pcmk__xe_first_child(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
NULL);
if (cluster_property_set == NULL) {
cluster_property_set =
pcmk__xe_create(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET);
crm_xml_add(cluster_property_set, PCMK_XA_ID,
PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS);
}
xml = pcmk__xe_create(cluster_property_set, PCMK_XE_NVPAIR);
pcmk__xe_set_id(xml, "%s-%s",
PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name);
crm_xml_add(xml, PCMK_XA_NAME, attr_name);
crm_xml_add(xml, PCMK_XA_VALUE, attr_value);
}
freeXpathObject(xpathObj);
}
static void
do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
char *ref = NULL;
pid_t watchdog = pcmk__locate_sbd();
if (rc != pcmk_ok) {
crm_err("Could not retrieve the Cluster Information Base: %s "
QB_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
return;
} else if (call_id != fsa_pe_query) {
crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
return;
} else if (!AM_I_DC
|| !pcmk_is_set(controld_globals.fsa_input_register,
R_PE_CONNECTED)) {
crm_debug("No need to invoke the scheduler anymore");
return;
} else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
crm_debug("Discarding scheduler request in state: %s",
fsa_state2string(controld_globals.fsa_state));
return;
/* this callback counts as 1 */
} else if (num_cib_op_callbacks() > 1) {
crm_debug("Re-asking for the CIB: %d other peer updates still pending",
(num_cib_op_callbacks() - 1));
sleep(1);
controld_set_fsa_action_flags(A_PE_INVOKE);
controld_trigger_fsa();
return;
}
CRM_LOG_ASSERT(output != NULL);
/* Refresh the remote node cache and the known node cache when the
* scheduler is invoked */
pcmk__refresh_node_caches_from_cib(output);
crm_xml_add(output, PCMK_XA_DC_UUID, controld_globals.our_uuid);
pcmk__xe_set_bool_attr(output, PCMK_XA_HAVE_QUORUM,
pcmk_is_set(controld_globals.flags,
controld_has_quorum));
force_local_option(output, PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog));
if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
- && !crm_have_quorum) {
+ && !pcmk__cluster_has_quorum()) {
+
crm_xml_add_int(output, PCMK_XA_NO_QUORUM_PANIC, 1);
}
rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref));
if (rc < 0) {
crm_err("Could not contact the scheduler: %s " QB_XS " rc=%d",
pcmk_strerror(rc), rc);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
} else {
CRM_ASSERT(ref != NULL);
controld_expect_sched_reply(ref);
crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
"quorate=%s",
fsa_pe_query, controld_globals.fsa_pe_ref, crm_peer_seq,
pcmk__flag_text(controld_globals.flags, controld_has_quorum));
}
}
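Both controller hunks in this patch make the same substitution: reads of the deprecated public global crm_have_quorum become calls to the new internal accessor pcmk__cluster_has_quorum(), declared in include/crm/cluster/internal.h below. A caller-side sketch of the pattern (the branch bodies are placeholders):

#include <crm/cluster/internal.h>   // pcmk__cluster_has_quorum()

static void
quorum_example(void)
{
    if (pcmk__cluster_has_quorum()) {
        // Quorate: do_dc_join_final() now feeds this result to
        // crm_update_quorum()
    } else {
        // Not quorate: do_pe_invoke_callback() sets PCMK_XA_NO_QUORUM_PANIC
        // if the cluster ever had quorum before
    }
}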
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index 778c4baa3d..98fbe831c9 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -1,265 +1,261 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER__H
# define PCMK__CRM_CLUSTER__H
# include <stdint.h> // uint32_t, uint64_t
# include <glib.h> // gboolean, GHashTable
# include <libxml/tree.h> // xmlNode
# include <crm/common/xml.h>
# include <crm/common/util.h>
#ifdef __cplusplus
extern "C" {
#endif
# if SUPPORT_COROSYNC
# include <corosync/cpg.h>
# endif
-// @COMPAT Make this internal when we can break API backward compatibility
-//! \deprecated Do not use (public access will be removed in a future release)
-extern gboolean crm_have_quorum;
-
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern GHashTable *crm_peer_cache;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern GHashTable *crm_remote_peer_cache;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern unsigned long long crm_peer_seq;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
#define CRM_NODE_LOST "lost"
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
#define CRM_NODE_MEMBER "member"
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_join_phase {
/* @COMPAT: crm_join_nack_quiet can be replaced by crm_node_t:user_data
* at a compatibility break.
*/
//! Not allowed to join, but don't send a nack message
crm_join_nack_quiet = -2,
crm_join_nack = -1,
crm_join_none = 0,
crm_join_welcomed = 1,
crm_join_integrated = 2,
crm_join_finalized = 3,
crm_join_confirmed = 4,
};
//!@}
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_node_flags {
/* Node is not a cluster node and should not be considered for cluster
* membership
*/
crm_remote_node = (1U << 0),
// Node's cache entry is dirty
crm_node_dirty = (1U << 1),
};
//!@}
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
typedef struct crm_peer_node_s {
char *uname; // Node name as known to cluster
/* @COMPAT This is less than ideal since the value is not a valid XML ID
* (for Corosync, it's the string equivalent of the node's numeric node ID,
* but XML IDs can't start with a number) and the three elements should have
* different IDs.
*
* Ideally, we would use something like node-NODEID, node_state-NODEID, and
* transient_attributes-NODEID as the element IDs. Unfortunately changing it
* would be impractical due to backward compatibility; older nodes in a
* rolling upgrade will always write and expect the value in the old format.
*
* This is also named poorly, since the value is not a UUID, but at least
* that can be changed at an API compatibility break.
*/
/*! Value of the PCMK_XA_ID XML attribute to use with the node's
* PCMK_XE_NODE, PCMK_XE_NODE_STATE, and PCMK_XE_TRANSIENT_ATTRIBUTES
* XML elements in the CIB
*/
char *uuid;
char *state; // @TODO change to enum
uint64_t flags; // Bitmask of crm_node_flags
uint64_t last_seen; // Only needed by cluster nodes
uint32_t processes; // @TODO most not needed, merge into flags
/* @TODO When we can break public API compatibility, we can make the rest of
* these members separate structs and use void *cluster_data and
* void *user_data here instead, to abstract the cluster layer further.
*/
// Currently only needed by corosync stack
uint32_t id; // Node ID
time_t when_lost; // When CPG membership was last lost
// Only used by controller
enum crm_join_phase join;
char *expected;
time_t peer_lost;
char *conn_host;
time_t when_member; // Since when node has been a cluster member
time_t when_online; // Since when peer has been online in CPG
} crm_node_t;
//!@}
// Implementation of pcmk_cluster_t
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
struct crm_cluster_s {
char *uuid;
char *uname;
uint32_t nodeid;
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_cluster_set_destroy_fn() to set this
void (*destroy) (gpointer);
# if SUPPORT_COROSYNC
/* @TODO When we can break public API compatibility, make these members a
* separate struct and use void *cluster_data here instead, to abstract the
* cluster layer further.
*/
struct cpg_name group;
// NOTE: sbd (as of at least 1.5.2) uses this
/*!
* \deprecated Call pcmk_cpg_set_deliver_fn() and pcmk_cpg_set_confchg_fn()
* to set these
*/
cpg_callbacks_t cpg;
cpg_handle_t cpg_handle;
# endif
};
//!@}
//! Connection to a cluster layer
typedef struct crm_cluster_s pcmk_cluster_t;
int pcmk_cluster_connect(pcmk_cluster_t *cluster);
int pcmk_cluster_disconnect(pcmk_cluster_t *cluster);
pcmk_cluster_t *pcmk_cluster_new(void);
void pcmk_cluster_free(pcmk_cluster_t *cluster);
int pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer));
#if SUPPORT_COROSYNC
int pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn);
int pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn);
#endif // SUPPORT_COROSYNC
/* @COMPAT Make this internal when we can break API backward compatibility. Also
* evaluate whether we can drop this entirely. Since 2.0.0, we have sent only
* messages with crm_class_cluster.
*/
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_ais_msg_class {
crm_class_cluster = 0,
};
//!@}
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_ais_msg_types {
crm_msg_none = 0,
crm_msg_ais = 1, // Unused
crm_msg_lrmd = 2,
crm_msg_cib = 3,
crm_msg_crmd = 4,
crm_msg_attrd = 5,
crm_msg_stonithd = 6, // Unused
crm_msg_te = 7, // Unused
crm_msg_pe = 8, // Unused
crm_msg_stonith_ng = 9,
};
//!@}
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_status_type {
crm_status_uname,
crm_status_nstate,
crm_status_processes,
};
//!@}
/*!
* \enum pcmk_cluster_layer
* \brief Types of cluster layer
*/
enum pcmk_cluster_layer {
pcmk_cluster_layer_unknown = 1, //!< Unknown cluster layer
pcmk_cluster_layer_invalid = 2, //!< Invalid cluster layer
pcmk_cluster_layer_corosync = 32, //!< Corosync Cluster Engine
};
enum pcmk_cluster_layer pcmk_get_cluster_layer(void);
const char *pcmk_cluster_layer_text(enum pcmk_cluster_layer layer);
/*!
* \brief Get log-friendly string equivalent of a join phase
*
* \param[in] phase Join phase
*
* \return Log-friendly string equivalent of \p phase
*/
//! \deprecated Do not use (public access will be removed in a future release)
static inline const char *
crm_join_phase_str(enum crm_join_phase phase)
{
switch (phase) {
case crm_join_nack_quiet: return "nack_quiet";
case crm_join_nack: return "nack";
case crm_join_none: return "none";
case crm_join_welcomed: return "welcomed";
case crm_join_integrated: return "integrated";
case crm_join_finalized: return "finalized";
case crm_join_confirmed: return "confirmed";
default: return "invalid";
}
}
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/cluster/compat.h>
#endif
#ifdef __cplusplus
}
#endif
#endif
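This hunk removes the deprecated extern gboolean crm_have_quorum from the public header entirely, rather than leaving it exported like the neighboring deprecated globals, so out-of-tree code can no longer read or write the quorum flag directly. The accessor's definition is not visible in this excerpt; a plausible shape, assuming the flag becomes file-local state in libcrmcluster updated by the quorum notification callback (hypothetical sketch, not the actual implementation):

#include <stdbool.h>

// Hypothetical file-local state, set from the quorum notification callback
static bool cluster_quorate = false;

bool
pcmk__cluster_has_quorum(void)
{
    return cluster_quorate;
}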
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index afc6e843c8..39f012bc61 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -1,193 +1,195 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER_INTERNAL__H
# define PCMK__CRM_CLUSTER_INTERNAL__H
# include <stdbool.h>
# include <stdint.h> // uint32_t, uint64_t
# include <glib.h> // gboolean
# include <crm/cluster.h>
enum crm_proc_flag {
/* @COMPAT When crm_node_t:processes is made internal, we can merge this
* into node flags or turn it into a boolean. Until then, in theory
* something could depend on these particular numeric values.
*/
crm_proc_none = 0x00000001,
// Cluster layers
crm_proc_cpg = 0x04000000,
};
// Used with node cache search functions
enum pcmk__node_search_flags {
//! Does not affect search
pcmk__node_search_none = 0,
//! Search for cluster nodes from membership cache
pcmk__node_search_cluster_member = (1 << 0),
//! Search for remote nodes
pcmk__node_search_remote = (1 << 1),
//! Search for cluster member nodes and remote nodes
pcmk__node_search_any = pcmk__node_search_cluster_member
|pcmk__node_search_remote,
//! Search for cluster nodes from CIB (as of last cache refresh)
pcmk__node_search_cluster_cib = (1 << 2),
};
/*!
* \internal
* \brief Return the process bit corresponding to the current cluster stack
*
* \return Process flag if detectable, otherwise 0
*/
static inline uint32_t
crm_get_cluster_proc(void)
{
switch (pcmk_get_cluster_layer()) {
case pcmk_cluster_layer_corosync:
return crm_proc_cpg;
default:
break;
}
return crm_proc_none;
}
/*!
* \internal
* \brief Get log-friendly string description of a Corosync return code
*
* \param[in] error Corosync return code
*
* \return Log-friendly string description corresponding to \p error
*/
static inline const char *
pcmk__cs_err_str(int error)
{
# if SUPPORT_COROSYNC
switch (error) {
case CS_OK: return "OK";
case CS_ERR_LIBRARY: return "Library error";
case CS_ERR_VERSION: return "Version error";
case CS_ERR_INIT: return "Initialization error";
case CS_ERR_TIMEOUT: return "Timeout";
case CS_ERR_TRY_AGAIN: return "Try again";
case CS_ERR_INVALID_PARAM: return "Invalid parameter";
case CS_ERR_NO_MEMORY: return "No memory";
case CS_ERR_BAD_HANDLE: return "Bad handle";
case CS_ERR_BUSY: return "Busy";
case CS_ERR_ACCESS: return "Access error";
case CS_ERR_NOT_EXIST: return "Doesn't exist";
case CS_ERR_NAME_TOO_LONG: return "Name too long";
case CS_ERR_EXIST: return "Exists";
case CS_ERR_NO_SPACE: return "No space";
case CS_ERR_INTERRUPT: return "Interrupt";
case CS_ERR_NAME_NOT_FOUND: return "Name not found";
case CS_ERR_NO_RESOURCES: return "No resources";
case CS_ERR_NOT_SUPPORTED: return "Not supported";
case CS_ERR_BAD_OPERATION: return "Bad operation";
case CS_ERR_FAILED_OPERATION: return "Failed operation";
case CS_ERR_MESSAGE_ERROR: return "Message error";
case CS_ERR_QUEUE_FULL: return "Queue full";
case CS_ERR_QUEUE_NOT_AVAILABLE: return "Queue not available";
case CS_ERR_BAD_FLAGS: return "Bad flags";
case CS_ERR_TOO_BIG: return "Too big";
case CS_ERR_NO_SECTIONS: return "No sections";
}
# endif
return "Corosync error";
}
# if SUPPORT_COROSYNC
#if 0
/* This is the new way to do it, but we still support all Corosync 2 versions,
* and this isn't always available. A better alternative here would be to check
* for support in the configure script and enable this conditionally.
*/
#define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP)
#else
#define pcmk__init_cmap(handle) cmap_initialize(handle)
#endif
char *pcmk__corosync_cluster_name(void);
bool pcmk__corosync_add_nodes(xmlNode *xml_parent);
void pcmk__cpg_confchg_cb(cpg_handle_t handle,
const struct cpg_name *group_name,
const struct cpg_address *member_list,
size_t member_list_entries,
const struct cpg_address *left_list,
size_t left_list_entries,
const struct cpg_address *joined_list,
size_t joined_list_entries);
char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id,
uint32_t pid, void *content, uint32_t *kind,
const char **from);
# endif
const char *pcmk__cluster_node_uuid(crm_node_t *node);
char *pcmk__cluster_node_name(uint32_t nodeid);
const char *pcmk__cluster_local_node_name(void);
const char *pcmk__node_name_from_uuid(const char *uuid);
crm_node_t *crm_update_peer_proc(const char *source, crm_node_t * peer,
uint32_t flag, const char *status);
crm_node_t *pcmk__update_peer_state(const char *source, crm_node_t *node,
const char *state, uint64_t membership);
void pcmk__update_peer_expected(const char *source, crm_node_t *node,
const char *expected);
void pcmk__reap_unseen_nodes(uint64_t ring_id);
void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
gboolean),
void (*destroy) (gpointer));
enum crm_ais_msg_types pcmk__cluster_parse_msg_type(const char *text);
bool pcmk__cluster_send_message(const crm_node_t *node,
enum crm_ais_msg_types service,
const xmlNode *data);
// Membership
+bool pcmk__cluster_has_quorum(void);
+
void pcmk__cluster_init_node_caches(void);
void pcmk__cluster_destroy_node_caches(void);
void pcmk__cluster_set_autoreap(bool enable);
void pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type,
crm_node_t *,
const void *));
bool pcmk__cluster_is_node_active(const crm_node_t *node);
unsigned int pcmk__cluster_num_active_nodes(void);
unsigned int pcmk__cluster_num_remote_nodes(void);
crm_node_t *pcmk__cluster_lookup_remote_node(const char *node_name);
void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name);
void pcmk__cluster_forget_remote_node(const char *node_name);
crm_node_t *pcmk__search_node_caches(unsigned int id, const char *uname,
uint32_t flags);
void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id);
void pcmk__refresh_node_caches_from_cib(xmlNode *cib);
crm_node_t *pcmk__get_node(unsigned int id, const char *uname,
const char *uuid, uint32_t flags);
#endif // PCMK__CRM_CLUSTER_INTERNAL__H
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c
index 833ba1d990..fe2b476129 100644
--- a/lib/cluster/corosync.c
+++ b/lib/cluster/corosync.c
@@ -1,811 +1,815 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <arpa/inet.h>
#include <inttypes.h> // PRIu64, PRIx32
#include <netdb.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <bzlib.h>
#include <corosync/cfg.h>
#include <corosync/cmap.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/quorum.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <crm/cluster/internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h> // PCMK__SPECIAL_PID
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include "crmcluster_private.h"
static quorum_handle_t pcmk_quorum_handle = 0;
static gboolean (*quorum_app_callback)(unsigned long long seq,
gboolean quorate) = NULL;
/*!
* \internal
* \brief Get the Corosync UUID associated with a Pacemaker node
*
* \param[in] node Pacemaker node
*
* \return Newly allocated string with node's Corosync UUID, or NULL if unknown
* \note It is the caller's responsibility to free the result with free().
*/
char *
pcmk__corosync_uuid(const crm_node_t *node)
{
CRM_ASSERT(pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync);
if (node != NULL) {
if (node->id > 0) {
return crm_strdup_printf("%u", node->id);
} else {
crm_info("Node %s is not yet known by Corosync", node->uname);
}
}
return NULL;
}
static bool
node_name_is_valid(const char *key, const char *name)
{
int octet;
if (name == NULL) {
crm_trace("%s is empty", key);
return false;
} else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) {
crm_trace("%s contains an IPv4 address (%s), ignoring", key, name);
return false;
} else if (strstr(name, ":") != NULL) {
crm_trace("%s contains an IPv6 address (%s), ignoring", key, name);
return false;
}
crm_trace("'%s: %s' is valid", key, name);
return true;
}
/*!
* \internal
* \brief Get Corosync node name corresponding to a node ID
*
* \param[in] cmap_handle Connection to Corosync CMAP
* \param[in] nodeid Node ID to check
*
* \return Newly allocated string with name or (if no name) IP address
* associated with first address assigned to a Corosync node ID (or NULL
* if unknown)
* \note It is the caller's responsibility to free the result with free().
*/
char *
pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid)
{
// Originally based on corosync-quorumtool.c:node_name()
int lpc = 0;
cs_error_t rc = CS_OK;
int retries = 0;
char *name = NULL;
cmap_handle_t local_handle = 0;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
if (nodeid == 0) {
nodeid = pcmk__cpg_local_nodeid(0);
}
if (cmap_handle == 0 && local_handle == 0) {
retries = 0;
crm_trace("Initializing CMAP connection");
do {
rc = pcmk__init_cmap(&local_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %s",
cs_strerror(rc));
local_handle = 0;
}
}
if (cmap_handle == 0) {
cmap_handle = local_handle;
rc = cmap_fd_get(cmap_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
}
while (name == NULL && cmap_handle != 0) {
uint32_t id = 0;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &id);
crm_trace("Checking %u vs %u from %s", nodeid, id, key);
free(key);
if (rc != CS_OK) {
break;
}
if (nodeid == id) {
crm_trace("Searching for node name for %u in nodelist.node.%d %s",
nodeid, lpc, pcmk__s(name, "<null>"));
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.name", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, pcmk__s(name, "<null>"));
free(key);
}
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, pcmk__s(name, "<null>"));
if (!node_name_is_valid(key, name)) {
free(name);
name = NULL;
}
free(key);
}
break;
}
lpc++;
}
bail:
if(local_handle) {
cmap_finalize(local_handle);
}
if (name == NULL) {
crm_info("Unable to get node name for nodeid %u", nodeid);
}
return name;
}
/*!
* \internal
* \brief Disconnect from Corosync cluster
*
* \param[in,out] cluster Cluster object to disconnect
*/
void
pcmk__corosync_disconnect(pcmk_cluster_t *cluster)
{
pcmk__cpg_disconnect(cluster);
if (pcmk_quorum_handle != 0) {
quorum_finalize(pcmk_quorum_handle);
pcmk_quorum_handle = 0;
}
crm_notice("Disconnected from Corosync");
}
/*!
* \internal
* \brief Dispatch function for quorum connection file descriptor
*
* \param[in] user_data Ignored
*
* \return 0 on success, -1 on error (per mainloop_io_t interface)
*/
static int
quorum_dispatch_cb(gpointer user_data)
{
int rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL);
if (rc < 0) {
crm_err("Connection to the Quorum API failed: %d", rc);
quorum_finalize(pcmk_quorum_handle);
pcmk_quorum_handle = 0;
return -1;
}
return 0;
}
/*!
* \internal
* \brief Notification callback for Corosync quorum connection
*
* \param[in] handle Corosync quorum connection
* \param[in] quorate Whether cluster is quorate
* \param[in] ring_id Corosync ring ID
* \param[in] view_list_entries Number of entries in \p view_list
* \param[in] view_list Corosync node IDs in membership
*/
static void
quorum_notification_cb(quorum_handle_t handle, uint32_t quorate,
uint64_t ring_id, uint32_t view_list_entries,
uint32_t *view_list)
{
int i;
GHashTableIter iter;
crm_node_t *node = NULL;
static gboolean init_phase = TRUE;
- if (quorate != crm_have_quorum) {
- if (quorate) {
- crm_notice("Quorum acquired " QB_XS " membership=%" PRIu64 " members=%lu",
- ring_id, (long unsigned int)view_list_entries);
- } else {
- crm_warn("Quorum lost " QB_XS " membership=%" PRIu64 " members=%lu",
- ring_id, (long unsigned int)view_list_entries);
- }
- crm_have_quorum = quorate;
+ bool is_quorate = (quorate != 0);
+ bool was_quorate = pcmk__cluster_has_quorum();
+
+ if (is_quorate && !was_quorate) {
+ crm_notice("Quorum acquired " QB_XS " membership=%" PRIu64
+ " members=%" PRIu32,
+ ring_id, view_list_entries);
+ pcmk__cluster_set_quorum(true);
+
+ } else if (!is_quorate && was_quorate) {
+ crm_warn("Quorum lost " QB_XS " membership=%" PRIu64 " members=" PRIu32,
+ ring_id, view_list_entries);
+ pcmk__cluster_set_quorum(false);
} else {
- crm_info("Quorum %s " QB_XS " membership=%" PRIu64 " members=%lu",
- (quorate? "retained" : "still lost"), ring_id,
- (long unsigned int)view_list_entries);
+ crm_info("Quorum %s " QB_XS " membership=%" PRIu64 " members=%" PRIu32,
+ (is_quorate? "retained" : "still lost"), ring_id,
+ view_list_entries);
}
if (view_list_entries == 0 && init_phase) {
crm_info("Corosync membership is still forming, ignoring");
return;
}
init_phase = FALSE;
/* Reset last_seen for all cached nodes so we can tell which ones aren't
* in the view list */
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
node->last_seen = 0;
}
/* Update the peer cache for each node in view list */
for (i = 0; i < view_list_entries; i++) {
uint32_t id = view_list[i];
crm_debug("Member[%d] %u ", i, id);
/* Get this node's peer cache entry (adding one if not already there) */
node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member);
if (node->uname == NULL) {
char *name = pcmk__corosync_name(0, id);
crm_info("Obtaining name for new node %u", id);
node = pcmk__get_node(id, name, NULL,
pcmk__node_search_cluster_member);
free(name);
}
/* Update the node state (including updating last_seen to ring_id) */
pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, ring_id);
}
/* Remove any peer cache entries we didn't update */
pcmk__reap_unseen_nodes(ring_id);
if (quorum_app_callback) {
- quorum_app_callback(ring_id, quorate);
+ quorum_app_callback(ring_id, is_quorate);
}
}
/*!
* \internal
* \brief Connect to Corosync quorum service
*
* \param[in] dispatch Connection dispatch callback
* \param[in] destroy Connection destroy callback
*/
void
pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
gboolean),
void (*destroy)(gpointer))
{
cs_error_t rc;
int fd = 0;
int quorate = 0;
uint32_t quorum_type = 0;
struct mainloop_fd_callbacks quorum_fd_callbacks;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
quorum_fd_callbacks.dispatch = quorum_dispatch_cb;
quorum_fd_callbacks.destroy = destroy;
crm_debug("Configuring Pacemaker to obtain quorum from Corosync");
{
#if 0
// New way but not supported by all Corosync 2 versions
quorum_model_v0_data_t quorum_model_data = {
.model = QUORUM_MODEL_V0,
.quorum_notify_fn = quorum_notification_cb,
};
rc = quorum_model_initialize(&pcmk_quorum_handle, QUORUM_MODEL_V0,
(quorum_model_data_t *) &quorum_model_data,
&quorum_type, NULL);
#else
quorum_callbacks_t quorum_callbacks = {
.quorum_notify_fn = quorum_notification_cb,
};
rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks,
&quorum_type);
#endif
}
if (rc != CS_OK) {
crm_err("Could not connect to the Quorum API: %s (%d)",
cs_strerror(rc), rc);
goto bail;
} else if (quorum_type != QUORUM_SET) {
crm_err("Corosync quorum is not configured");
goto bail;
}
rc = quorum_fd_get(pcmk_quorum_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the Quorum API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* Quorum provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("Quorum provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
rc = CS_ERR_ACCESS;
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of Quorum provider: %s (%d)",
strerror(-rv), -rv);
rc = CS_ERR_ACCESS;
goto bail;
}
rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
if (rc != CS_OK) {
crm_err("Could not obtain the current Quorum API state: %d", rc);
goto bail;
}
if (quorate) {
crm_notice("Quorum acquired");
} else {
crm_warn("No quorum");
}
quorum_app_callback = dispatch;
- crm_have_quorum = quorate;
+ pcmk__cluster_set_quorum(quorate != 0);
rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT);
if (rc != CS_OK) {
crm_err("Could not setup Quorum API notifications: %d", rc);
goto bail;
}
mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks);
pcmk__corosync_add_nodes(NULL);
bail:
if (rc != CS_OK) {
quorum_finalize(pcmk_quorum_handle);
}
}
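/* A minimal dispatch callback sketch (hypothetical, not part of this change;
 * the TRUE return is assumed to follow the usual gboolean-callback
 * convention of "keep going"). A function like this would then be passed as
 * the \p dispatch argument above:
 *
 *     static gboolean
 *     example_quorum_dispatch(unsigned long long seq, gboolean quorate)
 *     {
 *         crm_debug("Membership %llu: quorum %s", seq,
 *                   (quorate? "attained" : "lost"));
 *         return TRUE;
 *     }
 */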
/*!
* \internal
* \brief Connect to Corosync cluster layer
*
* \param[in,out] cluster Initialized cluster object to connect
*
* \return Standard Pacemaker return code
*/
int
pcmk__corosync_connect(pcmk_cluster_t *cluster)
{
crm_node_t *peer = NULL;
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
int rc = pcmk_rc_ok;
pcmk__cluster_init_node_caches();
if (cluster_layer != pcmk_cluster_layer_corosync) {
crm_err("Invalid cluster layer: %s " QB_XS " cluster_layer=%d",
cluster_layer_s, cluster_layer);
return EINVAL;
}
rc = pcmk__cpg_connect(cluster);
if (rc != pcmk_rc_ok) {
// Error message was logged by pcmk__cpg_connect()
return rc;
}
crm_info("Connection to %s established", cluster_layer_s);
cluster->nodeid = pcmk__cpg_local_nodeid(0);
if (cluster->nodeid == 0) {
crm_err("Could not determine local node ID");
return ENXIO;
}
cluster->uname = pcmk__cluster_node_name(0);
if (cluster->uname == NULL) {
crm_err("Could not determine local node name");
return ENXIO;
}
// Ensure local node always exists in peer cache
peer = pcmk__get_node(cluster->nodeid, cluster->uname, NULL,
pcmk__node_search_cluster_member);
cluster->uuid = pcmk__corosync_uuid(peer);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Check whether a Corosync cluster is active
*
* \return \c true if Corosync is found active, or \c false otherwise
*/
bool
pcmk__corosync_is_active(void)
{
cmap_handle_t handle;
int rc = pcmk__init_cmap(&handle);
if (rc == CS_OK) {
cmap_finalize(handle);
return true;
}
crm_info("Failed to initialize the cmap API: %s (%d)",
pcmk__cs_err_str(rc), rc);
return false;
}
/*!
* \internal
* \brief Check whether a Corosync cluster peer is active
*
* \param[in] node Node to check
*
* \return \c true if \p node is an active Corosync peer, or \c false otherwise
*/
bool
pcmk__corosync_is_peer_active(const crm_node_t *node)
{
if (node == NULL) {
crm_trace("Corosync peer inactive: NULL");
return false;
}
if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_none)) {
crm_trace("Corosync peer %s inactive: state=%s",
node->uname, node->state);
return false;
}
if (!pcmk_is_set(node->processes, crm_proc_cpg)) {
crm_trace("Corosync peer %s inactive " QB_XS " processes=%.16" PRIx32,
node->uname, node->processes);
return false;
}
return true;
}
/*!
* \internal
* \brief Load Corosync node list (via CMAP) into peer cache and optionally XML
*
* \param[in,out] xml_parent If not NULL, add <node> entry here for each node
*
* \return true if any nodes were found, false otherwise
*/
bool
pcmk__corosync_add_nodes(xmlNode *xml_parent)
{
int lpc = 0;
cs_error_t rc = CS_OK;
int retries = 0;
bool any = false;
cmap_handle_t cmap_handle;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
do {
rc = pcmk__init_cmap(&cmap_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
return false;
}
rc = cmap_fd_get(cmap_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
pcmk__cluster_init_node_caches();
crm_trace("Initializing Corosync node list");
for (lpc = 0; TRUE; lpc++) {
uint32_t nodeid = 0;
char *name = NULL;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &nodeid);
free(key);
if (rc != CS_OK) {
break;
}
name = pcmk__corosync_name(cmap_handle, nodeid);
if (name != NULL) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node && node->uname && strcasecmp(node->uname, name) == 0) {
if (node->id && node->id != nodeid) {
crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id,
nodeid, name);
crm_exit(CRM_EX_FATAL);
}
}
}
}
if (nodeid > 0 || name != NULL) {
crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name);
pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster_member);
}
if (nodeid > 0 && name != NULL) {
any = true;
if (xml_parent) {
xmlNode *node = pcmk__xe_create(xml_parent, PCMK_XE_NODE);
pcmk__xe_set_id(node, "%u", nodeid);
crm_xml_add(node, PCMK_XA_UNAME, name);
}
}
free(name);
}
bail:
cmap_finalize(cmap_handle);
return any;
}
/*!
* \internal
* \brief Get cluster name from Corosync configuration (via CMAP)
*
* \return Newly allocated string with cluster name if configured, or NULL
*/
char *
pcmk__corosync_cluster_name(void)
{
cmap_handle_t handle;
char *cluster_name = NULL;
cs_error_t rc = CS_OK;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
rc = pcmk__init_cmap(&handle);
if (rc != CS_OK) {
crm_info("Failed to initialize the cmap API: %s (%d)",
cs_strerror(rc), rc);
return NULL;
}
rc = cmap_fd_get(handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name);
if (rc != CS_OK) {
crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc);
} else {
crm_debug("cmap totem.cluster_name = '%s'", cluster_name);
}
bail:
cmap_finalize(handle);
return cluster_name;
}
/*!
* \internal
* \brief Check (via CMAP) whether Corosync configuration has a node list
*
* \return true if Corosync has node list, otherwise false
*/
bool
pcmk__corosync_has_nodelist(void)
{
cs_error_t cs_rc = CS_OK;
int retries = 0;
cmap_handle_t cmap_handle;
cmap_iter_handle_t iter_handle;
char key_name[CMAP_KEYNAME_MAXLEN + 1];
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rc = pcmk_ok;
static bool got_result = false;
static bool result = false;
if (got_result) {
return result;
}
// Connect to CMAP
do {
cs_rc = pcmk__init_cmap(&cmap_handle);
if (cs_rc != CS_OK) {
retries++;
crm_debug("CMAP connection failed: %s (rc=%d, retrying in %ds)",
cs_strerror(cs_rc), cs_rc, retries);
sleep(retries);
}
} while ((retries < 5) && (cs_rc != CS_OK));
if (cs_rc != CS_OK) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP connection failed (%s) " QB_XS " rc=%d",
cs_strerror(cs_rc), cs_rc);
return false;
}
// Get CMAP connection file descriptor
cs_rc = cmap_fd_get(cmap_handle, &fd);
if (cs_rc != CS_OK) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP unusable (%s) " QB_XS " rc=%d",
cs_strerror(cs_rc), cs_rc);
goto bail;
}
// Check whether CMAP connection is authentic (i.e. provided by root)
rc = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0,
&found_pid, &found_uid, &found_gid);
if (rc == 0) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP provider is inauthentic "
QB_XS " pid=%lld uid=%lld gid=%lld",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rc < 0) {
crm_warn("Assuming Corosync does not have node list: "
"Could not verify CMAP authenticity (%s) " QB_XS " rc=%d",
pcmk_strerror(rc), rc);
goto bail;
}
// Check whether nodelist section is present
cs_rc = cmap_iter_init(cmap_handle, "nodelist", &iter_handle);
if (cs_rc != CS_OK) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP not readable (%s) " QB_XS " rc=%d",
cs_strerror(cs_rc), cs_rc);
goto bail;
}
cs_rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL);
if (cs_rc == CS_OK) {
result = true;
}
cmap_iter_finalize(cmap_handle, iter_handle);
got_result = true;
crm_debug("Corosync %s node list", (result? "has" : "does not have"));
bail:
cmap_finalize(cmap_handle);
return result;
}
diff --git a/lib/cluster/crmcluster_private.h b/lib/cluster/crmcluster_private.h
index 4d06f99cdc..a9675bbb04 100644
--- a/lib/cluster/crmcluster_private.h
+++ b/lib/cluster/crmcluster_private.h
@@ -1,67 +1,70 @@
/*
* Copyright 2020-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRMCLUSTER_PRIVATE__H
# define PCMK__CRMCLUSTER_PRIVATE__H
/* This header is for the sole use of libcrmcluster, so that functions can be
* declared with G_GNUC_INTERNAL for efficiency.
*/
#include <stdint.h> // uint32_t, uint64_t
#include <glib.h> // G_GNUC_INTERNAL, gboolean
#include <libxml/tree.h> // xmlNode
#if SUPPORT_COROSYNC
#include <corosync/cpg.h> // cpg_handle_t
#endif // SUPPORT_COROSYNC
#include <crm/cluster.h> // crm_node_t
+G_GNUC_INTERNAL
+void pcmk__cluster_set_quorum(bool quorate);
+
#if SUPPORT_COROSYNC
G_GNUC_INTERNAL
bool pcmk__corosync_is_active(void);
G_GNUC_INTERNAL
bool pcmk__corosync_has_nodelist(void);
G_GNUC_INTERNAL
char *pcmk__corosync_uuid(const crm_node_t *peer);
G_GNUC_INTERNAL
char *pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle,
uint32_t nodeid);
G_GNUC_INTERNAL
int pcmk__corosync_connect(pcmk_cluster_t *cluster);
G_GNUC_INTERNAL
void pcmk__corosync_disconnect(pcmk_cluster_t *cluster);
G_GNUC_INTERNAL
bool pcmk__corosync_is_peer_active(const crm_node_t *node);
G_GNUC_INTERNAL
int pcmk__cpg_connect(pcmk_cluster_t *cluster);
G_GNUC_INTERNAL
void pcmk__cpg_disconnect(pcmk_cluster_t *cluster);
G_GNUC_INTERNAL
uint32_t pcmk__cpg_local_nodeid(cpg_handle_t handle);
G_GNUC_INTERNAL
bool pcmk__cpg_send_xml(const xmlNode *msg, const crm_node_t *node,
enum crm_ais_msg_types dest);
#endif // SUPPORT_COROSYNC
#endif // PCMK__CRMCLUSTER_PRIVATE__H
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index f792097f6b..2c354f5986 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -1,1454 +1,1481 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <inttypes.h> // PRIu32
+#include <stdbool.h> // bool
+#include <stdio.h>
+#include <string.h>
#include <sys/param.h>
#include <sys/types.h>
-#include <stdio.h>
#include <unistd.h>
-#include <string.h>
+
#include <glib.h>
+
#include <crm/common/ipc.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include "crmcluster_private.h"
/* The peer cache remembers cluster nodes that have been seen.
* This is managed mostly automatically by libcluster, based on
* cluster membership events.
*
* Because cluster nodes can have conflicting names or UUIDs,
* the hash table key is a uniquely generated ID.
*
* @COMPAT When this is internal, rename to cluster_node_member_cache and make
* static.
*/
GHashTable *crm_peer_cache = NULL;
/*
* The remote peer cache tracks pacemaker_remote nodes. While the
* value has the same type as the peer cache's, it is tracked separately for
* three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
* so the name (which is also the UUID) is used as the hash table key; there
* is no equivalent of membership events, so management is not automatic; and
* most users of the peer cache need to exclude pacemaker_remote nodes.
*
* That said, using a single cache would be more logical and less error-prone,
* so it would be a good idea to merge them one day.
*
* libcluster provides two avenues for populating the cache:
* pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
* directly manage it, while refresh_remote_nodes() populates it via the CIB.
*/
GHashTable *crm_remote_peer_cache = NULL;
/*
* The CIB cluster node cache tracks cluster nodes that have been seen in
* the CIB. It is useful mainly when a caller needs to know about a node that
* may no longer be in the membership, but doesn't want to add the node to the
* main peer cache tables.
*/
static GHashTable *cluster_node_cib_cache = NULL;
unsigned long long crm_peer_seq = 0;
-gboolean crm_have_quorum = FALSE;
static bool autoreap = true;
+static bool has_quorum = false;
// Flag setting and clearing for crm_node_t:flags
#define set_peer_flags(peer, flags_to_set) do { \
(peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Peer", (peer)->uname, \
(peer)->flags, (flags_to_set), \
#flags_to_set); \
} while (0)
#define clear_peer_flags(peer, flags_to_clear) do { \
(peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Peer", (peer)->uname, \
(peer)->flags, (flags_to_clear), \
#flags_to_clear); \
} while (0)
static void update_peer_uname(crm_node_t *node, const char *uname);
static crm_node_t *find_cib_cluster_node(const char *id, const char *uname);
+/*!
+ * \internal
+ * \brief Check whether the cluster currently has quorum
+ *
+ * \return \c true if the cluster has quorum, or \c false otherwise
+ */
+bool
+pcmk__cluster_has_quorum(void)
+{
+ return has_quorum;
+}
+
+/*!
+ * \internal
+ * \brief Set whether the cluster currently has quorum
+ *
+ * \param[in] quorate \c true if the cluster has quorum, or \c false otherwise
+ */
+void
+pcmk__cluster_set_quorum(bool quorate)
+{
+ has_quorum = quorate;
+}
+
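+/* A minimal usage sketch (hypothetical, not part of this change): code that
+ * previously read the crm_have_quorum global can poll the accessor instead:
+ *
+ *     if (!pcmk__cluster_has_quorum()) {
+ *         crm_info("Deferring action until quorum is attained");
+ *     }
+ */
+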
/*!
* \internal
* \brief Get the number of Pacemaker Remote nodes that have been seen
*
* \return Number of cached Pacemaker Remote nodes
*/
unsigned int
pcmk__cluster_num_remote_nodes(void)
{
if (crm_remote_peer_cache == NULL) {
return 0U;
}
return g_hash_table_size(crm_remote_peer_cache);
}
/*!
* \internal
* \brief Get a remote node cache entry, creating it if necessary
*
* \param[in] node_name Name of remote node
*
* \return Cache entry for node on success, or \c NULL (and set \c errno)
* otherwise
*
* \note When creating a new entry, this will leave the node state undetermined.
* The caller should also call \c pcmk__update_peer_state() if the state
* is known.
* \note Because this can add and remove cache entries, callers should not
* assume any previously obtained cache entry pointers remain valid.
*/
crm_node_t *
pcmk__cluster_lookup_remote_node(const char *node_name)
{
crm_node_t *node;
char *node_name_copy = NULL;
if (node_name == NULL) {
errno = EINVAL;
return NULL;
}
/* It's theoretically possible that the node was added to the cluster peer
* cache before it was known to be a Pacemaker Remote node. Remove that
* entry unless it has a node ID, which means the name actually is
* associated with a cluster node. (@TODO return an error in that case?)
*/
node = pcmk__search_node_caches(0, node_name,
pcmk__node_search_cluster_member);
if ((node != NULL) && (node->uuid == NULL)) {
/* node_name could be a pointer into the cache entry being removed, so
* reassign it to a copy before the original gets freed
*/
node_name_copy = strdup(node_name);
if (node_name_copy == NULL) {
errno = ENOMEM;
return NULL;
}
node_name = node_name_copy;
pcmk__cluster_forget_cluster_node(0, node_name);
}
/* Return existing cache entry if one exists */
node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
if (node) {
free(node_name_copy);
return node;
}
/* Allocate a new entry */
node = calloc(1, sizeof(crm_node_t));
if (node == NULL) {
free(node_name_copy);
return NULL;
}
/* Populate the essential information */
set_peer_flags(node, crm_remote_node);
node->uuid = strdup(node_name);
if (node->uuid == NULL) {
free(node);
errno = ENOMEM;
free(node_name_copy);
return NULL;
}
/* Add the new entry to the cache */
g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
crm_trace("added %s to remote cache", node_name);
/* Update the entry's uname, ensuring peer status callbacks are called */
update_peer_uname(node, node_name);
free(node_name_copy);
return node;
}
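/* A minimal usage sketch (hypothetical, not part of this change): a NULL
 * return sets errno, so callers can report why the lookup failed:
 *
 *     crm_node_t *remote = pcmk__cluster_lookup_remote_node("remote-1");
 *
 *     if (remote == NULL) {
 *         crm_err("Could not get remote node cache entry: %s",
 *                 strerror(errno));
 *     }
 */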
/*!
* \internal
* \brief Remove a node from the Pacemaker Remote node cache
*
* \param[in] node_name Name of node to remove from cache
*
* \note The caller must be careful not to use \p node_name after calling this
* function if it might be a pointer into the cache entry being removed.
*/
void
pcmk__cluster_forget_remote_node(const char *node_name)
{
/* Do a lookup first, because node_name could be a pointer within the entry
* being removed -- we can't log it *after* removing it.
*/
if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) {
crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
g_hash_table_remove(crm_remote_peer_cache, node_name);
}
}
/*!
* \internal
* \brief Return node status based on a CIB status entry
*
* \param[in] node_state XML of node state
*
* \return \c CRM_NODE_MEMBER if \c PCMK__XA_IN_CCM is true in
* \c PCMK__XE_NODE_STATE, or \c CRM_NODE_LOST otherwise
*/
static const char *
remote_state_from_cib(const xmlNode *node_state)
{
bool in_ccm = false;
if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
&in_ccm) == pcmk_rc_ok) && in_ccm) {
return CRM_NODE_MEMBER;
}
return CRM_NODE_LOST;
}
/* user data for looping through remote node xpath searches */
struct refresh_data {
const char *field; /* XML attribute to check for node name */
gboolean has_state; /* whether to update node state based on XML */
};
/*!
* \internal
* \brief Process one pacemaker_remote node xpath search result
*
* \param[in] result XML search result
* \param[in] user_data what to look for in the XML
*/
static void
remote_cache_refresh_helper(xmlNode *result, void *user_data)
{
const struct refresh_data *data = user_data;
const char *remote = crm_element_value(result, data->field);
const char *state = NULL;
crm_node_t *node;
CRM_CHECK(remote != NULL, return);
/* Determine node's state, if the result has it */
if (data->has_state) {
state = remote_state_from_cib(result);
}
/* Check whether cache already has entry for node */
node = g_hash_table_lookup(crm_remote_peer_cache, remote);
if (node == NULL) {
/* Node is not in cache, so add a new entry for it */
node = pcmk__cluster_lookup_remote_node(remote);
CRM_ASSERT(node);
if (state) {
pcmk__update_peer_state(__func__, node, state, 0);
}
} else if (pcmk_is_set(node->flags, crm_node_dirty)) {
/* Node is in cache and hasn't been updated already, so mark it clean */
clear_peer_flags(node, crm_node_dirty);
if (state) {
pcmk__update_peer_state(__func__, node, state, 0);
}
}
}
static void
mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
set_peer_flags((crm_node_t *) value, crm_node_dirty);
}
static gboolean
is_dirty(gpointer key, gpointer value, gpointer user_data)
{
return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
}
/*!
* \internal
* \brief Repopulate the remote node cache based on CIB XML
*
* \param[in] cib CIB XML to parse
*/
static void
refresh_remote_nodes(xmlNode *cib)
{
struct refresh_data data;
pcmk__cluster_init_node_caches();
/* First, we mark all existing cache entries as dirty,
* so that later we can remove any that weren't in the CIB.
* We don't empty the cache, because we need to detect changes in state.
*/
g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
/* Look for guest nodes and remote nodes in the status section */
data.field = PCMK_XA_ID;
data.has_state = TRUE;
crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS,
remote_cache_refresh_helper, &data);
/* Look for guest nodes and remote nodes in the configuration section,
* because they may have just been added and not have a status entry yet.
* In that case, the cached node state will be left NULL, so that the
* peer status callback isn't called until we're sure the node started
* successfully.
*/
data.field = PCMK_XA_VALUE;
data.has_state = FALSE;
crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG,
remote_cache_refresh_helper, &data);
data.field = PCMK_XA_ID;
data.has_state = FALSE;
crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG,
remote_cache_refresh_helper, &data);
/* Remove all old cache entries that weren't seen in the CIB */
g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
}
/*!
* \internal
* \brief Check whether a node is an active cluster node
*
* Remote nodes are never considered active. This guarantees that they can never
* become DC.
*
* \param[in] node Node to check
*
* \return \c true if the node is an active cluster node, or \c false otherwise
*/
bool
pcmk__cluster_is_node_active(const crm_node_t *node)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) {
return false;
}
switch (cluster_layer) {
case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
return pcmk__corosync_is_peer_active(node);
#else
break;
#endif // SUPPORT_COROSYNC
default:
break;
}
crm_err("Unhandled cluster layer: %s",
pcmk_cluster_layer_text(cluster_layer));
return false;
}
/*!
* \internal
* \brief Check if a node's entry should be removed from the cluster node cache
*
* A node should be removed from the cache if it's inactive and matches another
* \c crm_node_t (the search object). The node is considered a mismatch if any
* of the following are true:
* * The search object is \c NULL.
* * The search object has an ID set and the cached node's ID does not match it.
* * The search object does not have an ID set, and the cached node's name does
* not match the search node's name. (If both names are \c NULL, it's a
* match.)
*
* Otherwise, the node is considered a match.
*
* Note that if the search object has both an ID and a name set, the name is
* ignored for matching purposes.
*
* \param[in] key Ignored
* \param[in] value \c crm_node_t object from cluster node cache
* \param[in] user_data \c crm_node_t object to match against (search object)
*
* \return \c TRUE if the node entry should be removed from \c crm_peer_cache,
* or \c FALSE otherwise
*/
static gboolean
should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
{
crm_node_t *node = value;
crm_node_t *search = user_data;
if (search == NULL) {
return FALSE;
}
if ((search->id != 0) && (node->id != search->id)) {
return FALSE;
}
if ((search->id == 0)
&& !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
// @TODO Consider name even if ID is set?
return FALSE;
}
if (pcmk__cluster_is_node_active(value)) {
return FALSE;
}
crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership "
"cache",
pcmk__s(node->uname, "(unknown)"), node->id);
return TRUE;
}
/*!
* \internal
* \brief Remove one or more inactive nodes from the cluster node cache
*
* All inactive nodes matching \p id and \p node_name as described in
* \c should_forget_cluster_node documentation are removed from the cache.
*
* If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed
* from the cache regardless of ID and name. This differs from clearing the
* cache, in that entries for active nodes are preserved.
*
* \param[in] id ID of node to remove from cache (0 to ignore)
* \param[in] node_name Name of node to remove from cache (ignored if \p id is
* nonzero)
*
* \note \p node_name is not modified directly, but it will be freed if it's a
* pointer into a cache entry that is removed.
*/
void
pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
{
crm_node_t search = { 0, };
char *criterion = NULL; // For logging
guint matches = 0;
if (crm_peer_cache == NULL) {
crm_trace("Membership cache not initialized, ignoring removal request");
return;
}
search.id = id;
search.uname = pcmk__str_copy(node_name); // May log after original freed
if (id > 0) {
criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id);
} else if (node_name != NULL) {
criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name);
}
matches = g_hash_table_foreach_remove(crm_peer_cache,
should_forget_cluster_node, &search);
if (matches > 0) {
if (criterion != NULL) {
crm_notice("Removed %u inactive node%s with %s from the membership "
"cache",
matches, pcmk__plural_s(matches), criterion);
} else {
crm_notice("Removed all (%u) inactive cluster nodes from the "
"membership cache",
matches);
}
} else {
crm_info("No inactive cluster nodes%s%s to remove from the membership "
"cache",
((criterion != NULL)? " with " : ""), pcmk__s(criterion, ""));
}
free(search.uname);
free(criterion);
}
static void
count_peer(gpointer key, gpointer value, gpointer user_data)
{
unsigned int *count = user_data;
crm_node_t *node = value;
if (pcmk__cluster_is_node_active(node)) {
*count = *count + 1;
}
}
/*!
* \internal
* \brief Get the number of active cluster nodes that have been seen
*
* Remote nodes are never considered active. This guarantees that they can never
* become DC.
*
* \return Number of active nodes in the cluster node cache
*/
unsigned int
pcmk__cluster_num_active_nodes(void)
{
unsigned int count = 0;
if (crm_peer_cache != NULL) {
g_hash_table_foreach(crm_peer_cache, count_peer, &count);
}
return count;
}
static void
destroy_crm_node(gpointer data)
{
crm_node_t *node = data;
crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
free(node->uname);
free(node->state);
free(node->uuid);
free(node->expected);
free(node->conn_host);
free(node);
}
/*!
* \internal
* \brief Initialize node caches
*/
void
pcmk__cluster_init_node_caches(void)
{
if (crm_peer_cache == NULL) {
crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
}
if (crm_remote_peer_cache == NULL) {
crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
}
if (cluster_node_cib_cache == NULL) {
cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
}
}
/*!
* \internal
* \brief Destroy node caches
*/
void
pcmk__cluster_destroy_node_caches(void)
{
if (crm_peer_cache != NULL) {
crm_trace("Destroying peer cache with %d members",
g_hash_table_size(crm_peer_cache));
g_hash_table_destroy(crm_peer_cache);
crm_peer_cache = NULL;
}
if (crm_remote_peer_cache != NULL) {
crm_trace("Destroying remote peer cache with %d members",
pcmk__cluster_num_remote_nodes());
g_hash_table_destroy(crm_remote_peer_cache);
crm_remote_peer_cache = NULL;
}
if (cluster_node_cib_cache != NULL) {
crm_trace("Destroying configured cluster node cache with %d members",
g_hash_table_size(cluster_node_cib_cache));
g_hash_table_destroy(cluster_node_cib_cache);
cluster_node_cib_cache = NULL;
}
}
static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
const void *) = NULL;
/*!
* \internal
* \brief Set a client function that will be called after peer status changes
*
* \param[in] dispatch Pointer to function to use as callback
*
* \note Client callbacks should do only client-specific handling. Callbacks
* must not add or remove entries in the peer caches.
*/
void
pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type,
crm_node_t *, const void *))
{
// @TODO Improve documentation of peer_status_callback
peer_status_callback = dispatch;
}
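/* A minimal callback sketch (hypothetical, not part of this change): the
 * callback may inspect the node but must not add or remove cache entries:
 *
 *     static void
 *     example_status_cb(enum crm_status_type type, crm_node_t *node,
 *                       const void *data)
 *     {
 *         if (type == crm_status_nstate) {
 *             crm_info("Node %s is now %s",
 *                      pcmk__s(node->uname, "(unnamed)"),
 *                      pcmk__s(node->state, "unknown"));
 *         }
 *     }
 *
 *     pcmk__cluster_set_status_callback(example_status_cb);
 */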
/*!
* \internal
* \brief Tell the library whether to automatically reap lost nodes
*
* If \c true (the default), calling \c crm_update_peer_proc() will also update
* the peer state to \c CRM_NODE_MEMBER or \c CRM_NODE_LOST, and updating the
* peer state will reap peers whose state changes to anything other than
* \c CRM_NODE_MEMBER.
*
* Callers should leave this enabled unless they plan to manage the cache
* separately on their own.
*
* \param[in] enable \c true to enable automatic reaping, \c false to disable
*/
void
pcmk__cluster_set_autoreap(bool enable)
{
autoreap = enable;
}
static void
dump_peer_hash(int level, const char *caller)
{
GHashTableIter iter;
const char *id = NULL;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
}
}
static gboolean
hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
{
return value == user_data;
}
/*!
* \internal
* \brief Search cluster member node cache
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
* node ID to search for
*
* \return Cluster node cache entry if found, otherwise NULL
*/
static crm_node_t *
search_cluster_member_cache(unsigned int id, const char *uname,
const char *uuid)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_node_t *by_id = NULL;
crm_node_t *by_name = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
pcmk__cluster_init_node_caches();
if (uname != NULL) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->uname && strcasecmp(node->uname, uname) == 0) {
crm_trace("Name match: %s = %p", node->uname, node);
by_name = node;
break;
}
}
}
if (id > 0) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->id == id) {
crm_trace("ID match: %u = %p", node->id, node);
by_id = node;
break;
}
}
} else if (uuid != NULL) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
crm_trace("UUID match: %s = %p", node->uuid, node);
by_id = node;
break;
}
}
}
node = by_id; /* Good default */
if(by_id == by_name) {
/* Nothing to do if they match (both NULL counts) */
crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
} else if(by_id == NULL && by_name) {
crm_trace("Only one: %p for %u/%s", by_name, id, uname);
if(id && by_name->id) {
dump_peer_hash(LOG_WARNING, __func__);
crm_crit("Node %u and %u share the same name '%s'",
id, by_name->id, uname);
node = NULL; /* Create a new one */
} else {
node = by_name;
}
} else if(by_name == NULL && by_id) {
crm_trace("Only one: %p for %u/%s", by_id, id, uname);
if(uname && by_id->uname) {
dump_peer_hash(LOG_WARNING, __func__);
crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
uname, by_id->uname, id, uname);
}
} else if(uname && by_id->uname) {
if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
} else {
crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
dump_peer_hash(LOG_INFO, __func__);
crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
TRUE);
}
} else if(id && by_name->id) {
crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
} else {
/* Simple merge */
/* Only corosync-based clusters use node IDs. The functions that call
* pcmk__update_peer_state() and crm_update_peer_proc() only know
* nodeid, so 'by_id' is authoritative when merging.
*/
dump_peer_hash(LOG_DEBUG, __func__);
crm_info("Merging %p into %p", by_name, by_id);
g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
}
return node;
}
/*!
* \internal
* \brief Search caches for a node (cluster or Pacemaker Remote)
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] flags Group of enum pcmk__node_search_flags
*
* \return Node cache entry if found, otherwise NULL
*/
crm_node_t *
pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
{
crm_node_t *node = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
pcmk__cluster_init_node_caches();
if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) {
node = g_hash_table_lookup(crm_remote_peer_cache, uname);
}
if ((node == NULL)
&& pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
node = search_cluster_member_cache(id, uname, NULL);
}
if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
node = find_cib_cluster_node(id_str, uname);
free(id_str);
}
return node;
}
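/* A minimal usage sketch (hypothetical, not part of this change): flags can
 * be combined to widen the search, here checking the remote cache first and
 * then falling back to cluster nodes known from the CIB:
 *
 *     crm_node_t *node =
 *         pcmk__search_node_caches(0, "node1",
 *                                  pcmk__node_search_remote
 *                                  |pcmk__node_search_cluster_cib);
 */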
/*!
* \internal
* \brief Purge a node from cache (both cluster and Pacemaker Remote)
*
* \param[in] node_name If not NULL, purge only nodes with this name
* \param[in] node_id If not 0, purge cluster nodes only if they have this ID
*
* \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
* If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
* nodes that match \p node_name will be purged, and cluster nodes that
* match both \p node_name and \p node_id will be purged.
* \note The caller must be careful not to use \p node_name after calling this
* function if it might be a pointer into a cache entry being removed.
*/
void
pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
{
char *node_name_copy = NULL;
if ((node_name == NULL) && (node_id == 0U)) {
return;
}
// Purge from Pacemaker Remote node cache
if ((node_name != NULL)
&& (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) {
/* node_name could be a pointer into the cache entry being purged,
* so reassign it to a copy before the original gets freed
*/
node_name_copy = pcmk__str_copy(node_name);
node_name = node_name_copy;
crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
g_hash_table_remove(crm_remote_peer_cache, node_name);
}
pcmk__cluster_forget_cluster_node(node_id, node_name);
free(node_name_copy);
}
#if SUPPORT_COROSYNC
static guint
remove_conflicting_peer(crm_node_t *node)
{
int matches = 0;
GHashTableIter iter;
crm_node_t *existing_node = NULL;
if (node->id == 0 || node->uname == NULL) {
return 0;
}
if (!pcmk__corosync_has_nodelist()) {
return 0;
}
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
if (existing_node->id > 0
&& existing_node->id != node->id
&& existing_node->uname != NULL
&& strcasecmp(existing_node->uname, node->uname) == 0) {
if (pcmk__cluster_is_node_active(existing_node)) {
continue;
}
crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
existing_node->id, existing_node->uname, node->id);
g_hash_table_iter_remove(&iter);
matches++;
}
}
return matches;
}
#endif
/*!
* \internal
* \brief Get a cluster node cache entry, possibly creating one if not found
*
* If \c pcmk__node_search_cluster_member is set in \p flags, the return value
* is guaranteed not to be \c NULL. A new cache entry is created if one does not
* already exist.
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
* node ID to search for
* \param[in] flags Group of enum pcmk__node_search_flags
*
* \return (Possibly newly created) cluster node cache entry
*/
/* coverity[-alloc] Memory is referenced in one or both hashtables */
crm_node_t *
pcmk__get_node(unsigned int id, const char *uname, const char *uuid,
uint32_t flags)
{
crm_node_t *node = NULL;
char *uname_lookup = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
pcmk__cluster_init_node_caches();
// Check the Pacemaker Remote node cache first
if (pcmk_is_set(flags, pcmk__node_search_remote)) {
node = g_hash_table_lookup(crm_remote_peer_cache, uname);
if (node != NULL) {
return node;
}
}
if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
return NULL;
}
node = search_cluster_member_cache(id, uname, uuid);
/* If uname wasn't provided, and the cache search did not turn up a name based
 * on id, we need to look up the node name using the id in the cluster
 * membership. */
if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
uname_lookup = pcmk__cluster_node_name(id);
}
if (uname_lookup) {
uname = uname_lookup;
crm_trace("Inferred a name of '%s' for node %u", uname, id);
/* try to turn up the node one more time now that we know the uname. */
if (node == NULL) {
node = search_cluster_member_cache(id, uname, uuid);
}
}
if (node == NULL) {
char *uniqueid = crm_generate_uuid();
node = pcmk__assert_alloc(1, sizeof(crm_node_t));
crm_info("Created entry %s/%p for node %s/%u (%d total)",
uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
g_hash_table_replace(crm_peer_cache, uniqueid, node);
}
if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
crm_info("Node %u is now known as %s", id, uname);
}
if(id > 0 && node->id == 0) {
node->id = id;
}
if (uname && (node->uname == NULL)) {
update_peer_uname(node, uname);
}
if(node->uuid == NULL) {
if (uuid == NULL) {
uuid = pcmk__cluster_node_uuid(node);
}
if (uuid) {
crm_info("Node %u has uuid %s", id, uuid);
} else {
crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
}
}
free(uname_lookup);
return node;
}
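/* A minimal usage sketch (hypothetical, not part of this change): with
 * pcmk__node_search_cluster_member in flags, the return value is guaranteed
 * non-NULL, so the result can be used without a NULL check:
 *
 *     crm_node_t *peer = pcmk__get_node(nodeid, NULL, NULL,
 *                                       pcmk__node_search_cluster_member);
 *
 *     crm_trace("Peer %s", pcmk__s(peer->uname, "(name not known yet)"));
 */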
/*!
* \internal
* \brief Update a node's uname
*
* \param[in,out] node Node object to update
* \param[in] uname New name to set
*
* \note This function should not be called within a peer cache iteration,
* because in some cases it can remove conflicting cache entries,
* which would invalidate the iterator.
*/
static void
update_peer_uname(crm_node_t *node, const char *uname)
{
CRM_CHECK(uname != NULL,
crm_err("Bug: can't update node name without name"); return);
CRM_CHECK(node != NULL,
crm_err("Bug: can't update node name to %s without node", uname);
return);
if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
crm_debug("Node uname '%s' did not change", uname);
return;
}
for (const char *c = uname; *c; ++c) {
if ((*c >= 'A') && (*c <= 'Z')) {
crm_warn("Node names with capitals are discouraged, consider changing '%s'",
uname);
break;
}
}
pcmk__str_update(&node->uname, uname);
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_uname, node, NULL);
}
#if SUPPORT_COROSYNC
if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
&& !pcmk_is_set(node->flags, crm_remote_node)) {
remove_conflicting_peer(node);
}
#endif
}
/*!
* \internal
* \brief Get log-friendly string equivalent of a process flag
*
* \param[in] proc Process flag
*
* \return Log-friendly string equivalent of \p proc
*/
static inline const char *
proc2text(enum crm_proc_flag proc)
{
const char *text = "unknown";
switch (proc) {
case crm_proc_none:
text = "none";
break;
case crm_proc_cpg:
text = "corosync-cpg";
break;
}
return text;
}
/*!
* \internal
* \brief Update a node's process information (and potentially state)
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] flag Bitmask of new process information
* \param[in] status node status (online, offline, etc.)
*
* \return NULL if any node was reaped from peer caches, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function should not be
* called within a cache iteration if reaping is possible, otherwise
* reaping could invalidate the iterator.
*/
crm_node_t *
crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
{
uint32_t last = 0;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
source, proc2text(flag), status);
return NULL);
/* Pacemaker doesn't spawn processes on remote nodes */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return node;
}
last = node->processes;
if (status == NULL) {
node->processes = flag;
if (node->processes != last) {
changed = TRUE;
}
} else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
if ((node->processes & flag) != flag) {
node->processes = pcmk__set_flags_as(__func__, __LINE__,
LOG_TRACE, "Peer process",
node->uname, node->processes,
flag, "processes");
changed = TRUE;
}
} else if (node->processes & flag) {
node->processes = pcmk__clear_flags_as(__func__, __LINE__,
LOG_TRACE, "Peer process",
node->uname, node->processes,
flag, "processes");
changed = TRUE;
}
if (changed) {
if (status == NULL && flag <= crm_proc_none) {
crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
node->id);
} else {
crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
proc2text(flag), status);
}
if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
node->when_online = time(NULL);
} else {
node->when_online = 0;
}
/* Call the client callback first, then update the peer state,
* in case the node will be reaped
*/
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_processes, node, &last);
}
/* The client callback shouldn't touch the peer caches,
* but as a safety net, bail if the peer cache was destroyed.
*/
if (crm_peer_cache == NULL) {
return NULL;
}
if (autoreap) {
const char *peer_state = NULL;
if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
peer_state = CRM_NODE_MEMBER;
} else {
peer_state = CRM_NODE_LOST;
}
node = pcmk__update_peer_state(__func__, node, peer_state, 0);
}
} else {
crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
proc2text(flag), status);
}
return node;
}
/*!
* \internal
* \brief Update a cluster node cache entry's expected join state
*
* \param[in] source Caller's function name (for logging)
* \param[in,out] node Node to update
* \param[in] expected Node's new join state
*/
void
pcmk__update_peer_expected(const char *source, crm_node_t *node,
const char *expected)
{
char *last = NULL;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
return);
/* Remote nodes don't participate in joins */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return;
}
last = node->expected;
if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
node->expected = strdup(expected);
changed = TRUE;
}
if (changed) {
crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
expected, last);
free(last);
} else {
crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
node->id, expected);
}
}
/*!
* \internal
* \brief Update a node's state and membership information
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] state Node's new state
* \param[in] membership Node's new membership ID
* \param[in,out] iter If not NULL, pointer to node's peer cache iterator
*
* \return NULL if any node was reaped, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function may be called from
* within a peer cache iteration if the iterator is supplied.
*/
static crm_node_t *
update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
uint64_t membership, GHashTableIter *iter)
{
gboolean is_member;
CRM_CHECK(node != NULL,
crm_err("Could not set state for unknown host to %s"
QB_XS " source=%s", state, source);
return NULL);
is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
if (is_member) {
node->when_lost = 0;
if (membership) {
node->last_seen = membership;
}
}
if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
char *last = node->state;
if (is_member) {
node->when_member = time(NULL);
} else {
node->when_member = 0;
}
node->state = strdup(state);
crm_notice("Node %s state is now %s " QB_XS
" nodeid=%u previous=%s source=%s", node->uname, state,
node->id, (last? last : "unknown"), source);
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_nstate, node, last);
}
free(last);
if (autoreap && !is_member
&& !pcmk_is_set(node->flags, crm_remote_node)) {
/* We only autoreap from the peer cache, not the remote peer cache,
* because the latter should be managed only by
* refresh_remote_nodes().
*/
if (iter) {
crm_notice("Purged 1 peer with " PCMK_XA_ID
"=%u and/or uname=%s from the membership cache",
node->id, node->uname);
g_hash_table_iter_remove(iter);
} else {
pcmk__cluster_forget_cluster_node(node->id, node->uname);
}
node = NULL;
}
} else {
crm_trace("Node %s state is unchanged (%s) " QB_XS
" nodeid=%u source=%s", node->uname, state, node->id, source);
}
return node;
}
/*!
* \brief Update a node's state and membership information
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] state Node's new state
* \param[in] membership Node's new membership ID
*
* \return NULL if the node was reaped, or the value of \p node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function should not be
* called within a cache iteration if reaping is possible;
* otherwise, reaping could invalidate the iterator.
*/
crm_node_t *
pcmk__update_peer_state(const char *source, crm_node_t *node,
const char *state, uint64_t membership)
{
return update_peer_state_iter(source, node, state, membership, NULL);
}
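/* Aside (not part of this file): the NULL-return contract above means a
 * state update can free the very object the caller holds. A standalone,
 * hedged sketch of the same ownership rule; peer_t and set_state() are
 * hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    char *name;
} peer_t;

/* Returns NULL if the peer was reaped (freed); the caller must then
 * stop using its pointer immediately.
 */
static peer_t *
set_state(peer_t *peer, const char *state)
{
    if (strcmp(state, "lost") == 0) {
        free(peer->name);
        free(peer);
        return NULL;
    }
    return peer;
}

int
main(void)
{
    peer_t *peer = calloc(1, sizeof(peer_t));

    peer->name = strdup("node1");
    peer = set_state(peer, "lost");
    if (peer == NULL) {
        printf("peer was reaped; the old pointer is dangling\n");
    }
    return 0;
}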
/*!
* \internal
* \brief Reap all nodes from cache whose membership information does not match
*
* \param[in] membership Membership ID of nodes to keep
*/
void
pcmk__reap_unseen_nodes(uint64_t membership)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_trace("Reaping unseen nodes...");
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
if (node->last_seen != membership) {
if (node->state) {
/*
* Calling update_peer_state_iter() allows us to
* remove the node from crm_peer_cache without
* invalidating our iterator
*/
update_peer_state_iter(__func__, node, CRM_NODE_LOST,
membership, &iter);
} else {
crm_info("State of node %s[%u] is still unknown",
node->uname, node->id);
}
}
}
}
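/* Aside (not part of this file): the reason update_peer_state_iter()
 * accepts an iterator is GLib's rule that the only safe way to delete
 * from a GHashTable mid-walk is g_hash_table_iter_remove(). A
 * standalone sketch of that pattern; the node names and "membership 7"
 * data are made up.
 */
#include <glib.h>

int
main(void)
{
    GHashTable *cache = g_hash_table_new_full(g_str_hash, g_str_equal,
                                              g_free, NULL);
    GHashTableIter iter;
    gpointer key = NULL;
    gpointer value = NULL;

    g_hash_table_insert(cache, g_strdup("node1"), GINT_TO_POINTER(5));
    g_hash_table_insert(cache, g_strdup("node2"), GINT_TO_POINTER(7));

    /* Keep only entries last seen in membership 7 */
    g_hash_table_iter_init(&iter, cache);
    while (g_hash_table_iter_next(&iter, &key, &value)) {
        if (GPOINTER_TO_INT(value) != 7) {
            g_print("reaping %s\n", (const char *) key);
            g_hash_table_iter_remove(&iter); /* safe during iteration */
        }
    }
    g_hash_table_destroy(cache);
    return 0;
}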
static crm_node_t *
find_cib_cluster_node(const char *id, const char *uname)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_node_t *by_id = NULL;
crm_node_t *by_name = NULL;
if (uname) {
g_hash_table_iter_init(&iter, cluster_node_cib_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (node->uname && strcasecmp(node->uname, uname) == 0) {
crm_trace("Name match: %s = %p", node->uname, node);
by_name = node;
break;
}
}
}
if (id) {
g_hash_table_iter_init(&iter, cluster_node_cib_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (strcasecmp(node->uuid, id) == 0) {
crm_trace("ID match: %s = %p", id, node);
by_id = node;
break;
}
}
}
node = by_id; /* Good default */
if (by_id == by_name) {
/* Nothing to do if they match (both being NULL counts as a match) */
crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
} else if (by_id == NULL && by_name) {
crm_trace("Only one: %p for %s/%s", by_name, id, uname);
if (id) {
node = NULL;
} else {
node = by_name;
}
} else if (by_name == NULL && by_id) {
crm_trace("Only one: %p for %s/%s", by_id, id, uname);
if (uname) {
node = NULL;
}
} else if (uname && by_id->uname
&& pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
/* Multiple nodes have the same uname in the CIB.
* Return by_id. */
} else if (id && by_name->uuid
&& pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
/* Multiple nodes have the same id in the CIB.
* Return by_name. */
node = by_name;
} else {
node = NULL;
}
if (node == NULL) {
crm_debug("Couldn't find node%s%s%s%s",
id? " " : "",
id? id : "",
uname? " with name " : "",
uname? uname : "");
}
return node;
}
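/* Aside (not part of this file): a hedged distillation of the
 * reconciliation above into a single helper, to make its decision table
 * easier to follow. rec_t and reconcile() are hypothetical, and the
 * types are simplified to plain strings.
 */
#include <stdio.h>
#include <strings.h>

typedef struct {
    const char *uuid;
    const char *uname;
} rec_t;

static const rec_t *
reconcile(const rec_t *by_id, const rec_t *by_name,
          const char *id, const char *uname)
{
    if (by_id == by_name) {
        return by_id;                          /* consistent; both NULL counts */
    }
    if (by_id == NULL) {
        return (id == NULL)? by_name : NULL;   /* name matched, ID did not */
    }
    if (by_name == NULL) {
        return (uname == NULL)? by_id : NULL;  /* ID matched, name did not */
    }
    /* Conflicting matches: keep a result only if it satisfies both keys */
    if ((uname != NULL) && (by_id->uname != NULL)
        && (strcasecmp(uname, by_id->uname) == 0)) {
        return by_id;    /* duplicate names in the source: trust the ID */
    }
    if ((id != NULL) && (by_name->uuid != NULL)
        && (strcasecmp(id, by_name->uuid) == 0)) {
        return by_name;  /* duplicate IDs in the source: trust the name */
    }
    return NULL;
}

int
main(void)
{
    rec_t a = { "1", "node1" };
    rec_t b = { "2", "node1" }; /* same name, different ID */

    printf("%s\n", (reconcile(&a, &b, "1", "node1") == &a)? "by_id wins" : "?");
    return 0;
}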
static void
cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
{
const char *id = crm_element_value(xml_node, PCMK_XA_ID);
const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
crm_node_t *node = NULL;
CRM_CHECK(id != NULL && uname != NULL, return);
node = find_cib_cluster_node(id, uname);
if (node == NULL) {
char *uniqueid = crm_generate_uuid();
node = pcmk__assert_alloc(1, sizeof(crm_node_t));
node->uname = pcmk__str_copy(uname);
node->uuid = pcmk__str_copy(id);
g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);
} else if (pcmk_is_set(node->flags, crm_node_dirty)) {
pcmk__str_update(&node->uname, uname);
/* Node is in cache and hasn't been updated already, so mark it clean */
clear_peer_flags(node, crm_node_dirty);
}
}
static void
refresh_cluster_node_cib_cache(xmlNode *cib)
{
pcmk__cluster_init_node_caches();
g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);
crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
cluster_node_cib_cache_refresh_helper, NULL);
// Remove all old cache entries that weren't seen in the CIB
g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
}
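/* Aside (not part of this file): the refresh above is a mark-and-sweep:
 * flag every cached entry dirty, clear the flag on entries still present
 * in the new CIB, then drop whatever stayed dirty. A standalone GLib
 * sketch of the same pattern; entry_t and the node names are made up.
 */
#include <glib.h>

typedef struct {
    gboolean dirty;
} entry_t;

static void
mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
    ((entry_t *) value)->dirty = TRUE;
}

static gboolean
is_dirty(gpointer key, gpointer value, gpointer user_data)
{
    return ((entry_t *) value)->dirty; /* TRUE tells GLib to remove it */
}

int
main(void)
{
    GHashTable *cache = g_hash_table_new_full(g_str_hash, g_str_equal,
                                              g_free, g_free);
    const char *seen[] = { "node1", NULL }; /* names found in the new CIB */

    g_hash_table_insert(cache, g_strdup("node1"), g_new0(entry_t, 1));
    g_hash_table_insert(cache, g_strdup("node2"), g_new0(entry_t, 1));

    g_hash_table_foreach(cache, mark_dirty, NULL);      /* mark */
    for (int i = 0; seen[i] != NULL; i++) {
        entry_t *e = g_hash_table_lookup(cache, seen[i]);

        if (e != NULL) {
            e->dirty = FALSE;                           /* still wanted */
        }
    }
    g_hash_table_foreach_remove(cache, is_dirty, NULL); /* sweep */
    g_print("%u entries left\n", g_hash_table_size(cache));
    g_hash_table_destroy(cache);
    return 0;
}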
void
pcmk__refresh_node_caches_from_cib(xmlNode *cib)
{
refresh_remote_nodes(cib);
refresh_cluster_node_cib_cache(cib);
}
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/cluster/compat.h>
void
crm_peer_init(void)
{
pcmk__cluster_init_node_caches();
}
// LCOV_EXCL_STOP
// End deprecated API
