diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 4b5584dcd7..9f67ccce07 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -1,1119 +1,1119 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event); /* * stonith failure counting * * We don't want to get stuck in a permanent fencing loop. Keep track of the * number of fencing failures for each target node, and the most we'll restart a * transition for. */ struct st_fail_rec { int count; }; static bool fence_reaction_panic = false; static unsigned long int stonith_max_attempts = 10; static GHashTable *stonith_failures = NULL; /*! * \internal * \brief Update max fencing attempts before giving up * * \param[in] value New max fencing attempts */ static void update_stonith_max_attempts(const char *value) { stonith_max_attempts = char2score(value); if (stonith_max_attempts < 1UL) { stonith_max_attempts = 10UL; } } /*! * \internal * \brief Configure reaction to notification of local node being fenced * * \param[in] reaction_s Reaction type */ static void set_fence_reaction(const char *reaction_s) { if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) { fence_reaction_panic = true; } else { if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) { crm_warn("Invalid value '%s' for %s, using 'stop'", reaction_s, PCMK_OPT_FENCE_REACTION); } fence_reaction_panic = false; } } /*! * \internal * \brief Configure fencing options based on the CIB * * \param[in,out] options Name/value pairs for configured options */ void controld_configure_fencing(GHashTable *options) { const char *value = NULL; value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION); set_fence_reaction(value); value = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS); update_stonith_max_attempts(value); } static gboolean too_many_st_failures(const char *target) { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *value = NULL; if (stonith_failures == NULL) { return FALSE; } if (target == NULL) { g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { if (value->count >= stonith_max_attempts) { target = (const char*)key; goto too_many; } } } else { value = g_hash_table_lookup(stonith_failures, target); if ((value != NULL) && (value->count >= stonith_max_attempts)) { goto too_many; } } return FALSE; too_many: crm_warn("Too many failures (%d) to fence %s, giving up", value->count, target); return TRUE; } /*! * \internal * \brief Reset a stonith fail count * * \param[in] target Name of node to reset, or NULL for all */ void st_fail_count_reset(const char *target) { if (stonith_failures == NULL) { return; } if (target) { struct st_fail_rec *rec = NULL; rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count = 0; } } else { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *rec = NULL; g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rec)) { rec->count = 0; } } } static void st_fail_count_increment(const char *target) { struct st_fail_rec *rec = NULL; if (stonith_failures == NULL) { stonith_failures = pcmk__strkey_table(free, free); } rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count++; } else { rec = malloc(sizeof(struct st_fail_rec)); if(rec == NULL) { return; } rec->count = 1; g_hash_table_insert(stonith_failures, pcmk__str_copy(target), rec); } } /* end stonith fail count functions */ static void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if (rc < pcmk_ok) { crm_err("Fencing update %d for %s: failed - %s (%d)", call_id, (char *)user_data, pcmk_strerror(rc), rc); crm_log_xml_warn(msg, "Failed update"); abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown, "CIB update failed", NULL); } else { crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); } } static void send_stonith_update(pcmk__graph_action_t *action, const char *target, const char *uuid) { int rc = pcmk_ok; crm_node_t *peer = NULL; /* We (usually) rely on the membership layer to do node_update_cluster, * and the peer status callback to do node_update_peer, because the node * might have already rejoined before we get the stonith result here. */ int flags = node_update_join | node_update_expected; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = NULL; CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); /* Make sure the membership and join caches are accurate. * Try getting any existing node cache entry also by node uuid in case it * doesn't have an uname yet. */ peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { /* Usually, we rely on the membership layer to update the cluster state * in the CIB. However, if the node has never been seen, do it here, so * the node is not considered unclean. */ flags |= node_update_cluster; } if (peer->uuid == NULL) { crm_info("Recording uuid '%s' for node '%s'", uuid, target); peer->uuid = pcmk__str_copy(uuid); } crmd_peer_down(peer, TRUE); /* Generate a node state update for the CIB */ node_state = create_node_state_update(peer, flags, NULL, __func__); /* we have to mark whether or not remote nodes have already been fenced */ if (peer->flags & crm_remote_node) { char *now_s = pcmk__ttoa(time(NULL)); crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s); free(now_s); } /* Force our known ID */ crm_xml_add(node_state, PCMK_XA_ID, uuid); rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn, PCMK_XE_STATUS, node_state, cib_scope_local |cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated); // Make sure it sticks /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn, * cib_scope_local); */ controld_delete_node_state(peer->uname, controld_section_all, cib_scope_local); free_xml(node_state); return; } /*! * \internal * \brief Abort transition due to stonith failure * * \param[in] abort_action Whether to restart or stop transition * \param[in] target Don't restart if this (NULL for any) has too many failures * \param[in] reason Log this stonith action XML as abort reason (or NULL) */ static void abort_for_stonith_failure(enum pcmk__graph_next abort_action, const char *target, const xmlNode *reason) { /* If stonith repeatedly fails, we eventually give up on starting a new * transition for that reason. */ if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) { abort_action = pcmk__graph_wait; } abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed", reason); } /* * stonith cleanup list * * If the DC is shot, proper notifications might not go out. * The stonith cleanup list allows the cluster to (re-)send * notifications once a new DC is elected. */ static GList *stonith_cleanup_list = NULL; /*! * \internal * \brief Add a node to the stonith cleanup list * * \param[in] target Name of node to add */ void add_stonith_cleanup(const char *target) { stonith_cleanup_list = g_list_append(stonith_cleanup_list, pcmk__str_copy(target)); } /*! * \internal * \brief Remove a node from the stonith cleanup list * * \param[in] Name of node to remove */ void remove_stonith_cleanup(const char *target) { GList *iter = stonith_cleanup_list; while (iter != NULL) { GList *tmp = iter; char *iter_name = tmp->data; iter = iter->next; if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) { crm_trace("Removing %s from the cleanup list", iter_name); stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); free(iter_name); } } } /*! * \internal * \brief Purge all entries from the stonith cleanup list */ void purge_stonith_cleanup(void) { if (stonith_cleanup_list) { GList *iter = NULL; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_info("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } } /*! * \internal * \brief Send stonith updates for all entries in cleanup list, then purge it */ void execute_stonith_cleanup(void) { GList *iter; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_node_t *target_node = pcmk__get_node(0, target, NULL, pcmk__node_search_cluster); const char *uuid = crm_peer_uuid(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } /* end stonith cleanup list functions */ /* stonith API client * * Functions that need to interact directly with the fencer via its API */ static stonith_t *stonith_api = NULL; static mainloop_timer_t *controld_fencer_connect_timer = NULL; static char *te_client_id = NULL; static gboolean fail_incompletable_stonith(pcmk__graph_t *graph) { GList *lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GList *lpc2 = NULL; pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data; if ((action->type != pcmk__cluster_graph_action) || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { continue; } task = crm_element_value(action->xml, PCMK_XA_OPERATION); if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) { pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); last_action = action->xml; pcmk__update_graph(graph, action); crm_notice("Failing action %d (%s): fencer terminated", action->id, pcmk__xe_id(action->xml)); } } } if (last_action != NULL) { crm_warn("Fencer failure resulted in unrunnable actions"); abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e) { te_cleanup_stonith_history_sync(st, FALSE); if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_err("Lost fencer connection (will attempt to reconnect)"); if (!mainloop_timer_running(controld_fencer_connect_timer)) { mainloop_timer_start(controld_fencer_connect_timer); } } else { crm_info("Disconnected from fencer"); } if (stonith_api) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(st); } stonith_api->cmds->remove_notification(stonith_api, NULL); } if (AM_I_DC) { fail_incompletable_stonith(controld_globals.transition_graph); trigger_graph(); } } /*! * \internal * \brief Handle an event notification from the fencing API * * \param[in] st Fencing API connection (ignored) * \param[in] event Fencing API event notification */ static void handle_fence_notification(stonith_t *st, stonith_event_t *event) { bool succeeded = true; const char *executioner = "the cluster"; const char *client = "a client"; const char *reason = NULL; int exec_status; if (te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (event == NULL) { crm_err("Notify data not found"); return; } if (event->executioner != NULL) { executioner = event->executioner; } if (event->client_origin != NULL) { client = event->client_origin; } exec_status = stonith__event_execution_status(event); if ((stonith__event_exit_status(event) != CRM_EX_OK) || (exec_status != PCMK_EXEC_DONE)) { succeeded = false; if (exec_status == PCMK_EXEC_DONE) { exec_status = PCMK_EXEC_ERROR; } } reason = stonith__event_exit_reason(event); crmd_alert_fencing_op(event); if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) { // Unfencing doesn't need special handling, just a log message if (succeeded) { crm_notice("%s was unfenced by %s at the request of %s@%s", event->target, executioner, client, event->origin); } else { crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d", event->target, executioner, pcmk_exec_status_str(exec_status), ((reason == NULL)? "" : ": "), ((reason == NULL)? "" : reason), stonith__event_exit_status(event)); } return; } if (succeeded && pcmk__str_eq(event->target, controld_globals.our_nodename, pcmk__str_casei)) { /* We were notified of our own fencing. Most likely, either fencing was * misconfigured, or fabric fencing that doesn't cut cluster * communication is in use. * * Either way, shutting down the local host is a good idea, to require * administrator intervention. Also, other nodes would otherwise likely * set our status to lost because of the fencing callback and discard * our subsequent election votes as "not part of our cluster". */ crm_crit("We were allegedly just fenced by %s for %s!", executioner, event->origin); // Dumps blackbox if enabled if (fence_reaction_panic) { pcmk__panic(__func__); } else { crm_exit(CRM_EX_FATAL); } return; // Should never get here } /* Update the count of fencing failures for this target, in case we become * DC later. The current DC has already updated its fail count in * tengine_stonith_callback(). */ if (!AM_I_DC) { if (succeeded) { st_fail_count_reset(event->target); } else { st_fail_count_increment(event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: " "%s%s%s%s " CRM_XS " event=%s", event->target, (succeeded? "" : " not"), event->action, executioner, client, event->origin, (succeeded? "OK" : pcmk_exec_status_str(exec_status)), ((reason == NULL)? "" : " ("), ((reason == NULL)? "" : reason), ((reason == NULL)? "" : ")"), event->id); if (succeeded) { crm_node_t *peer = pcmk__search_node_caches(0, event->target, pcmk__node_search_any |pcmk__node_search_known); const char *uuid = NULL; if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); if (AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. * Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the - * peer cache via crm_remote_peer_cache_refresh(). For now, we rely - * on the scheduler creating fence pseudo-events for the guests. + * peer cache via refresh_remote_nodes(). For now, we rely on the + * scheduler creating fence pseudo-events for the guests. */ if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) { /* Abort the current transition if it wasn't the cluster that * initiated fencing. */ crm_info("External fencing operation from %s fenced %s", client, event->target); abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "External Fencing Operation", NULL); } } else if (pcmk__str_eq(controld_globals.dc_name, event->target, pcmk__str_null_matches|pcmk__str_casei) && !pcmk_is_set(peer->flags, crm_remote_node)) { // Assume the target was our DC if we don't currently have one if (controld_globals.dc_name != NULL) { crm_notice("Fencing target %s was our DC", event->target); } else { crm_notice("Fencing target %s may have been our DC", event->target); } /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (pcmk__str_eq(event->executioner, controld_globals.our_nodename, pcmk__str_casei)) { send_stonith_update(NULL, event->target, uuid); } add_stonith_cleanup(event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (pcmk_is_set(peer->flags, crm_remote_node)) { remote_ra_fail(event->target); } crmd_peer_down(peer, TRUE); } } /*! * \brief Connect to fencer * * \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer * * \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry * \note If user_data is NULL, this will wait 2s between attempts, for up to * 30 attempts, meaning the controller could be blocked as long as 58s. */ gboolean controld_timer_fencer_connect(gpointer user_data) { int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return G_SOURCE_REMOVE; } } if (stonith_api->state != stonith_disconnected) { crm_trace("Already connected to fencer, no need to retry"); return G_SOURCE_REMOVE; } if (user_data == NULL) { // Blocking (retry failures now until successful) rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 30 attempts: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } } else { // Non-blocking (retry failures later in main loop) rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (controld_fencer_connect_timer == NULL) { controld_fencer_connect_timer = mainloop_timer_add("controld_fencer_connect", 1000, TRUE, controld_timer_fencer_connect, GINT_TO_POINTER(TRUE)); } if (rc != pcmk_ok) { if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_notice("Fencer connection failed (will retry): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); if (!mainloop_timer_running(controld_fencer_connect_timer)) { mainloop_timer_start(controld_fencer_connect_timer); } return G_SOURCE_CONTINUE; } else { crm_info("Fencer connection failed (ignoring because no longer required): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } return G_SOURCE_REMOVE; } } if (rc == pcmk_ok) { stonith_api_operations_t *cmds = stonith_api->cmds; cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE, handle_fence_notification); cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, tengine_stonith_history_synced); te_trigger_stonith_history_sync(TRUE); crm_notice("Fencer successfully connected"); } return G_SOURCE_REMOVE; } void controld_disconnect_fencer(bool destroy) { if (stonith_api) { // Prevent fencer connection from coming up again controld_clear_fsa_input_flags(R_ST_REQUIRED); if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(stonith_api); } stonith_api->cmds->remove_notification(stonith_api, NULL); } if (destroy) { if (stonith_api) { stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (controld_fencer_connect_timer) { mainloop_timer_del(controld_fencer_connect_timer); controld_fencer_connect_timer = NULL; } if (te_client_id) { free(te_client_id); te_client_id = NULL; } } } static gboolean do_stonith_history_sync(gpointer user_data) { if (stonith_api && (stonith_api->state != stonith_disconnected)) { stonith_history_t *history = NULL; te_cleanup_stonith_history_sync(stonith_api, FALSE); stonith_api->cmds->history(stonith_api, st_opt_sync_call | st_opt_broadcast, NULL, &history, 5); stonith_history_free(history); return TRUE; } else { crm_info("Skip triggering stonith history-sync as stonith is disconnected"); return FALSE; } } static void tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) { char *uuid = NULL; int stonith_id = -1; int transition_id = -1; pcmk__graph_action_t *action = NULL; const char *target = NULL; if ((data == NULL) || (data->userdata == NULL)) { crm_err("Ignoring fence operation %d result: " "No transition key given (bug?)", ((data == NULL)? -1 : data->call_id)); return; } if (!AM_I_DC) { const char *reason = stonith__exit_reason(data); if (reason == NULL) { reason = pcmk_exec_status_str(stonith__execution_status(data)); } crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s", data->call_id, stonith__exit_status(data), reason, (const char *) data->userdata); return; } CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id, &stonith_id, NULL), goto bail); if (controld_globals.transition_graph->complete || (stonith_id < 0) || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none) || (controld_globals.transition_graph->id != transition_id)) { crm_info("Ignoring fence operation %d result: " "Not from current transition " CRM_XS " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)", data->call_id, pcmk__btoa(controld_globals.transition_graph->complete), stonith_id, uuid, controld_globals.te_uuid, transition_id, controld_globals.transition_graph->id); goto bail; } action = controld_get_action(stonith_id); if (action == NULL) { crm_err("Ignoring fence operation %d result: " "Action %d not found in transition graph (bug?) " CRM_XS " uuid=%s transition=%d", data->call_id, stonith_id, uuid, transition_id); goto bail; } target = crm_element_value(action->xml, PCMK__META_ON_NODE); if (target == NULL) { crm_err("Ignoring fence operation %d result: No target given (bug?)", data->call_id); goto bail; } stop_te_timer(action); if (stonith__exit_status(data) == CRM_EX_OK) { const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID); const char *op = crm_meta_value(action->params, PCMK__META_STONITH_ACTION); crm_info("Fence operation %d for %s succeeded", data->call_id, target); if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) { te_action_confirmed(action, NULL); if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) { const char *value = NULL; char *now = pcmk__ttoa(time(NULL)); gboolean is_remote_node = FALSE; /* This check is not 100% reliable, since this node is not * guaranteed to have the remote node cached. However, it * doesn't have to be reliable, since the attribute manager can * learn a node's "remoteness" by other means sooner or later. * This allows it to learn more quickly if this node does have * the information. */ if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) { is_remote_node = TRUE; } update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, is_remote_node); free(now); value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL); update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, is_remote_node); value = crm_meta_value(action->params, PCMK__META_DIGESTS_SECURE); update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, is_remote_node); } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) { send_stonith_update(action, target, uuid); pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); } } st_fail_count_reset(target); } else { enum pcmk__graph_next abort_action = pcmk__graph_restart; int status = stonith__execution_status(data); const char *reason = stonith__exit_reason(data); if (reason == NULL) { if (status == PCMK_EXEC_DONE) { reason = "Agent returned error"; } else { reason = pcmk_exec_status_str(status); } } pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); /* If no fence devices were available, there's no use in immediately * checking again, so don't start a new transition in that case. */ if (status == PCMK_EXEC_NO_FENCE_DEVICE) { crm_warn("Fence operation %d for %s failed: %s " "(aborting transition and giving up for now)", data->call_id, target, reason); abort_action = pcmk__graph_wait; } else { crm_notice("Fence operation %d for %s failed: %s " "(aborting transition)", data->call_id, target, reason); } /* Increment the fail count now, so abort_for_stonith_failure() can * check it. Non-DC nodes will increment it in * handle_fence_notification(). */ st_fail_count_increment(target); abort_for_stonith_failure(abort_action, target, NULL); } pcmk__update_graph(controld_globals.transition_graph, action); trigger_graph(); bail: free(data->userdata); free(uuid); return; } static int fence_with_delay(const char *target, const char *type, int delay) { uint32_t options = st_opt_none; // Group of enum stonith_call_options int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout / 1000); if (crmd_join_phase_count(crm_join_confirmed) == 1) { stonith__set_call_options(options, target, st_opt_allow_suicide); } return stonith_api->cmds->fence_with_delay(stonith_api, options, target, type, timeout_sec, 0, delay); } /*! * \internal * \brief Execute a fencing action from a transition graph * * \param[in] graph Transition graph being executed (ignored) * \param[in] action Fencing action to execute * * \return Standard Pacemaker return code */ int controld_execute_fence_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { int rc = 0; const char *id = pcmk__xe_id(action->xml); const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID); const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *type = crm_meta_value(action->params, PCMK__META_STONITH_ACTION); char *transition_key = NULL; const char *priority_delay = NULL; int delay_i = 0; gboolean invalid_action = FALSE; int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout / 1000); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return EPROTO; } priority_delay = crm_meta_value(action->params, PCMK_OPT_PRIORITY_FENCING_DELAY); crm_notice("Requesting fencing (%s) targeting node %s " CRM_XS " action=%s timeout=%i%s%s", type, target, id, stonith_timeout, priority_delay ? " priority_delay=" : "", priority_delay ? priority_delay : ""); /* Passing NULL means block until we can connect... */ controld_timer_fencer_connect(NULL); pcmk__scan_min_int(priority_delay, &delay_i, 0); rc = fence_with_delay(target, type, delay_i); transition_key = pcmk__transition_key(controld_globals.transition_graph->id, action->id, 0, controld_globals.te_uuid), stonith_api->cmds->register_callback(stonith_api, rc, (stonith_timeout + (delay_i > 0 ? delay_i : 0)), st_opt_timeout_updates, transition_key, "tengine_stonith_callback", tengine_stonith_callback); return pcmk_rc_ok; } bool controld_verify_stonith_watchdog_timeout(const char *value) { long long st_timeout = (value != NULL)? crm_get_msec(value) : 0; const char *our_nodename = controld_globals.our_nodename; if (st_timeout == 0 || (stonith_api && (stonith_api->state != stonith_disconnected) && stonith__watchdog_fencing_enabled_for_node_api(stonith_api, our_nodename))) { return pcmk__valid_stonith_watchdog_timeout(value); } return true; } /* end stonith API client functions */ /* * stonith history synchronization * * Each node's fencer keeps track of a cluster-wide fencing history. When a node * joins or leaves, we need to synchronize the history across all nodes. */ static crm_trigger_t *stonith_history_sync_trigger = NULL; static mainloop_timer_t *stonith_history_sync_timer_short = NULL; static mainloop_timer_t *stonith_history_sync_timer_long = NULL; void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers) { if (free_timers) { mainloop_timer_del(stonith_history_sync_timer_short); stonith_history_sync_timer_short = NULL; mainloop_timer_del(stonith_history_sync_timer_long); stonith_history_sync_timer_long = NULL; } else { mainloop_timer_stop(stonith_history_sync_timer_short); mainloop_timer_stop(stonith_history_sync_timer_long); } if (st) { st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED); } } static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) { te_cleanup_stonith_history_sync(st, FALSE); crm_debug("Fence-history synced - cancel all timers"); } static gboolean stonith_history_sync_set_trigger(gpointer user_data) { mainloop_set_trigger(stonith_history_sync_trigger); return FALSE; } void te_trigger_stonith_history_sync(bool long_timeout) { /* trigger a sync in 5s to give more nodes the * chance to show up so that we don't create * unnecessary stonith-history-sync traffic * * the long timeout of 30s is there as a fallback * so that after a successful connection to fenced * we will wait for 30s for the DC to trigger a * history-sync * if this doesn't happen we trigger a sync locally * (e.g. fenced segfaults and is restarted by pacemakerd) */ /* as we are finally checking the stonith-connection * in do_stonith_history_sync we should be fine * leaving stonith_history_sync_time & stonith_history_sync_trigger * around */ if (stonith_history_sync_trigger == NULL) { stonith_history_sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW, do_stonith_history_sync, NULL); } if (long_timeout) { if(stonith_history_sync_timer_long == NULL) { stonith_history_sync_timer_long = mainloop_timer_add("history_sync_long", 30000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); mainloop_timer_start(stonith_history_sync_timer_long); } else { if(stonith_history_sync_timer_short == NULL) { stonith_history_sync_timer_short = mainloop_timer_add("history_sync_short", 5000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); mainloop_timer_start(stonith_history_sync_timer_short); } } /* end stonith history synchronization functions */ diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 22363a2461..4ccde3400f 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,1399 +1,1411 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include "crmcluster_private.h" /* The peer cache remembers cluster nodes that have been seen. * This is managed mostly automatically by libcluster, based on * cluster membership events. * * Because cluster nodes can have conflicting names or UUIDs, * the hash table key is a uniquely generated ID. */ GHashTable *crm_peer_cache = NULL; /* * The remote peer cache tracks pacemaker_remote nodes. While the * value has the same type as the peer cache's, it is tracked separately for * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs, * so the name (which is also the UUID) is used as the hash table key; there * is no equivalent of membership events, so management is not automatic; and * most users of the peer cache need to exclude pacemaker_remote nodes. * * That said, using a single cache would be more logical and less error-prone, * so it would be a good idea to merge them one day. * * libcluster provides two avenues for populating the cache: * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it, - * while crm_remote_peer_cache_refresh() populates it via the CIB. + * while refresh_remote_nodes() populates it via the CIB. */ GHashTable *crm_remote_peer_cache = NULL; /* * The known node cache tracks cluster and remote nodes that have been seen in * the CIB. It is useful mainly when a caller needs to know about a node that * may no longer be in the membership, but doesn't want to add the node to the * main peer cache tables. */ static GHashTable *known_node_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; static gboolean crm_autoreap = TRUE; // Flag setting and clearing for crm_node_t:flags #define set_peer_flags(peer, flags_to_set) do { \ (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Peer", (peer)->uname, \ (peer)->flags, (flags_to_set), \ #flags_to_set); \ } while (0) #define clear_peer_flags(peer, flags_to_clear) do { \ (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Peer", (peer)->uname, \ (peer)->flags, (flags_to_clear), \ #flags_to_clear); \ } while (0) static void update_peer_uname(crm_node_t *node, const char *uname); static crm_node_t *find_known_node(const char *id, const char *uname); /*! * \internal * \brief Get the number of Pacemaker Remote nodes that have been seen * * \return Number of cached Pacemaker Remote nodes */ unsigned int pcmk__cluster_num_remote_nodes(void) { if (crm_remote_peer_cache == NULL) { return 0U; } return g_hash_table_size(crm_remote_peer_cache); } /*! * \brief Get a remote node peer cache entry, creating it if necessary * * \param[in] node_name Name of remote node * * \return Cache entry for node on success, NULL (and set errno) otherwise * * \note When creating a new entry, this will leave the node state undetermined, * so the caller should also call pcmk__update_peer_state() if the state * is known. * \note Because this can add and remove cache entries, callers should not * assume any previously obtained cache entry pointers remain valid. */ crm_node_t * crm_remote_peer_get(const char *node_name) { crm_node_t *node; char *node_name_copy = NULL; if (node_name == NULL) { errno = EINVAL; return NULL; } /* It's theoretically possible that the node was added to the cluster peer * cache before it was known to be a Pacemaker Remote node. Remove that * entry unless it has a node ID, which means the name actually is * associated with a cluster node. (@TODO return an error in that case?) */ node = pcmk__search_node_caches(0, node_name, pcmk__node_search_cluster); if ((node != NULL) && (node->uuid == NULL)) { /* node_name could be a pointer into the cache entry being removed, so * reassign it to a copy before the original gets freed */ node_name_copy = strdup(node_name); if (node_name_copy == NULL) { errno = ENOMEM; return NULL; } node_name = node_name_copy; reap_crm_member(0, node_name); } /* Return existing cache entry if one exists */ node = g_hash_table_lookup(crm_remote_peer_cache, node_name); if (node) { free(node_name_copy); return node; } /* Allocate a new entry */ node = calloc(1, sizeof(crm_node_t)); if (node == NULL) { free(node_name_copy); return NULL; } /* Populate the essential information */ set_peer_flags(node, crm_remote_node); node->uuid = strdup(node_name); if (node->uuid == NULL) { free(node); errno = ENOMEM; free(node_name_copy); return NULL; } /* Add the new entry to the cache */ g_hash_table_replace(crm_remote_peer_cache, node->uuid, node); crm_trace("added %s to remote cache", node_name); /* Update the entry's uname, ensuring peer status callbacks are called */ update_peer_uname(node, node_name); free(node_name_copy); return node; } /*! * \brief Remove a node from the Pacemaker Remote node cache * * \param[in] node_name Name of node to remove from cache * * \note The caller must be careful not to use \p node_name after calling this * function if it might be a pointer into the cache entry being removed. */ void crm_remote_peer_cache_remove(const char *node_name) { /* Do a lookup first, because node_name could be a pointer within the entry * being removed -- we can't log it *after* removing it. */ if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) { crm_trace("Removing %s from Pacemaker Remote node cache", node_name); g_hash_table_remove(crm_remote_peer_cache, node_name); } } /*! * \internal * \brief Return node status based on a CIB status entry * * \param[in] node_state XML of node state * * \return \c CRM_NODE_LOST if \c PCMK__XA_IN_CCM is false in * \c PCMK__XE_NODE_STATE, \c CRM_NODE_MEMBER otherwise * \note Unlike most boolean XML attributes, this one defaults to true, for * backward compatibility with older controllers that don't set it. */ static const char * remote_state_from_cib(const xmlNode *node_state) { bool status = false; if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM, &status) == pcmk_rc_ok) && !status) { return CRM_NODE_LOST; } else { return CRM_NODE_MEMBER; } } /* user data for looping through remote node xpath searches */ struct refresh_data { const char *field; /* XML attribute to check for node name */ gboolean has_state; /* whether to update node state based on XML */ }; /*! * \internal * \brief Process one pacemaker_remote node xpath search result * * \param[in] result XML search result * \param[in] user_data what to look for in the XML */ static void remote_cache_refresh_helper(xmlNode *result, void *user_data) { const struct refresh_data *data = user_data; const char *remote = crm_element_value(result, data->field); const char *state = NULL; crm_node_t *node; CRM_CHECK(remote != NULL, return); /* Determine node's state, if the result has it */ if (data->has_state) { state = remote_state_from_cib(result); } /* Check whether cache already has entry for node */ node = g_hash_table_lookup(crm_remote_peer_cache, remote); if (node == NULL) { /* Node is not in cache, so add a new entry for it */ node = crm_remote_peer_get(remote); CRM_ASSERT(node); if (state) { pcmk__update_peer_state(__func__, node, state, 0); } } else if (pcmk_is_set(node->flags, crm_node_dirty)) { /* Node is in cache and hasn't been updated already, so mark it clean */ clear_peer_flags(node, crm_node_dirty); if (state) { pcmk__update_peer_state(__func__, node, state, 0); } } } static void mark_dirty(gpointer key, gpointer value, gpointer user_data) { set_peer_flags((crm_node_t *) value, crm_node_dirty); } static gboolean is_dirty(gpointer key, gpointer value, gpointer user_data) { return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty); } /*! - * \brief Repopulate the remote peer cache based on CIB XML + * \internal + * \brief Repopulate the remote node cache based on CIB XML * - * \param[in] xmlNode CIB XML to parse + * \param[in] cib CIB XML to parse */ -void -crm_remote_peer_cache_refresh(xmlNode *cib) +static void +refresh_remote_nodes(xmlNode *cib) { struct refresh_data data; crm_peer_init(); /* First, we mark all existing cache entries as dirty, * so that later we can remove any that weren't in the CIB. * We don't empty the cache, because we need to detect changes in state. */ g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL); /* Look for guest nodes and remote nodes in the status section */ data.field = PCMK_XA_ID; data.has_state = TRUE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS, remote_cache_refresh_helper, &data); /* Look for guest nodes and remote nodes in the configuration section, * because they may have just been added and not have a status entry yet. * In that case, the cached node state will be left NULL, so that the * peer status callback isn't called until we're sure the node started * successfully. */ data.field = PCMK_XA_VALUE; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG, remote_cache_refresh_helper, &data); data.field = PCMK_XA_ID; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG, remote_cache_refresh_helper, &data); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL); } +/*! + * \brief Repopulate the remote peer cache based on CIB XML + * + * \param[in] xmlNode CIB XML to parse + */ +void +crm_remote_peer_cache_refresh(xmlNode *cib) +{ + refresh_remote_nodes(cib); +} + gboolean crm_is_peer_active(const crm_node_t * node) { if(node == NULL) { return FALSE; } if (pcmk_is_set(node->flags, crm_remote_node)) { /* remote nodes are never considered active members. This * guarantees they will never be considered for DC membership.*/ return FALSE; } #if SUPPORT_COROSYNC if (is_corosync_cluster()) { return crm_is_corosync_peer_active(node); } #endif crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search == NULL) { return FALSE; } else if (search->id && node->id != search->id) { return FALSE; } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) { return FALSE; } else if (crm_is_peer_active(value) == FALSE) { crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership cache", (node->uname? node->uname : "unknown"), node->id); return TRUE; } return FALSE; } /*! * \brief Remove all peer cache entries matching a node ID and/or uname * * \param[in] id ID of node to remove (or 0 to ignore) * \param[in] name Uname of node to remove (or NULL to ignore) * * \return Number of cache entries removed * * \note The caller must be careful not to use \p name after calling this * function if it might be a pointer into the cache entry being removed. */ guint reap_crm_member(uint32_t id, const char *name) { int matches = 0; crm_node_t search = { 0, }; if (crm_peer_cache == NULL) { crm_trace("Membership cache not initialized, ignoring purge request"); return 0; } search.id = id; search.uname = pcmk__str_copy(name); matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); if(matches) { crm_notice("Purged %d peer%s with " PCMK_XA_ID "=%u%s%s from the membership cache", matches, pcmk__plural_s(matches), search.id, (search.uname? " and/or uname=" : ""), (search.uname? search.uname : "")); } else { crm_info("No peers with " PCMK_XA_ID "=%u%s%s to purge from the membership cache", search.id, (search.uname? " and/or uname=" : ""), (search.uname? search.uname : "")); } free(search.uname); return matches; } static void count_peer(gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; crm_node_t *node = value; if (crm_is_peer_active(node)) { *count = *count + 1; } } guint crm_active_peers(void) { guint count = 0; if (crm_peer_cache) { g_hash_table_foreach(crm_peer_cache, count_peer, &count); } return count; } static void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u: %s", node->id, node->uname); free(node->uname); free(node->state); free(node->uuid); free(node->expected); free(node->conn_host); free(node); } void crm_peer_init(void) { if (crm_peer_cache == NULL) { crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node); } if (crm_remote_peer_cache == NULL) { crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node); } if (known_node_cache == NULL) { known_node_cache = pcmk__strikey_table(free, destroy_crm_node); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_remote_peer_cache != NULL) { crm_trace("Destroying remote peer cache with %d members", pcmk__cluster_num_remote_nodes()); g_hash_table_destroy(crm_remote_peer_cache); crm_remote_peer_cache = NULL; } if (known_node_cache != NULL) { crm_trace("Destroying known node cache with %d members", g_hash_table_size(known_node_cache)); g_hash_table_destroy(known_node_cache); known_node_cache = NULL; } } static void (*peer_status_callback)(enum crm_status_type, crm_node_t *, const void *) = NULL; /*! * \brief Set a client function that will be called after peer status changes * * \param[in] dispatch Pointer to function to use as callback * * \note Previously, client callbacks were responsible for peer cache * management. This is no longer the case, and client callbacks should do * only client-specific handling. Callbacks MUST NOT add or remove entries * in the peer caches. */ void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { peer_status_callback = dispatch; } /*! * \brief Tell the library whether to automatically reap lost nodes * * If TRUE (the default), calling crm_update_peer_proc() will also update the * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and pcmk__update_peer_state() * will reap peers whose state changes to anything other than CRM_NODE_MEMBER. * Callers should leave this enabled unless they plan to manage the cache * separately on their own. * * \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable */ void crm_set_autoreap(gboolean autoreap) { crm_autoreap = autoreap; } static void dump_peer_hash(int level, const char *caller) { GHashTableIter iter; const char *id = NULL; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) { do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id); } } static gboolean hash_find_by_data(gpointer key, gpointer value, gpointer user_data) { return value == user_data; } /*! * \internal * \brief Search caches for a node (cluster or Pacemaker Remote) * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] flags Group of enum pcmk__node_search_flags * * \return Node cache entry if found, otherwise NULL */ crm_node_t * pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster)) { node = pcmk__search_cluster_node_cache(id, uname, NULL); } if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_known)) { char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id); node = find_known_node(id_str, uname); free(id_str); } return node; } /*! * \internal * \brief Purge a node from cache (both cluster and Pacemaker Remote) * * \param[in] node_name If not NULL, purge only nodes with this name * \param[in] node_id If not 0, purge cluster nodes only if they have this ID * * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged. * If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote * nodes that match \p node_name will be purged, and cluster nodes that * match both \p node_name and \p node_id will be purged. * \note The caller must be careful not to use \p node_name after calling this * function if it might be a pointer into a cache entry being removed. */ void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id) { char *node_name_copy = NULL; if ((node_name == NULL) && (node_id == 0U)) { return; } // Purge from Pacemaker Remote node cache if ((node_name != NULL) && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) { /* node_name could be a pointer into the cache entry being purged, * so reassign it to a copy before the original gets freed */ node_name_copy = pcmk__str_copy(node_name); node_name = node_name_copy; crm_trace("Purging %s from Pacemaker Remote node cache", node_name); g_hash_table_remove(crm_remote_peer_cache, node_name); } reap_crm_member(node_id, node_name); free(node_name_copy); } /*! * \internal * \brief Search cluster node cache * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster * node ID to search for * * \return Cluster node cache entry if found, otherwise NULL */ crm_node_t * pcmk__search_cluster_node_cache(unsigned int id, const char *uname, const char *uuid) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (uname != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id > 0) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->id == id) { crm_trace("ID match: %u = %p", node->id, node); by_id = node; break; } } } else if (uuid != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) { crm_trace("UUID match: %s = %p", node->uuid, node); by_id = node; break; } } } node = by_id; /* Good default */ if(by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); } else if(by_id == NULL && by_name) { crm_trace("Only one: %p for %u/%s", by_name, id, uname); if(id && by_name->id) { dump_peer_hash(LOG_WARNING, __func__); crm_crit("Node %u and %u share the same name '%s'", id, by_name->id, uname); node = NULL; /* Create a new one */ } else { node = by_name; } } else if(by_name == NULL && by_id) { crm_trace("Only one: %p for %u/%s", by_id, id, uname); if(uname && by_id->uname) { dump_peer_hash(LOG_WARNING, __func__); crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", uname, by_id->uname, id, uname); } } else if(uname && by_id->uname) { if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) { crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id); g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name); } else { crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname); dump_peer_hash(LOG_INFO, __func__); crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE, TRUE); } } else if(id && by_name->id) { crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); } else { /* Simple merge */ /* Only corosync-based clusters use node IDs. The functions that call * pcmk__update_peer_state() and crm_update_peer_proc() only know * nodeid, so 'by_id' is authoritative when merging. */ dump_peer_hash(LOG_DEBUG, __func__); crm_info("Merging %p into %p", by_name, by_id); g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name); } return node; } #if SUPPORT_COROSYNC static guint remove_conflicting_peer(crm_node_t *node) { int matches = 0; GHashTableIter iter; crm_node_t *existing_node = NULL; if (node->id == 0 || node->uname == NULL) { return 0; } if (!pcmk__corosync_has_nodelist()) { return 0; } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) { if (existing_node->id > 0 && existing_node->id != node->id && existing_node->uname != NULL && strcasecmp(existing_node->uname, node->uname) == 0) { if (crm_is_peer_active(existing_node)) { continue; } crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u", existing_node->id, existing_node->uname, node->id); g_hash_table_iter_remove(&iter); matches++; } } return matches; } #endif /*! * \brief Get a cluster node cache entry * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster * node ID to search for * \param[in] flags Group of enum pcmk__node_search_flags * * \return (Possibly newly created) cluster node cache entry */ /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * pcmk__get_node(unsigned int id, const char *uname, const char *uuid, uint32_t flags) { crm_node_t *node = NULL; char *uname_lookup = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); // Check the Pacemaker Remote node cache first if (pcmk_is_set(flags, pcmk__node_search_remote)) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); if (node != NULL) { return node; } } if (!pcmk_is_set(flags, pcmk__node_search_cluster)) { return NULL; } node = pcmk__search_cluster_node_cache(id, uname, uuid); /* if uname wasn't provided, and find_peer did not turn up a uname based on id. * we need to do a lookup of the node name using the id in the cluster membership. */ if ((node == NULL || node->uname == NULL) && (uname == NULL)) { uname_lookup = get_node_name(id); } if (uname_lookup) { uname = uname_lookup; crm_trace("Inferred a name of '%s' for node %u", uname, id); /* try to turn up the node one more time now that we know the uname. */ if (node == NULL) { node = pcmk__search_cluster_node_cache(id, uname, uuid); } } if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = pcmk__assert_alloc(1, sizeof(crm_node_t)); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); g_hash_table_replace(crm_peer_cache, uniqueid, node); } if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { crm_info("Node %u is now known as %s", id, uname); } if(id > 0 && node->id == 0) { node->id = id; } if (uname && (node->uname == NULL)) { update_peer_uname(node, uname); } if(node->uuid == NULL) { if (uuid == NULL) { uuid = crm_peer_uuid(node); } if (uuid) { crm_info("Node %u has uuid %s", id, uuid); } else { crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname); } } free(uname_lookup); return node; } /*! * \internal * \brief Update a node's uname * * \param[in,out] node Node object to update * \param[in] uname New name to set * * \note This function should not be called within a peer cache iteration, * because in some cases it can remove conflicting cache entries, * which would invalidate the iterator. */ static void update_peer_uname(crm_node_t *node, const char *uname) { CRM_CHECK(uname != NULL, crm_err("Bug: can't update node name without name"); return); CRM_CHECK(node != NULL, crm_err("Bug: can't update node name to %s without node", uname); return); if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) { crm_debug("Node uname '%s' did not change", uname); return; } for (const char *c = uname; *c; ++c) { if ((*c >= 'A') && (*c <= 'Z')) { crm_warn("Node names with capitals are discouraged, consider changing '%s'", uname); break; } } pcmk__str_update(&node->uname, uname); if (peer_status_callback != NULL) { peer_status_callback(crm_status_uname, node, NULL); } #if SUPPORT_COROSYNC if (is_corosync_cluster() && !pcmk_is_set(node->flags, crm_remote_node)) { remove_conflicting_peer(node); } #endif } /*! * \internal * \brief Get log-friendly string equivalent of a process flag * * \param[in] proc Process flag * * \return Log-friendly string equivalent of \p proc */ static inline const char * proc2text(enum crm_proc_flag proc) { const char *text = "unknown"; switch (proc) { case crm_proc_none: text = "none"; break; case crm_proc_based: text = "pacemaker-based"; break; case crm_proc_controld: text = "pacemaker-controld"; break; case crm_proc_schedulerd: text = "pacemaker-schedulerd"; break; case crm_proc_execd: text = "pacemaker-execd"; break; case crm_proc_attrd: text = "pacemaker-attrd"; break; case crm_proc_fenced: text = "pacemaker-fenced"; break; case crm_proc_cpg: text = "corosync-cpg"; break; } return text; } /*! * \internal * \brief Update a node's process information (and potentially state) * * \param[in] source Caller's function name (for log messages) * \param[in,out] node Node object to update * \param[in] flag Bitmask of new process information * \param[in] status node status (online, offline, etc.) * * \return NULL if any node was reaped from peer caches, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, otherwise * reaping could invalidate the iterator. */ crm_node_t * crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, proc2text(flag), status); return NULL); /* Pacemaker doesn't spawn processes on remote nodes */ if (pcmk_is_set(node->flags, crm_remote_node)) { return node; } last = node->processes; if (status == NULL) { node->processes = flag; if (node->processes != last) { changed = TRUE; } } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) { if ((node->processes & flag) != flag) { node->processes = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Peer process", node->uname, node->processes, flag, "processes"); changed = TRUE; } } else if (node->processes & flag) { node->processes = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Peer process", node->uname, node->processes, flag, "processes"); changed = TRUE; } if (changed) { if (status == NULL && flag <= crm_proc_none) { crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname, node->id); } else { crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id, proc2text(flag), status); } if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { node->when_online = time(NULL); } else { node->when_online = 0; } /* Call the client callback first, then update the peer state, * in case the node will be reaped */ if (peer_status_callback != NULL) { peer_status_callback(crm_status_processes, node, &last); } /* The client callback shouldn't touch the peer caches, * but as a safety net, bail if the peer cache was destroyed. */ if (crm_peer_cache == NULL) { return NULL; } if (crm_autoreap) { const char *peer_state = NULL; if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { peer_state = CRM_NODE_MEMBER; } else { peer_state = CRM_NODE_LOST; } node = pcmk__update_peer_state(__func__, node, peer_state, 0); } } else { crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id, proc2text(flag), status); } return node; } /*! * \internal * \brief Update a cluster node cache entry's expected join state * * \param[in] source Caller's function name (for logging) * \param[in,out] node Node to update * \param[in] expected Node's new join state */ void pcmk__update_peer_expected(const char *source, crm_node_t *node, const char *expected) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); return); /* Remote nodes don't participate in joins */ if (pcmk_is_set(node->flags, crm_remote_node)) { return; } last = node->expected; if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) { node->expected = strdup(expected); changed = TRUE; } if (changed) { crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id, expected, last); free(last); } else { crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname, node->id, expected); } } /*! * \internal * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in,out] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * \param[in,out] iter If not NULL, pointer to node's peer cache iterator * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function may be called from * within a peer cache iteration if the iterator is supplied. */ static crm_node_t * update_peer_state_iter(const char *source, crm_node_t *node, const char *state, uint64_t membership, GHashTableIter *iter) { gboolean is_member; CRM_CHECK(node != NULL, crm_err("Could not set state for unknown host to %s" CRM_XS " source=%s", state, source); return NULL); is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei); if (is_member) { node->when_lost = 0; if (membership) { node->last_seen = membership; } } if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) { char *last = node->state; if (is_member) { node->when_member = time(NULL); } else { node->when_member = 0; } node->state = strdup(state); crm_notice("Node %s state is now %s " CRM_XS " nodeid=%u previous=%s source=%s", node->uname, state, node->id, (last? last : "unknown"), source); if (peer_status_callback != NULL) { peer_status_callback(crm_status_nstate, node, last); } free(last); if (crm_autoreap && !is_member && !pcmk_is_set(node->flags, crm_remote_node)) { /* We only autoreap from the peer cache, not the remote peer cache, * because the latter should be managed only by - * crm_remote_peer_cache_refresh(). + * refresh_remote_nodes(). */ if(iter) { crm_notice("Purged 1 peer with " PCMK_XA_ID "=%u and/or uname=%s from the membership cache", node->id, node->uname); g_hash_table_iter_remove(iter); } else { reap_crm_member(node->id, node->uname); } node = NULL; } } else { crm_trace("Node %s state is unchanged (%s) " CRM_XS " nodeid=%u source=%s", node->uname, state, node->id, source); } return node; } /*! * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in,out] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, * otherwise reaping could invalidate the iterator. */ crm_node_t * pcmk__update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership) { return update_peer_state_iter(source, node, state, membership, NULL); } /*! * \internal * \brief Reap all nodes from cache whose membership information does not match * * \param[in] membership Membership ID of nodes to keep */ void pcmk__reap_unseen_nodes(uint64_t membership) { GHashTableIter iter; crm_node_t *node = NULL; crm_trace("Reaping unseen nodes..."); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) { if (node->last_seen != membership) { if (node->state) { /* * Calling update_peer_state_iter() allows us to * remove the node from crm_peer_cache without * invalidating our iterator */ update_peer_state_iter(__func__, node, CRM_NODE_LOST, membership, &iter); } else { crm_info("State of node %s[%u] is still unknown", node->uname, node->id); } } } } static crm_node_t * find_known_node(const char *id, const char *uname) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; if (uname) { g_hash_table_iter_init(&iter, known_node_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id) { g_hash_table_iter_init(&iter, known_node_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(strcasecmp(node->uuid, id) == 0) { crm_trace("ID match: %s= %p", id, node); by_id = node; break; } } } node = by_id; /* Good default */ if (by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %s/%s", by_id, id, uname); } else if (by_id == NULL && by_name) { crm_trace("Only one: %p for %s/%s", by_name, id, uname); if (id) { node = NULL; } else { node = by_name; } } else if (by_name == NULL && by_id) { crm_trace("Only one: %p for %s/%s", by_id, id, uname); if (uname) { node = NULL; } } else if (uname && by_id->uname && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) { /* Multiple nodes have the same uname in the CIB. * Return by_id. */ } else if (id && by_name->uuid && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) { /* Multiple nodes have the same id in the CIB. * Return by_name. */ node = by_name; } else { node = NULL; } if (node == NULL) { crm_debug("Couldn't find node%s%s%s%s", id? " " : "", id? id : "", uname? " with name " : "", uname? uname : ""); } return node; } static void known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data) { const char *id = crm_element_value(xml_node, PCMK_XA_ID); const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME); crm_node_t * node = NULL; CRM_CHECK(id != NULL && uname !=NULL, return); node = find_known_node(id, uname); if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = pcmk__assert_alloc(1, sizeof(crm_node_t)); node->uname = pcmk__str_copy(uname); node->uuid = pcmk__str_copy(id); g_hash_table_replace(known_node_cache, uniqueid, node); } else if (pcmk_is_set(node->flags, crm_node_dirty)) { pcmk__str_update(&node->uname, uname); /* Node is in cache and hasn't been updated already, so mark it clean */ clear_peer_flags(node, crm_node_dirty); } } static void refresh_known_node_cache(xmlNode *cib) { crm_peer_init(); g_hash_table_foreach(known_node_cache, mark_dirty, NULL); crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG, known_node_cache_refresh_helper, NULL); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL); } void pcmk__refresh_node_caches_from_cib(xmlNode *cib) { - crm_remote_peer_cache_refresh(cib); + refresh_remote_nodes(cib); refresh_known_node_cache(cib); } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include int crm_terminate_member(int nodeid, const char *uname, void *unused) { return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } crm_node_t * crm_get_peer(unsigned int id, const char *uname) { return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster); } crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags) { return pcmk__get_node(id, uname, NULL, flags); } int crm_remote_peer_cache_size(void) { unsigned int count = pcmk__cluster_num_remote_nodes(); return QB_MIN(count, INT_MAX); } // LCOV_EXCL_STOP // End deprecated API