diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c index fc2f74438f..46c6042c47 100644 --- a/daemons/controld/controld_callbacks.c +++ b/daemons/controld/controld_callbacks.c @@ -1,396 +1,396 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include // PRIu32 #include // bool #include // uint32_t #include // NULL #include #include #include #include #include #include #include /* From join_dc... */ extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); void crmd_ha_msg_filter(xmlNode * msg) { if (AM_I_DC) { const char *sys_from = pcmk__xe_get(msg, PCMK__XA_CRM_SYS_FROM); if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) { const char *from = pcmk__xe_get(msg, PCMK__XA_SRC); if (!controld_is_local_node(from)) { int level = LOG_INFO; const char *op = pcmk__xe_get(msg, PCMK__XA_CRM_TASK); /* make sure the election happens NOW */ if (controld_globals.fsa_state != S_ELECTION) { ha_msg_input_t new_input; level = LOG_WARNING; new_input.msg = msg; register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input, __func__); } do_crm_log(level, "Another DC detected: %s (op=%s)", from, op); goto done; } } } else { const char *sys_to = pcmk__xe_get(msg, PCMK__XA_CRM_SYS_TO); if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) { return; } } /* crm_log_xml_trace(msg, "HA[inbound]"); */ route_message(C_HA_MESSAGE, msg); done: controld_trigger_fsa(); } /*! * \internal * \brief Check whether a node is online * * \param[in] node Node to check * * \retval -1 if completely dead * \retval 0 if partially alive * \retval 1 if completely alive */ static int node_alive(const pcmk__node_status_t *node) { if (pcmk_is_set(node->flags, pcmk__node_status_remote)) { // Pacemaker Remote nodes can't be partially alive if (pcmk__str_eq(node->state, PCMK_VALUE_MEMBER, pcmk__str_none)) { return 1; } return -1; } else if (pcmk__cluster_is_node_active(node)) { // Completely up cluster node: both cluster member and peer return 1; } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc()) && !pcmk__str_eq(node->state, PCMK_VALUE_MEMBER, pcmk__str_none)) { // Completely down cluster node: neither cluster member nor peer return -1; } // Partially up cluster node: only cluster member or only peer return 0; } #define state_text(state) ((state)? (const char *)(state) : "in unknown state") // @TODO This is insanely long, and some parts should be functionized void peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, const void *data) { uint32_t old = 0; bool appeared = FALSE; bool is_remote = pcmk_is_set(node->flags, pcmk__node_status_remote); controld_node_pending_timer(node); /* The controller waits to receive some information from the membership * layer before declaring itself operational. If this is being called for a * cluster node, indicate that we have it. 
*/ if (!is_remote) { controld_set_fsa_input_flags(R_PEER_DATA); } if ((type == pcmk__node_update_processes) && pcmk_is_set(node->processes, crm_get_cluster_proc()) && !AM_I_DC && !is_remote) { /* relay_message() on the recipient ignores these messages, but * libcrmcluster will have cached the node name by then */ xmlNode *query = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, NULL, CRM_SYSTEM_CRMD, CRM_OP_HELLO, NULL); crm_debug("Sending hello to node %" PRIu32 " so that it learns our " "node name", node->cluster_layer_id); pcmk__cluster_send_message(node, pcmk_ipc_controld, query); pcmk__xml_free(query); } if (node->name == NULL) { return; } switch (type) { case pcmk__node_update_name: /* If we've never seen the node, then it also won't be in the status section */ crm_info("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), node->name, state_text(node->state)); return; case pcmk__node_update_state: /* This callback should not be called unless the state actually * changed, but here's a failsafe just in case. */ CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei), return); crm_info("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), node->name, state_text(node->state), state_text(data)); if (pcmk__str_eq(PCMK_VALUE_MEMBER, node->state, pcmk__str_none)) { appeared = TRUE; if (!is_remote) { controld_remove_fencing_cleanup(node->name); } } else { controld_remove_failed_sync_node(node->name); controld_remove_voter(node->name); } crmd_alert_node_event(node); break; case pcmk__node_update_processes: CRM_CHECK(data != NULL, return); old = *(const uint32_t *)data; appeared = pcmk_is_set(node->processes, crm_get_cluster_proc()); { const char *dc_s = controld_globals.dc_name; if ((dc_s == NULL) && AM_I_DC) { dc_s = PCMK_VALUE_TRUE; } crm_info("Node %s is %s a peer " QB_XS " DC=%s old=%#07x new=%#07x", node->name, (appeared? "now" : "no longer"), pcmk__s(dc_s, ""), old, node->processes); } if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) { /* Peer status did not change. This should not be possible, * since we don't track process flags other than peer status. */ crm_trace("Process flag %#7x did not change from %#7x to %#7x", crm_get_cluster_proc(), old, node->processes); return; } if (!appeared) { node->peer_lost = time(NULL); controld_remove_failed_sync_node(node->name); controld_remove_voter(node->name); } if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { crm_trace("Ignoring peer status change because not connected to CIB"); return; } else if (controld_globals.fsa_state == S_STOPPING) { crm_trace("Ignoring peer status change because stopping"); return; } if (!appeared && controld_is_local_node(node->name)) { /* Did we get evicted? */ crm_notice("Our peer connection failed"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL); } else if (pcmk__str_eq(node->name, controld_globals.dc_name, pcmk__str_casei) && !pcmk__cluster_is_node_active(node)) { /* The DC has left, so delete its transient attributes and * trigger a new election. * * A DC sends its shutdown request to all peers, who update the * DC's expected state to down. This avoids fencing upon * deletion of its transient attributes. 
*/ crm_notice("Our peer on the DC (%s) is dead", controld_globals.dc_name); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); controld_delete_node_state(node->name, controld_section_attrs, cib_none); } else if (AM_I_DC || pcmk_is_set(controld_globals.flags, controld_dc_left) || (controld_globals.dc_name == NULL)) { /* This only needs to be done once, so normally the DC should do * it. However if there is no DC, every node must do it, since * there is no other way to ensure some one node does it. */ if (appeared) { controld_trigger_fencing_history_sync(false); } else { controld_delete_node_state(node->name, controld_section_attrs, cib_none); } } break; } if (AM_I_DC) { xmlNode *update = NULL; uint32_t flags = controld_node_update_peer; int alive = node_alive(node); pcmk__graph_action_t *down = match_down_event(node->xml_id); crm_trace("Alive=%d, appeared=%d, down=%d", alive, appeared, (down? down->id : -1)); if (appeared && (alive > 0) && !is_remote) { register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } if (down) { const char *task = pcmk__xe_get(down->xml, PCMK_XA_OPERATION); if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) { const bool confirmed = pcmk_is_set(down->flags, pcmk__graph_action_confirmed); - /* tengine_stonith_callback() confirms fence actions */ + // fencing_cb() confirms fence actions crm_trace("Updating CIB %s fencer reported fencing of %s complete", (confirmed? "after" : "before"), node->name); } else if (!appeared && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_casei)) { // Shutdown actions are immediately confirmed (i.e. no_wait) if (!is_remote) { flags |= controld_node_update_join |controld_node_update_expected; crmd_peer_down(node, FALSE); check_join_state(controld_globals.fsa_state, __func__); } if (alive >= 0) { crm_info("%s of peer %s is in progress " QB_XS " action=%d", task, node->name, down->id); } else { crm_notice("%s of peer %s is complete " QB_XS " action=%d", task, node->name, down->id); pcmk__update_graph(controld_globals.transition_graph, down); trigger_graph(); } } else { const char *liveness = "alive"; if (alive == 0) { liveness = "partially alive"; } else if (alive < 0) { liveness = "dead"; } crm_trace("Node %s is %s, was expected to %s (op %d)", node->name, liveness, task, down->id); } } else if (appeared == FALSE) { if ((controld_globals.transition_graph == NULL) || (controld_globals.transition_graph->id <= 0)) { crm_info("Stonith/shutdown of node %s is unknown to the " "current DC", node->name); } else { crm_warn("Stonith/shutdown of node %s was not expected", node->name); } if (!is_remote) { crm_update_peer_join(__func__, node, controld_join_none); check_join_state(controld_globals.fsa_state, __func__); } abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node failure", NULL); fail_incompletable_actions(controld_globals.transition_graph, node->xml_id); } else { crm_trace("Node %s came up, was not expected to be down", node->name); } if (is_remote) { /* A pacemaker_remote node won't have its cluster status updated * in the CIB by membership-layer callbacks, so do it here. */ flags |= controld_node_update_cluster; /* Trigger resource placement on newly integrated nodes */ if (appeared) { abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Pacemaker Remote node integrated", NULL); } } if (!appeared && (type == pcmk__node_update_processes) && (node->when_member > 1)) { /* The node left CPG but is still a cluster member. 
Set its * membership time to 1 to record it in the cluster state as a * boolean, so we don't fence it due to * PCMK_OPT_NODE_PENDING_TIMEOUT. */ node->when_member = 1; flags |= controld_node_update_cluster; controld_node_pending_timer(node); } /* Update the CIB node state */ update = create_node_state_update(node, flags, NULL, __func__); if (update == NULL) { crm_debug("Node state update not yet possible for %s", node->name); } else { fsa_cib_anon_update(PCMK_XE_STATUS, update); } pcmk__xml_free(update); } controld_trigger_fsa(); } gboolean crm_fsa_trigger(gpointer user_data) { crm_trace("Invoking FSA (queue len: %u)", controld_fsa_message_queue_length()); s_crmd_fsa(C_FSA_INTERNAL); crm_trace("Exited FSA (queue len: %u)", controld_fsa_message_queue_length()); return G_SOURCE_CONTINUE; } diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index d30ee416be..991b9502ef 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -1,1094 +1,1093 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event); #define DEFAULT_FENCING_MAX_ATTEMPTS 10 static bool fence_reaction_panic = false; static unsigned long int fencing_max_attempts = DEFAULT_FENCING_MAX_ATTEMPTS; /* * Fencing failure counting * * We don't want to get stuck in a permanent fencing loop. Keep track of the * number of fencing failures for each target node, and the most we'll restart a * transition for. */ static GHashTable *fencing_fail_counts = NULL; /*! * \internal * \brief Update max fencing attempts before giving up * * \param[in] value New max fencing attempts */ static void update_fencing_max_attempts(const char *value) { int score = 0; int rc = pcmk_parse_score(value, &score, DEFAULT_FENCING_MAX_ATTEMPTS); // The option validator ensures invalid values shouldn't be possible CRM_CHECK((rc == pcmk_rc_ok) && (score > 0), return); if (fencing_max_attempts != score) { crm_debug("Maximum fencing attempts per transition is now %d (was %lu)", score, fencing_max_attempts); } fencing_max_attempts = score; } /*! * \internal * \brief Configure reaction to notification of local node being fenced * * \param[in] reaction_s Reaction type */ static void set_fence_reaction(const char *reaction_s) { if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) { fence_reaction_panic = true; } else { if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) { crm_warn("Invalid value '%s' for %s, using 'stop'", reaction_s, PCMK_OPT_FENCING_REACTION); } fence_reaction_panic = false; } } /*! 
* \internal * \brief Configure fencing options based on the CIB * * \param[in,out] options Name/value pairs for configured options */ void controld_configure_fencing(GHashTable *options) { const char *value = NULL; value = g_hash_table_lookup(options, PCMK_OPT_FENCING_REACTION); set_fence_reaction(value); value = g_hash_table_lookup(options, PCMK_OPT_FENCING_MAX_ATTEMPTS); update_fencing_max_attempts(value); } static bool too_many_fencing_failures(const char *target) { GHashTableIter iter; gpointer value = NULL; if (fencing_fail_counts == NULL) { return false; } if (target == NULL) { g_hash_table_iter_init(&iter, fencing_fail_counts); while (g_hash_table_iter_next(&iter, (gpointer *) &target, &value)) { if (GPOINTER_TO_INT(value) >= fencing_max_attempts) { goto too_many; } } } else if (g_hash_table_lookup_extended(fencing_fail_counts, target, NULL, &value) && (GPOINTER_TO_INT(value) >= fencing_max_attempts)) { goto too_many; } return false; too_many: crm_warn("Too many failures (%d) to fence %s, giving up", GPOINTER_TO_INT(value), target); return true; } /*! * \internal * \brief Reset the count of failed fencing operations for a node * * \param[in] target Name of node whose count to reset, or \c NULL to reset all */ void controld_reset_fencing_fail_count(const char *target) { if (fencing_fail_counts == NULL) { return; } if (target != NULL) { g_hash_table_remove(fencing_fail_counts, target); } else { g_hash_table_remove_all(fencing_fail_counts); } } static void increment_fencing_fail_count(const char *target) { gpointer key = NULL; gpointer value = NULL; if (fencing_fail_counts == NULL) { fencing_fail_counts = pcmk__strikey_table(free, NULL); } if (g_hash_table_lookup_extended(fencing_fail_counts, target, &key, &value)) { gpointer new_value = GINT_TO_POINTER(GPOINTER_TO_INT(value) + 1); // Increment value in the table without freeing key g_hash_table_steal(fencing_fail_counts, key); g_hash_table_insert(fencing_fail_counts, key, new_value); } else { g_hash_table_insert(fencing_fail_counts, pcmk__str_copy(target), GINT_TO_POINTER(1)); } } /* end fencing fail count functions */ static void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if (rc < pcmk_ok) { crm_err("Fencing update %d for %s: failed - %s (%d)", call_id, (char *)user_data, pcmk_strerror(rc), rc); crm_log_xml_warn(msg, "Failed update"); abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown, "CIB update failed", NULL); } else { crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); } } /*! * \internal * \brief Update a fencing target's node state * * \param[in] target Node that was successfully fenced * \param[in] target_xml_id CIB XML ID of target */ static void update_node_state_after_fencing(const char *target, const char *target_xml_id) { int rc = pcmk_ok; pcmk__node_status_t *peer = NULL; xmlNode *node_state = NULL; /* We (usually) rely on the membership layer to do * controld_node_update_cluster, and the peer status callback to do * controld_node_update_peer, because the node might have already rejoined - * before we get the stonith result here. + * before we get the fencing result here. 
*/ uint32_t flags = controld_node_update_join|controld_node_update_expected; CRM_CHECK((target != NULL) && (target_xml_id != NULL), return); // Ensure target is cached peer = pcmk__get_node(0, target, target_xml_id, pcmk__node_search_any); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { /* Usually, we rely on the membership layer to update the cluster state * in the CIB. However, if the node has never been seen, do it here, so * the node is not considered unclean. */ flags |= controld_node_update_cluster; } if (peer->xml_id == NULL) { crm_info("Recording XML ID '%s' for node '%s'", target_xml_id, target); peer->xml_id = pcmk__str_copy(target_xml_id); } crmd_peer_down(peer, TRUE); node_state = create_node_state_update(peer, flags, NULL, __func__); pcmk__xe_set(node_state, PCMK_XA_ID, target_xml_id); if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) { char *now_s = pcmk__ttoa(time(NULL)); pcmk__xe_set(node_state, PCMK__XA_NODE_FENCED, now_s); free(now_s); } rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn, PCMK_XE_STATUS, node_state, cib_can_create); pcmk__xml_free(node_state); crm_debug("Updating node state for %s after fencing (call %d)", target, rc); fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated); controld_delete_node_state(peer->name, controld_section_all, cib_none); } /*! * \internal * \brief Abort transition due to fencing failure * * \param[in] abort_action Whether to restart or stop transition * \param[in] target Don't restart if this node has too many failures * (\c NULL to check if any node has too many failures) * \param[in] reason Log this fencing action XML as abort reason (can be * \c NULL) */ static void abort_for_fencing_failure(enum pcmk__graph_next abort_action, const char *target, const xmlNode *reason) { /* If fencing repeatedly fails, we eventually give up on starting a new * transition for that reason. */ if ((abort_action != pcmk__graph_wait) && too_many_fencing_failures(target)) { abort_action = pcmk__graph_wait; } abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed", reason); } /* * Fencing cleanup list * * If the DC is fenced, proper notifications might not go out. The fencing * cleanup list allows the cluster to (re-)send notifications once a new DC is * elected. */ static GList *fencing_cleanup_list = NULL; /*! * \internal * \brief Add a node to the fencing cleanup list * * \param[in] target Name of node to add */ static void add_fencing_cleanup(const char *target) { fencing_cleanup_list = g_list_append(fencing_cleanup_list, pcmk__str_copy(target)); } /*! * \internal * \brief Remove a node from the fencing cleanup list * * \param[in] Name of node to remove */ void controld_remove_fencing_cleanup(const char *target) { GList *iter = fencing_cleanup_list; while (iter != NULL) { GList *tmp = iter; char *iter_name = tmp->data; iter = iter->next; if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) { crm_trace("Removing %s from the cleanup list", iter_name); fencing_cleanup_list = g_list_delete_link(fencing_cleanup_list, tmp); free(iter_name); } } } /*! * \internal * \brief Purge all entries from the fencing cleanup list */ void controld_purge_fencing_cleanup(void) { for (GList *iter = fencing_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_info("Purging %s from fencing cleanup list", target); free(target); } g_list_free(fencing_cleanup_list); fencing_cleanup_list = NULL; } /*! 
* \internal * \brief Send fencing updates for all entries in cleanup list, then purge it */ void controld_execute_fencing_cleanup(void) { for (GList *iter = fencing_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; pcmk__node_status_t *target_node = pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member); const char *uuid = pcmk__cluster_get_xml_id(target_node); crm_notice("Marking %s, target of a previous fencing action, as clean", target); update_node_state_after_fencing(target, uuid); free(target); } g_list_free(fencing_cleanup_list); fencing_cleanup_list = NULL; } /* end fencing cleanup list functions */ /* Fencer API client * * Functions that need to interact directly with the fencer via its API */ static stonith_t *fencer_api = NULL; static mainloop_timer_t *controld_fencer_connect_timer = NULL; static char *te_client_id = NULL; static bool fail_incompletable_fencing(pcmk__graph_t *graph) { GList *lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return false; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GList *lpc2 = NULL; pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data; if ((action->type != pcmk__cluster_graph_action) || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { continue; } task = pcmk__xe_get(action->xml, PCMK_XA_OPERATION); if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) { pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); last_action = action->xml; pcmk__update_graph(graph, action); crm_notice("Failing action %d (%s): fencer terminated", action->id, pcmk__xe_id(action->xml)); } } } if (last_action != NULL) { crm_warn("Fencing failure resulted in unrunnable actions"); abort_for_fencing_failure(pcmk__graph_restart, NULL, last_action); return true; } return false; } static void destroy_fencer_connection(stonith_t *st, stonith_event_t *e) { controld_cleanup_fencing_history_sync(st, false); if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_err("Lost fencer connection (will attempt to reconnect)"); if (!mainloop_timer_running(controld_fencer_connect_timer)) { mainloop_timer_start(controld_fencer_connect_timer); } } else { crm_info("Disconnected from fencer"); } if (fencer_api != NULL) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ if (fencer_api->state != stonith_disconnected) { fencer_api->cmds->disconnect(st); } fencer_api->cmds->remove_notification(fencer_api, NULL); } if (AM_I_DC) { fail_incompletable_fencing(controld_globals.transition_graph); trigger_graph(); } } /*! 
* \internal * \brief Handle an event notification from the fencing API * * \param[in] st Fencing API connection (ignored) * \param[in] event Fencing API event notification */ static void handle_fence_notification(stonith_t *st, stonith_event_t *event) { bool succeeded = true; const char *executioner = "the cluster"; const char *client = "a client"; const char *reason = NULL; int exec_status; if (te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (event == NULL) { crm_err("Notify data not found"); return; } if (event->executioner != NULL) { executioner = event->executioner; } if (event->client_origin != NULL) { client = event->client_origin; } exec_status = stonith__event_execution_status(event); if ((stonith__event_exit_status(event) != CRM_EX_OK) || (exec_status != PCMK_EXEC_DONE)) { succeeded = false; if (exec_status == PCMK_EXEC_DONE) { exec_status = PCMK_EXEC_ERROR; } } reason = stonith__event_exit_reason(event); crmd_alert_fencing_op(event); if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) { // Unfencing doesn't need special handling, just a log message if (succeeded) { crm_notice("%s was unfenced by %s at the request of %s@%s", event->target, executioner, client, event->origin); } else { crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d", event->target, executioner, pcmk_exec_status_str(exec_status), ((reason == NULL)? "" : ": "), ((reason == NULL)? "" : reason), stonith__event_exit_status(event)); } return; } if (succeeded && controld_is_local_node(event->target)) { /* We were notified of our own fencing. Most likely, either fencing was * misconfigured, or fabric fencing that doesn't cut cluster * communication is in use. * * Either way, shutting down the local host is a good idea, to require * administrator intervention. Also, other nodes would otherwise likely * set our status to lost because of the fencing callback and discard * our subsequent election votes as "not part of our cluster". */ crm_crit("We were allegedly just fenced by %s for %s!", executioner, event->origin); // Dumps blackbox if enabled if (fence_reaction_panic) { pcmk__panic("Notified of own fencing"); } else { crm_exit(CRM_EX_FATAL); } return; // Should never get here } /* Update the count of fencing failures for this target, in case we become * DC later. The current DC has already updated its fail count in - * tengine_stonith_callback(). + * fencing_cb(). */ if (!AM_I_DC) { if (succeeded) { controld_reset_fencing_fail_count(event->target); } else { increment_fencing_fail_count(event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: " "%s%s%s%s " QB_XS " event=%s", event->target, (succeeded? "" : " not"), event->action, executioner, client, event->origin, (succeeded? "OK" : pcmk_exec_status_str(exec_status)), ((reason == NULL)? "" : " ("), ((reason == NULL)? "" : reason), ((reason == NULL)? "" : ")"), event->id); if (succeeded) { const uint32_t flags = pcmk__node_search_any |pcmk__node_search_cluster_cib; pcmk__node_status_t *peer = pcmk__search_node_caches(0, event->target, NULL, flags); const char *uuid = NULL; if (peer == NULL) { return; } uuid = pcmk__cluster_get_xml_id(peer); if (AM_I_DC) { /* The DC always sends updates */ update_node_state_after_fencing(event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. 
* Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the * peer cache via refresh_remote_nodes(). For now, we rely on the * scheduler creating fence pseudo-events for the guests. */ if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) { /* Abort the current transition if it wasn't the cluster that * initiated fencing. */ crm_info("External fencing operation from %s fenced %s", client, event->target); abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "External Fencing Operation", NULL); } } else if (pcmk__str_eq(controld_globals.dc_name, event->target, pcmk__str_null_matches|pcmk__str_casei) && !pcmk_is_set(peer->flags, pcmk__node_status_remote)) { // Assume the target was our DC if we don't currently have one if (controld_globals.dc_name != NULL) { crm_notice("Fencing target %s was our DC", event->target); } else { crm_notice("Fencing target %s may have been our DC", event->target); } /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (controld_is_local_node(event->executioner)) { update_node_state_after_fencing(event->target, uuid); } add_fencing_cleanup(event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) { remote_ra_fail(event->target); } crmd_peer_down(peer, TRUE); } } /*! * \brief Connect to fencer * * \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer * * \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry * \note If user_data is NULL, this will wait 2s between attempts, for up to * 30 attempts, meaning the controller could be blocked as long as 58s. 
*/ gboolean controld_timer_fencer_connect(gpointer user_data) { int rc = pcmk_ok; if (fencer_api == NULL) { fencer_api = stonith__api_new(); if (fencer_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return G_SOURCE_REMOVE; } } if (fencer_api->state != stonith_disconnected) { crm_trace("Already connected to fencer, no need to retry"); return G_SOURCE_REMOVE; } if (user_data == NULL) { // Blocking (retry failures now until successful) rc = stonith__api_connect_retry(fencer_api, crm_system_name, 30); if (rc != pcmk_rc_ok) { crm_err("Could not connect to fencer in 30 attempts: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); } } else { // Non-blocking (retry failures later in main loop) rc = fencer_api->cmds->connect(fencer_api, crm_system_name, NULL); if (controld_fencer_connect_timer == NULL) { controld_fencer_connect_timer = mainloop_timer_add("controld_fencer_connect", 1000, TRUE, controld_timer_fencer_connect, GINT_TO_POINTER(TRUE)); } if (rc != pcmk_ok) { if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_notice("Fencer connection failed (will retry): %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); if (!mainloop_timer_running(controld_fencer_connect_timer)) { mainloop_timer_start(controld_fencer_connect_timer); } return G_SOURCE_CONTINUE; } else { crm_info("Fencer connection failed (ignoring because no longer required): %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); } return G_SOURCE_REMOVE; } } if (rc == pcmk_ok) { stonith_api_operations_t *cmds = fencer_api->cmds; cmds->register_notification(fencer_api, PCMK__VALUE_ST_NOTIFY_DISCONNECT, destroy_fencer_connection); cmds->register_notification(fencer_api, PCMK__VALUE_ST_NOTIFY_FENCE, handle_fence_notification); cmds->register_notification(fencer_api, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, tengine_stonith_history_synced); controld_trigger_fencing_history_sync(true); crm_notice("Fencer successfully connected"); } return G_SOURCE_REMOVE; } void controld_disconnect_fencer(bool destroy) { if (fencer_api != NULL) { // Prevent fencer connection from coming up again controld_clear_fsa_input_flags(R_ST_REQUIRED); if (fencer_api->state != stonith_disconnected) { fencer_api->cmds->disconnect(fencer_api); } fencer_api->cmds->remove_notification(fencer_api, NULL); } if (destroy) { if (fencer_api != NULL) { fencer_api->cmds->free(fencer_api); fencer_api = NULL; } if (controld_fencer_connect_timer) { mainloop_timer_del(controld_fencer_connect_timer); controld_fencer_connect_timer = NULL; } if (te_client_id) { free(te_client_id); te_client_id = NULL; } } } static gboolean do_stonith_history_sync(gpointer user_data) { if ((fencer_api != NULL) && (fencer_api->state != stonith_disconnected)) { stonith_history_t *history = NULL; controld_cleanup_fencing_history_sync(fencer_api, false); fencer_api->cmds->history(fencer_api, st_opt_sync_call|st_opt_broadcast, NULL, &history, 5); stonith__history_free(history); return TRUE; } else { crm_info("Skip triggering stonith history-sync as stonith is disconnected"); return FALSE; } } static void -tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) +fencing_cb(stonith_t *stonith, stonith_callback_data_t *data) { char *uuid = NULL; int stonith_id = -1; int transition_id = -1; pcmk__graph_action_t *action = NULL; const char *target = NULL; if ((data == NULL) || (data->userdata == NULL)) { crm_err("Ignoring fence operation %d result: " "No transition key given (bug?)", ((data == NULL)? 
-1 : data->call_id)); return; } if (!AM_I_DC) { const char *reason = stonith__exit_reason(data); if (reason == NULL) { reason = pcmk_exec_status_str(stonith__execution_status(data)); } crm_notice("Result of fence operation %d: %d (%s) " QB_XS " key=%s", data->call_id, stonith__exit_status(data), reason, (const char *) data->userdata); return; } CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id, &stonith_id, NULL), goto bail); if (controld_globals.transition_graph->complete || (stonith_id < 0) || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none) || (controld_globals.transition_graph->id != transition_id)) { crm_info("Ignoring fence operation %d result: " "Not from current transition " QB_XS " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)", data->call_id, pcmk__btoa(controld_globals.transition_graph->complete), stonith_id, uuid, controld_globals.te_uuid, transition_id, controld_globals.transition_graph->id); goto bail; } action = controld_get_action(stonith_id); if (action == NULL) { crm_err("Ignoring fence operation %d result: " "Action %d not found in transition graph (bug?) " QB_XS " uuid=%s transition=%d", data->call_id, stonith_id, uuid, transition_id); goto bail; } target = pcmk__xe_get(action->xml, PCMK__META_ON_NODE); if (target == NULL) { crm_err("Ignoring fence operation %d result: No target given (bug?)", data->call_id); goto bail; } stop_te_timer(action); if (stonith__exit_status(data) == CRM_EX_OK) { const char *uuid = pcmk__xe_get(action->xml, PCMK__META_ON_NODE_UUID); const char *op = crm_meta_value(action->params, PCMK__META_STONITH_ACTION); crm_info("Fence operation %d for %s succeeded", data->call_id, target); if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) { te_action_confirmed(action, NULL); if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) { const char *value = NULL; char *now = pcmk__ttoa(time(NULL)); gboolean is_remote_node = FALSE; /* This check is not 100% reliable, since this node is not * guaranteed to have the remote node cached. However, it * doesn't have to be reliable, since the attribute manager can * learn a node's "remoteness" by other means sooner or later. * This allows it to learn more quickly if this node does have * the information. */ if (g_hash_table_lookup(pcmk__remote_peer_cache, uuid) != NULL) { is_remote_node = TRUE; } update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, is_remote_node); free(now); value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL); update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, is_remote_node); value = crm_meta_value(action->params, PCMK__META_DIGESTS_SECURE); update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, is_remote_node); } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) { update_node_state_after_fencing(target, uuid); pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); } } controld_reset_fencing_fail_count(target); } else { enum pcmk__graph_next abort_action = pcmk__graph_restart; int status = stonith__execution_status(data); const char *reason = stonith__exit_reason(data); if (reason == NULL) { if (status == PCMK_EXEC_DONE) { reason = "Agent returned error"; } else { reason = pcmk_exec_status_str(status); } } pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); /* If no fence devices were available, there's no use in immediately * checking again, so don't start a new transition in that case. 
*/ if (status == PCMK_EXEC_NO_FENCE_DEVICE) { crm_warn("Fence operation %d for %s failed: %s " "(aborting transition and giving up for now)", data->call_id, target, reason); abort_action = pcmk__graph_wait; } else { crm_notice("Fence operation %d for %s failed: %s " "(aborting transition)", data->call_id, target, reason); } /* Increment the fail count now, so abort_for_fencing_failure() can * check it. Non-DC nodes will increment it in * handle_fence_notification(). */ increment_fencing_fail_count(target); abort_for_fencing_failure(abort_action, target, NULL); } pcmk__update_graph(controld_globals.transition_graph, action); trigger_graph(); bail: free(data->userdata); free(uuid); return; } static int fence_with_delay(const char *target, const char *type, int delay) { uint32_t options = st_opt_none; // Group of enum stonith_call_options int timeout_sec = pcmk__timeout_ms2s(controld_globals.transition_graph->fencing_timeout); if (crmd_join_phase_count(controld_join_confirmed) == 1) { stonith__set_call_options(options, target, st_opt_allow_self_fencing); } return fencer_api->cmds->fence_with_delay(fencer_api, options, target, type, timeout_sec, 0, delay); } /*! * \internal * \brief Execute a fencing action from a transition graph * * \param[in] graph Transition graph being executed (ignored) * \param[in] action Fencing action to execute * * \return Standard Pacemaker return code */ int controld_execute_fence_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { int rc = 0; const char *id = pcmk__xe_id(action->xml); const char *uuid = pcmk__xe_get(action->xml, PCMK__META_ON_NODE_UUID); const char *target = pcmk__xe_get(action->xml, PCMK__META_ON_NODE); const char *type = crm_meta_value(action->params, PCMK__META_STONITH_ACTION); char *transition_key = NULL; const char *priority_delay = NULL; int delay_i = 0; gboolean invalid_action = FALSE; int timeout_sec = pcmk__timeout_ms2s(controld_globals.transition_graph->fencing_timeout); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return EPROTO; } priority_delay = crm_meta_value(action->params, PCMK_OPT_PRIORITY_FENCING_DELAY); crm_notice("Requesting fencing (%s) targeting node %s " QB_XS " action=%s timeout=%i%s%s", type, target, id, timeout_sec, priority_delay ? " priority_delay=" : "", priority_delay ? priority_delay : ""); /* Passing NULL means block until we can connect... */ controld_timer_fencer_connect(NULL); pcmk__scan_min_int(priority_delay, &delay_i, 0); rc = fence_with_delay(target, type, delay_i); transition_key = pcmk__transition_key(controld_globals.transition_graph->id, action->id, 0, controld_globals.te_uuid), fencer_api->cmds->register_callback(fencer_api, rc, (timeout_sec + (delay_i > 0 ? 
delay_i : 0)), st_opt_timeout_updates, transition_key, - "tengine_stonith_callback", - tengine_stonith_callback); + "fencing_cb", fencing_cb); return pcmk_rc_ok; } bool controld_valid_fencing_watchdog_timeout(const char *value) { const char *our_nodename = controld_globals.cluster->priv->node_name; if ((fencer_api == NULL) || (fencer_api->state == stonith_disconnected) || !stonith__watchdog_fencing_enabled_for_node_api(fencer_api, our_nodename)) { // Anything is valid since it won't be used return true; } return pcmk__valid_fencing_watchdog_timeout(value); } /* end stonith API client functions */ /* * Fencing history synchronization * * Each node's fencer keeps track of a cluster-wide fencing history. When a node * joins or leaves, we need to synchronize the history across all nodes. */ static crm_trigger_t *fencing_history_sync_trigger = NULL; static mainloop_timer_t *stonith_history_sync_timer_short = NULL; static mainloop_timer_t *stonith_history_sync_timer_long = NULL; void controld_cleanup_fencing_history_sync(stonith_t *st, bool free_timers) { if (free_timers) { mainloop_timer_del(stonith_history_sync_timer_short); stonith_history_sync_timer_short = NULL; mainloop_timer_del(stonith_history_sync_timer_long); stonith_history_sync_timer_long = NULL; } else { mainloop_timer_stop(stonith_history_sync_timer_short); mainloop_timer_stop(stonith_history_sync_timer_long); } if (st) { st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED); } } static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) { controld_cleanup_fencing_history_sync(st, false); crm_debug("Fence-history synced - cancel all timers"); } static gboolean stonith_history_sync_set_trigger(gpointer user_data) { mainloop_set_trigger(fencing_history_sync_trigger); return FALSE; } void controld_trigger_fencing_history_sync(bool long_timeout) { /* trigger a sync in 5s to give more nodes the * chance to show up so that we don't create * unnecessary stonith-history-sync traffic * * the long timeout of 30s is there as a fallback * so that after a successful connection to fenced * we will wait for 30s for the DC to trigger a * history-sync * if this doesn't happen we trigger a sync locally * (e.g. fenced segfaults and is restarted by pacemakerd) */ /* as we are finally checking the stonith-connection * in do_stonith_history_sync we should be fine * leaving stonith_history_sync_time and fencing_history_sync_trigger * around */ if (fencing_history_sync_trigger == NULL) { fencing_history_sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW, do_stonith_history_sync, NULL); } if (long_timeout) { if(stonith_history_sync_timer_long == NULL) { stonith_history_sync_timer_long = mainloop_timer_add("history_sync_long", 30000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); mainloop_timer_start(stonith_history_sync_timer_long); } else { if(stonith_history_sync_timer_short == NULL) { stonith_history_sync_timer_short = mainloop_timer_add("history_sync_short", 5000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); mainloop_timer_start(stonith_history_sync_timer_short); } } /* end stonith history synchronization functions */
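
The per-target fail-count bookkeeping touched by this patch (increment_fencing_fail_count() and too_many_fencing_failures()) is the piece most easily read in isolation, so below is a minimal, self-contained sketch of the same GHashTable pattern. It is an illustration only, not code from the patch: it substitutes a plain case-sensitive GLib string table for pcmk__strikey_table(), hard-codes an example threshold in place of fencing_max_attempts, and omits the NULL-target "any node over the limit" scan.

/*
 * Sketch (not part of the patch): per-target failure counting with a
 * GHashTable, mirroring the steal-and-reinsert increment used above.
 * Build with: gcc sketch.c $(pkg-config --cflags --libs glib-2.0)
 */
#include <stdio.h>
#include <glib.h>

static GHashTable *fail_counts = NULL;
static const int max_attempts = 10;     /* stands in for fencing_max_attempts */

static void
increment_fail_count(const char *target)
{
    gpointer key = NULL;
    gpointer value = NULL;

    if (fail_counts == NULL) {
        fail_counts = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
    }
    if (g_hash_table_lookup_extended(fail_counts, target, &key, &value)) {
        /* Bump the stored count without freeing the existing key */
        g_hash_table_steal(fail_counts, key);
        g_hash_table_insert(fail_counts, key,
                            GINT_TO_POINTER(GPOINTER_TO_INT(value) + 1));
    } else {
        g_hash_table_insert(fail_counts, g_strdup(target), GINT_TO_POINTER(1));
    }
}

static gboolean
too_many_failures(const char *target)
{
    gpointer value = NULL;

    if ((fail_counts != NULL)
        && g_hash_table_lookup_extended(fail_counts, target, NULL, &value)) {
        return GPOINTER_TO_INT(value) >= max_attempts;
    }
    return FALSE;
}

int
main(void)
{
    for (int i = 0; i < 10; i++) {
        increment_fail_count("node1");
    }
    printf("give up on node1? %s\n", too_many_failures("node1")? "yes" : "no");
    printf("give up on node2? %s\n", too_many_failures("node2")? "yes" : "no");
    return 0;
}

In the controller itself, the DC increments the count from fencing_cb() when a requested fence operation fails, non-DC nodes increment it from handle_fence_notification(), and abort_for_fencing_failure() downgrades the transition abort to pcmk__graph_wait once the target's count reaches the configured maximum.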