diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 47cc87ed83..f3ab3069c7 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -1,1053 +1,1053 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event); /* * stonith failure counting * * We don't want to get stuck in a permanent fencing loop. Keep track of the * number of fencing failures for each target node, and the most we'll restart a * transition for. */ struct st_fail_rec { int count; }; static bool fence_reaction_panic = FALSE; static unsigned long int stonith_max_attempts = 10; static GHashTable *stonith_failures = NULL; void update_stonith_max_attempts(const char *value) { stonith_max_attempts = char2score(value); if (stonith_max_attempts < 1UL) { stonith_max_attempts = 10UL; } } void set_fence_reaction(const char *reaction_s) { if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) { fence_reaction_panic = TRUE; } else { if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) { crm_warn("Invalid value '%s' for %s, using 'stop'", reaction_s, XML_CONFIG_ATTR_FENCE_REACTION); } fence_reaction_panic = FALSE; } } static gboolean too_many_st_failures(const char *target) { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *value = NULL; if (stonith_failures == NULL) { return FALSE; } if (target == NULL) { g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { if (value->count >= stonith_max_attempts) { target = (const char*)key; goto too_many; } } } else { value = g_hash_table_lookup(stonith_failures, target); if ((value != NULL) && (value->count >= stonith_max_attempts)) { goto too_many; } } return FALSE; too_many: crm_warn("Too many failures (%d) to fence %s, giving up", value->count, target); return TRUE; } /*! * \internal * \brief Reset a stonith fail count * * \param[in] target Name of node to reset, or NULL for all */ void st_fail_count_reset(const char *target) { if (stonith_failures == NULL) { return; } if (target) { struct st_fail_rec *rec = NULL; rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count = 0; } } else { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *rec = NULL; g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rec)) { rec->count = 0; } } } static void st_fail_count_increment(const char *target) { struct st_fail_rec *rec = NULL; if (stonith_failures == NULL) { stonith_failures = pcmk__strkey_table(free, free); } rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count++; } else { rec = malloc(sizeof(struct st_fail_rec)); if(rec == NULL) { return; } rec->count = 1; g_hash_table_insert(stonith_failures, strdup(target), rec); } } /* end stonith fail count functions */ static void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if (rc < pcmk_ok) { crm_err("Fencing update %d for %s: failed - %s (%d)", call_id, (char *)user_data, pcmk_strerror(rc), rc); crm_log_xml_warn(msg, "Failed update"); abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL); } else { crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); } } static void send_stonith_update(pcmk__graph_action_t *action, const char *target, const char *uuid) { int rc = pcmk_ok; crm_node_t *peer = NULL; /* We (usually) rely on the membership layer to do node_update_cluster, * and the peer status callback to do node_update_peer, because the node * might have already rejoined before we get the stonith result here. */ int flags = node_update_join | node_update_expected; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = NULL; CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); /* Make sure the membership and join caches are accurate */ peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { /* Usually, we rely on the membership layer to update the cluster state * in the CIB. However, if the node has never been seen, do it here, so * the node is not considered unclean. */ flags |= node_update_cluster; } if (peer->uuid == NULL) { crm_info("Recording uuid '%s' for node '%s'", uuid, target); peer->uuid = strdup(uuid); } crmd_peer_down(peer, TRUE); /* Generate a node state update for the CIB */ node_state = create_node_state_update(peer, flags, NULL, __func__); /* we have to mark whether or not remote nodes have already been fenced */ if (peer->flags & crm_remote_node) { char *now_s = pcmk__ttoa(time(NULL)); crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s); free(now_s); } /* Force our known ID */ crm_xml_add(node_state, XML_ATTR_UUID, uuid); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override | cib_scope_local | cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated); /* Make sure it sticks */ /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ controld_delete_node_state(peer->uname, controld_section_all, cib_scope_local); free_xml(node_state); return; } /*! * \internal * \brief Abort transition due to stonith failure * * \param[in] abort_action Whether to restart or stop transition * \param[in] target Don't restart if this (NULL for any) has too many failures * \param[in] reason Log this stonith action XML as abort reason (or NULL) */ static void abort_for_stonith_failure(enum transition_action abort_action, const char *target, xmlNode *reason) { /* If stonith repeatedly fails, we eventually give up on starting a new * transition for that reason. */ if ((abort_action != tg_stop) && too_many_st_failures(target)) { abort_action = tg_stop; } abort_transition(INFINITY, abort_action, "Stonith failed", reason); } /* * stonith cleanup list * * If the DC is shot, proper notifications might not go out. * The stonith cleanup list allows the cluster to (re-)send * notifications once a new DC is elected. */ static GList *stonith_cleanup_list = NULL; /*! * \internal * \brief Add a node to the stonith cleanup list * * \param[in] target Name of node to add */ void add_stonith_cleanup(const char *target) { stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target)); } /*! * \internal * \brief Remove a node from the stonith cleanup list * * \param[in] Name of node to remove */ void remove_stonith_cleanup(const char *target) { GList *iter = stonith_cleanup_list; while (iter != NULL) { GList *tmp = iter; char *iter_name = tmp->data; iter = iter->next; if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) { crm_trace("Removing %s from the cleanup list", iter_name); stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); free(iter_name); } } } /*! * \internal * \brief Purge all entries from the stonith cleanup list */ void purge_stonith_cleanup() { if (stonith_cleanup_list) { GList *iter = NULL; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_info("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } } /*! * \internal * \brief Send stonith updates for all entries in cleanup list, then purge it */ void execute_stonith_cleanup() { GList *iter; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_node_t *target_node = crm_get_peer(0, target); const char *uuid = crm_peer_uuid(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } /* end stonith cleanup list functions */ /* stonith API client * * Functions that need to interact directly with the fencer via its API */ static stonith_t *stonith_api = NULL; static crm_trigger_t *stonith_reconnect = NULL; static char *te_client_id = NULL; static gboolean fail_incompletable_stonith(pcmk__graph_t *graph) { GList *lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GList *lpc2 = NULL; - synapse_t *synapse = (synapse_t *) lpc->data; + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data; if ((action->type != pcmk__cluster_graph_action) || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { crm__set_graph_action_flags(action, pcmk__graph_action_failed); last_action = action->xml; pcmk__update_graph(graph, action); crm_notice("Failing action %d (%s): fencer terminated", action->id, ID(action->xml)); } } } if (last_action != NULL) { crm_warn("Fencer failure resulted in unrunnable actions"); abort_for_stonith_failure(tg_restart, NULL, last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e) { te_cleanup_stonith_history_sync(st, FALSE); if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencing daemon disconnected"); } if (stonith_api) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(st); } stonith_api->cmds->remove_notification(stonith_api, NULL); } if (AM_I_DC) { fail_incompletable_stonith(transition_graph); trigger_graph(); } } /*! * \internal * \brief Handle an event notification from the fencing API * * \param[in] st Fencing API connection * \param[in] event Fencing API event notification */ static void handle_fence_notification(stonith_t *st, stonith_event_t *event) { bool succeeded = true; const char *executioner = "the cluster"; const char *client = "a client"; const char *reason = NULL; int exec_status; if (te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (event == NULL) { crm_err("Notify data not found"); return; } if (event->executioner != NULL) { executioner = event->executioner; } if (event->client_origin != NULL) { client = event->client_origin; } exec_status = stonith__event_execution_status(event); if ((stonith__event_exit_status(event) != CRM_EX_OK) || (exec_status != PCMK_EXEC_DONE)) { succeeded = false; if (exec_status == PCMK_EXEC_DONE) { exec_status = PCMK_EXEC_ERROR; } } reason = stonith__event_exit_reason(event); crmd_alert_fencing_op(event); if (pcmk__str_eq("on", event->action, pcmk__str_none)) { // Unfencing doesn't need special handling, just a log message if (succeeded) { crm_notice("%s was unfenced by %s at the request of %s@%s", event->target, executioner, client, event->origin); } else { crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d", event->target, executioner, pcmk_exec_status_str(exec_status), ((reason == NULL)? "" : ": "), ((reason == NULL)? "" : reason), stonith__event_exit_status(event)); } return; } if (succeeded && pcmk__str_eq(event->target, fsa_our_uname, pcmk__str_casei)) { /* We were notified of our own fencing. Most likely, either fencing was * misconfigured, or fabric fencing that doesn't cut cluster * communication is in use. * * Either way, shutting down the local host is a good idea, to require * administrator intervention. Also, other nodes would otherwise likely * set our status to lost because of the fencing callback and discard * our subsequent election votes as "not part of our cluster". */ crm_crit("We were allegedly just fenced by %s for %s!", executioner, event->origin); // Dumps blackbox if enabled if (fence_reaction_panic) { pcmk__panic(__func__); } else { crm_exit(CRM_EX_FATAL); } return; // Should never get here } /* Update the count of fencing failures for this target, in case we become * DC later. The current DC has already updated its fail count in * tengine_stonith_callback(). */ if (!AM_I_DC) { if (succeeded) { st_fail_count_reset(event->target); } else { st_fail_count_increment(event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: " "%s%s%s%s " CRM_XS " event=%s", event->target, (succeeded? "" : " not"), event->action, executioner, client, event->origin, (succeeded? "OK" : pcmk_exec_status_str(exec_status)), ((reason == NULL)? "" : " ("), ((reason == NULL)? "" : reason), ((reason == NULL)? "" : ")"), event->id); if (succeeded) { crm_node_t *peer = pcmk__search_known_node_cache(0, event->target, CRM_GET_PEER_ANY); const char *uuid = NULL; if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); if (AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. * Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the * peer cache via crm_remote_peer_cache_refresh(). For now, we rely * on the scheduler creating fence pseudo-events for the guests. */ if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) { /* Abort the current transition if it wasn't the cluster that * initiated fencing. */ crm_info("External fencing operation from %s fenced %s", client, event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we don't currently have one */ } else if (pcmk__str_eq(fsa_our_dc, event->target, pcmk__str_null_matches|pcmk__str_casei) && !pcmk_is_set(peer->flags, crm_remote_node)) { crm_notice("Fencing target %s %s our leader", event->target, (fsa_our_dc? "was" : "may have been")); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (pcmk__str_eq(event->executioner, fsa_our_uname, pcmk__str_casei)) { send_stonith_update(NULL, event->target, uuid); } add_stonith_cleanup(event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (pcmk_is_set(peer->flags, crm_remote_node)) { remote_ra_fail(event->target); } crmd_peer_down(peer, TRUE); } } /*! * \brief Connect to fencer * * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop * * \return TRUE * \note If user_data is NULL, this will wait 2s between attempts, for up to * 30 attempts, meaning the controller could be blocked as long as 58s. */ static gboolean te_connect_stonith(gpointer user_data) { int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return TRUE; } } if (stonith_api->state != stonith_disconnected) { crm_trace("Already connected to fencer, no need to retry"); return TRUE; } if (user_data == NULL) { // Blocking (retry failures now until successful) rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 30 attempts: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } } else { // Non-blocking (retry failures later in main loop) rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (rc != pcmk_ok) { if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) { crm_notice("Fencer connection failed (will retry): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencer connection failed (ignoring because no longer required): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } return TRUE; } } if (rc == pcmk_ok) { stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE, handle_fence_notification); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED, tengine_stonith_history_synced); te_trigger_stonith_history_sync(TRUE); crm_notice("Fencer successfully connected"); } return TRUE; } /*! \internal \brief Schedule fencer connection attempt in main loop */ void controld_trigger_fencer_connect() { if (stonith_reconnect == NULL) { stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, GINT_TO_POINTER(TRUE)); } controld_set_fsa_input_flags(R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); } void controld_disconnect_fencer(bool destroy) { if (stonith_api) { // Prevent fencer connection from coming up again controld_clear_fsa_input_flags(R_ST_REQUIRED); if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(stonith_api); } stonith_api->cmds->remove_notification(stonith_api, NULL); } if (destroy) { if (stonith_api) { stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (stonith_reconnect) { mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; } if (te_client_id) { free(te_client_id); te_client_id = NULL; } } } static gboolean do_stonith_history_sync(gpointer user_data) { if (stonith_api && (stonith_api->state != stonith_disconnected)) { stonith_history_t *history = NULL; te_cleanup_stonith_history_sync(stonith_api, FALSE); stonith_api->cmds->history(stonith_api, st_opt_sync_call | st_opt_broadcast, NULL, &history, 5); stonith_history_free(history); return TRUE; } else { crm_info("Skip triggering stonith history-sync as stonith is disconnected"); return FALSE; } } static void tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) { char *uuid = NULL; int stonith_id = -1; int transition_id = -1; pcmk__graph_action_t *action = NULL; const char *target = NULL; if ((data == NULL) || (data->userdata == NULL)) { crm_err("Ignoring fence operation %d result: " "No transition key given (bug?)", ((data == NULL)? -1 : data->call_id)); return; } if (!AM_I_DC) { const char *reason = stonith__exit_reason(data); if (reason == NULL) { reason = pcmk_exec_status_str(stonith__execution_status(data)); } crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s", data->call_id, stonith__exit_status(data), reason, (const char *) data->userdata); return; } CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id, &stonith_id, NULL), goto bail); if (transition_graph->complete || (stonith_id < 0) || !pcmk__str_eq(uuid, te_uuid, pcmk__str_none) || (transition_graph->id != transition_id)) { crm_info("Ignoring fence operation %d result: " "Not from current transition " CRM_XS " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)", data->call_id, pcmk__btoa(transition_graph->complete), stonith_id, uuid, te_uuid, transition_id, transition_graph->id); goto bail; } action = controld_get_action(stonith_id); if (action == NULL) { crm_err("Ignoring fence operation %d result: " "Action %d not found in transition graph (bug?) " CRM_XS " uuid=%s transition=%d", data->call_id, stonith_id, uuid, transition_id); goto bail; } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if (target == NULL) { crm_err("Ignoring fence operation %d result: No target given (bug?)", data->call_id); goto bail; } stop_te_timer(action); if (stonith__exit_status(data) == CRM_EX_OK) { const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); const char *op = crm_meta_value(action->params, "stonith_action"); crm_info("Fence operation %d for %s succeeded", data->call_id, target); if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) { te_action_confirmed(action, NULL); if (pcmk__str_eq("on", op, pcmk__str_casei)) { const char *value = NULL; char *now = pcmk__ttoa(time(NULL)); gboolean is_remote_node = FALSE; /* This check is not 100% reliable, since this node is not * guaranteed to have the remote node cached. However, it * doesn't have to be reliable, since the attribute manager can * learn a node's "remoteness" by other means sooner or later. * This allows it to learn more quickly if this node does have * the information. */ if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) { is_remote_node = TRUE; } update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, is_remote_node); free(now); value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL); update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, is_remote_node); value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE); update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, is_remote_node); } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) { send_stonith_update(action, target, uuid); crm__set_graph_action_flags(action, pcmk__graph_action_sent_update); } } st_fail_count_reset(target); } else { enum transition_action abort_action = tg_restart; int status = stonith__execution_status(data); const char *reason = stonith__exit_reason(data); if (reason == NULL) { if (status == PCMK_EXEC_DONE) { reason = "Agent returned error"; } else { reason = pcmk_exec_status_str(status); } } crm__set_graph_action_flags(action, pcmk__graph_action_failed); /* If no fence devices were available, there's no use in immediately * checking again, so don't start a new transition in that case. */ if (status == PCMK_EXEC_NO_FENCE_DEVICE) { crm_warn("Fence operation %d for %s failed: %s " "(aborting transition and giving up for now)", data->call_id, target, reason); abort_action = tg_stop; } else { crm_notice("Fence operation %d for %s failed: %s " "(aborting transition)", data->call_id, target, reason); } /* Increment the fail count now, so abort_for_stonith_failure() can * check it. Non-DC nodes will increment it in * handle_fence_notification(). */ st_fail_count_increment(target); abort_for_stonith_failure(abort_action, target, NULL); } pcmk__update_graph(transition_graph, action); trigger_graph(); bail: free(data->userdata); free(uuid); return; } static int fence_with_delay(const char *target, const char *type, const char *delay) { uint32_t options = st_opt_none; // Group of enum stonith_call_options int timeout_sec = (int) (transition_graph->stonith_timeout / 1000); int delay_i; if (crmd_join_phase_count(crm_join_confirmed) == 1) { stonith__set_call_options(options, target, st_opt_allow_suicide); } pcmk__scan_min_int(delay, &delay_i, 0); return stonith_api->cmds->fence_with_delay(stonith_api, options, target, type, timeout_sec, 0, delay_i); } gboolean te_fence_node(pcmk__graph_t *graph, pcmk__graph_action_t *action) { int rc = 0; const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; char *transition_key = NULL; const char *priority_delay = NULL; gboolean invalid_action = FALSE; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = crm_meta_value(action->params, "stonith_action"); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return FALSE; } priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); crm_notice("Requesting fencing (%s) of node %s " CRM_XS " action=%s timeout=%u%s%s", type, target, id, transition_graph->stonith_timeout, priority_delay ? " priority_delay=" : "", priority_delay ? priority_delay : ""); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); rc = fence_with_delay(target, type, priority_delay); transition_key = pcmk__transition_key(transition_graph->id, action->id, 0, te_uuid), stonith_api->cmds->register_callback(stonith_api, rc, (int) (transition_graph->stonith_timeout / 1000), st_opt_timeout_updates, transition_key, "tengine_stonith_callback", tengine_stonith_callback); return TRUE; } bool controld_verify_stonith_watchdog_timeout(const char *value) { gboolean rv = TRUE; if (stonith_api && (stonith_api->state != stonith_disconnected) && stonith__watchdog_fencing_enabled_for_node_api(stonith_api, fsa_our_uname)) { rv = pcmk__valid_sbd_timeout(value); } return rv; } /* end stonith API client functions */ /* * stonith history synchronization * * Each node's fencer keeps track of a cluster-wide fencing history. When a node * joins or leaves, we need to synchronize the history across all nodes. */ static crm_trigger_t *stonith_history_sync_trigger = NULL; static mainloop_timer_t *stonith_history_sync_timer_short = NULL; static mainloop_timer_t *stonith_history_sync_timer_long = NULL; void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers) { if (free_timers) { mainloop_timer_del(stonith_history_sync_timer_short); stonith_history_sync_timer_short = NULL; mainloop_timer_del(stonith_history_sync_timer_long); stonith_history_sync_timer_long = NULL; } else { mainloop_timer_stop(stonith_history_sync_timer_short); mainloop_timer_stop(stonith_history_sync_timer_long); } if (st) { st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED); } } static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) { te_cleanup_stonith_history_sync(st, FALSE); crm_debug("Fence-history synced - cancel all timers"); } static gboolean stonith_history_sync_set_trigger(gpointer user_data) { mainloop_set_trigger(stonith_history_sync_trigger); return FALSE; } void te_trigger_stonith_history_sync(bool long_timeout) { /* trigger a sync in 5s to give more nodes the * chance to show up so that we don't create * unnecessary stonith-history-sync traffic * * the long timeout of 30s is there as a fallback * so that after a successful connection to fenced * we will wait for 30s for the DC to trigger a * history-sync * if this doesn't happen we trigger a sync locally * (e.g. fenced segfaults and is restarted by pacemakerd) */ /* as we are finally checking the stonith-connection * in do_stonith_history_sync we should be fine * leaving stonith_history_sync_time & stonith_history_sync_trigger * around */ if (stonith_history_sync_trigger == NULL) { stonith_history_sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW, do_stonith_history_sync, NULL); } if (long_timeout) { if(stonith_history_sync_timer_long == NULL) { stonith_history_sync_timer_long = mainloop_timer_add("history_sync_long", 30000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); mainloop_timer_start(stonith_history_sync_timer_long); } else { if(stonith_history_sync_timer_short == NULL) { stonith_history_sync_timer_short = mainloop_timer_add("history_sync_short", 5000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); mainloop_timer_start(stonith_history_sync_timer_short); } } /* end stonith history synchronization functions */ diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c index 8cbc3a0ec6..63427f9ee5 100644 --- a/daemons/controld/controld_te_events.c +++ b/daemons/controld/controld_te_events.c @@ -1,508 +1,508 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include char *failed_stop_offset = NULL; char *failed_start_offset = NULL; gboolean fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node) { const char *target_uuid = NULL; const char *router = NULL; const char *router_uuid = NULL; xmlNode *last_action = NULL; GList *gIter = NULL; GList *gIter2 = NULL; if (graph == NULL || graph->complete) { return FALSE; } gIter = graph->synapses; for (; gIter != NULL; gIter = gIter->next) { - synapse_t *synapse = (synapse_t *) gIter->data; + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data; if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) { /* We've already been here */ continue; } gIter2 = synapse->actions; for (; gIter2 != NULL; gIter2 = gIter2->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data; if ((action->type == pcmk__pseudo_graph_action) || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { continue; } else if (action->type == pcmk__cluster_graph_action) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { continue; } } target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (router) { crm_node_t *node = crm_get_peer(0, router); if (node) { router_uuid = node->uuid; } } if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) { crm__set_graph_action_flags(action, pcmk__graph_action_failed); pcmk__set_synapse_flags(synapse, pcmk__synapse_failed); last_action = action->xml; stop_te_timer(action); pcmk__update_graph(graph, action); if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { crm_notice("Action %d (%s) was pending on %s (offline)", action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node); } else { crm_info("Action %d (%s) is scheduled for %s (offline)", action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node); } } } } if (last_action != NULL) { crm_info("Node %s shutdown resulted in un-runnable actions", down_node); abort_transition(INFINITY, tg_restart, "Node failure", last_action); return TRUE; } return FALSE; } /*! * \internal * \brief Update failure-related node attributes if warranted * * \param[in] event XML describing operation that (maybe) failed * \param[in] event_node_uuid Node that event occurred on * \param[in] rc Actual operation return code * \param[in] target_rc Expected operation return code * \param[in] do_update If TRUE, do update regardless of operation type * \param[in] ignore_failures If TRUE, update last failure but not fail count * * \return TRUE if this was not a direct nack, success or lrm status refresh */ static gboolean update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int target_rc, gboolean do_update, gboolean ignore_failures) { guint interval_ms = 0; char *task = NULL; char *rsc_id = NULL; const char *value = NULL; const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); const char *on_uname = crm_peer_uname(event_node_uuid); const char *origin = crm_element_value(event, XML_ATTR_ORIGIN); // Nothing needs to be done for success or status refresh if (rc == target_rc) { return FALSE; } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) { crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh", id, rc, on_uname); return FALSE; } /* Sanity check */ CRM_CHECK(on_uname != NULL, return TRUE); CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms), crm_err("Couldn't parse: %s", ID(event)); goto bail); /* Decide whether update is necessary and what value to use */ if ((interval_ms > 0) || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei) || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) { do_update = TRUE; } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) { do_update = TRUE; if (failed_start_offset == NULL) { failed_start_offset = strdup(CRM_INFINITY_S); } value = failed_start_offset; } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) { do_update = TRUE; if (failed_stop_offset == NULL) { failed_stop_offset = strdup(CRM_INFINITY_S); } value = failed_stop_offset; } /* Fail count will be either incremented or set to infinity */ if (!pcmk_str_is_infinity(value)) { value = XML_NVPAIR_ATTR_VALUE "++"; } if (do_update) { char *now = pcmk__ttoa(time(NULL)); char *attr_name = NULL; gboolean is_remote_node = FALSE; if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) { is_remote_node = TRUE; } crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)", (ignore_failures? "last failure" : "failcount"), rsc_id, on_uname, task, rc, value, now); /* Update the fail count, if we're not ignoring failures */ if (!ignore_failures) { attr_name = pcmk__failcount_name(rsc_id, task, interval_ms); update_attrd(on_uname, attr_name, value, NULL, is_remote_node); free(attr_name); } /* Update the last failure time (even if we're ignoring failures, * so that failure can still be detected and shown, e.g. by crm_mon) */ attr_name = pcmk__lastfailure_name(rsc_id, task, interval_ms); update_attrd(on_uname, attr_name, now, NULL, is_remote_node); free(attr_name); free(now); } bail: free(rsc_id); free(task); return TRUE; } pcmk__graph_action_t * controld_get_action(int id) { for (GList *item = transition_graph->synapses; item; item = item->next) { - synapse_t *synapse = (synapse_t *) item->data; + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data; for (GList *item2 = synapse->actions; item2; item2 = item2->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data; if (action->id == id) { return action; } } } return NULL; } pcmk__graph_action_t * get_cancel_action(const char *id, const char *node) { GList *gIter = NULL; GList *gIter2 = NULL; gIter = transition_graph->synapses; for (; gIter != NULL; gIter = gIter->next) { - synapse_t *synapse = (synapse_t *) gIter->data; + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data; gIter2 = synapse->actions; for (; gIter2 != NULL; gIter2 = gIter2->next) { const char *task = NULL; const char *target = NULL; pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); if (!pcmk__str_eq(task, id, pcmk__str_casei)) { crm_trace("Wrong key %s for %s on %s", task, id, node); continue; } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) { crm_trace("Wrong node %s for %s on %s", target, id, node); continue; } crm_trace("Found %s on %s", id, node); return action; } } return NULL; } bool confirm_cancel_action(const char *id, const char *node_id) { const char *op_key = NULL; const char *node_name = NULL; pcmk__graph_action_t *cancel = get_cancel_action(id, node_id); if (cancel == NULL) { return FALSE; } op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY); node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET); stop_te_timer(cancel); te_action_confirmed(cancel, transition_graph); crm_info("Cancellation of %s on %s confirmed (action %d)", op_key, node_name, cancel->id); return TRUE; } /* downed nodes are listed like: ... */ #define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \ "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']" /*! * \brief Find a transition event that would have made a specified node down * * \param[in] target UUID of node to match * * \return Matching event if found, NULL otherwise */ pcmk__graph_action_t * match_down_event(const char *target) { pcmk__graph_action_t *match = NULL; xmlXPathObjectPtr xpath_ret = NULL; GList *gIter, *gIter2; char *xpath = crm_strdup_printf(XPATH_DOWNED, target); for (gIter = transition_graph->synapses; gIter != NULL && match == NULL; gIter = gIter->next) { - for (gIter2 = ((synapse_t*)gIter->data)->actions; + for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions; gIter2 != NULL && match == NULL; gIter2 = gIter2->next) { match = (pcmk__graph_action_t *) gIter2->data; if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) { xpath_ret = xpath_search(match->xml, xpath); if (numXpathResults(xpath_ret) < 1) { match = NULL; } freeXpathObject(xpath_ret); } else { // Only actions that were actually started can match match = NULL; } } } free(xpath); if (match != NULL) { crm_debug("Shutdown action %d (%s) found for node %s", match->id, crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target); } else { crm_debug("No reason to expect node %s to be down", target); } return match; } void process_graph_event(xmlNode *event, const char *event_node) { int rc = -1; // Actual result int target_rc = -1; // Expected result int status = -1; // Executor status int callid = -1; // Executor call ID int transition_num = -1; // Transition number int action_num = -1; // Action number within transition char *update_te_uuid = NULL; bool ignore_failures = FALSE; const char *id = NULL; const char *desc = NULL; const char *magic = NULL; const char *uname = NULL; CRM_ASSERT(event != NULL); /* */ magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY); if (magic == NULL) { /* non-change */ return; } crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status); if (status == PCMK_EXEC_PENDING) { return; } id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); crm_element_value_int(event, XML_LRM_ATTR_RC, &rc); crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid); rc = pcmk__effective_rc(rc); if (decode_transition_key(magic, &update_te_uuid, &transition_num, &action_num, &target_rc) == FALSE) { // decode_transition_key() already logged the bad key crm_err("Can't process action %s result: Incompatible versions? " CRM_XS " call-id=%d", id, callid); abort_transition(INFINITY, tg_restart, "Bad event", event); return; } if (transition_num == -1) { // E.g. crm_resource --fail desc = "initiated outside of the cluster"; abort_transition(INFINITY, tg_restart, "Unexpected event", event); } else if ((action_num < 0) || !pcmk__str_eq(update_te_uuid, te_uuid, pcmk__str_none)) { desc = "initiated by a different DC"; abort_transition(INFINITY, tg_restart, "Foreign event", event); } else if ((transition_graph->id != transition_num) || (transition_graph->complete)) { // Action is not from currently active transition guint interval_ms = 0; if (parse_op_key(id, NULL, NULL, &interval_ms) && (interval_ms != 0)) { /* Recurring actions have the transition number they were first * scheduled in. */ if (status == PCMK_EXEC_CANCELLED) { confirm_cancel_action(id, get_node_id(event)); goto bail; } desc = "arrived after initial scheduling"; abort_transition(INFINITY, tg_restart, "Change in recurring result", event); } else if (transition_graph->id != transition_num) { desc = "arrived really late"; abort_transition(INFINITY, tg_restart, "Old event", event); } else { desc = "arrived late"; abort_transition(INFINITY, tg_restart, "Inactive graph", event); } } else { // Event is result of an action from currently active transition pcmk__graph_action_t *action = controld_get_action(action_num); if (action == NULL) { // Should never happen desc = "unknown"; abort_transition(INFINITY, tg_restart, "Unknown event", event); } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { /* Nothing further needs to be done if the action has already been * confirmed. This can happen e.g. when processing both an * "xxx_last_0" or "xxx_last_failure_0" record as well as the main * history record, which would otherwise result in incorrectly * bumping the fail count twice. */ crm_log_xml_debug(event, "Event already confirmed:"); goto bail; } else { /* An action result needs to be confirmed. * (This is the only case where desc == NULL.) */ if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) { ignore_failures = TRUE; } else if (rc != target_rc) { crm__set_graph_action_flags(action, pcmk__graph_action_failed); } stop_te_timer(action); te_action_confirmed(action, transition_graph); if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) { abort_transition(action->synapse->priority + 1, tg_restart, "Event failed", event); } } } if (id == NULL) { id = "unknown action"; } uname = crm_element_value(event, XML_LRM_ATTR_TARGET); if (uname == NULL) { uname = "unknown node"; } if (status == PCMK_EXEC_INVALID) { // We couldn't attempt the action crm_info("Transition %d action %d (%s on %s): %s", transition_num, action_num, id, uname, pcmk_exec_status_str(status)); } else if (desc && update_failcount(event, event_node, rc, target_rc, (transition_num == -1), FALSE)) { crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' " CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'", transition_num, action_num, id, uname, services_ocf_exitcode_str(target_rc), services_ocf_exitcode_str(rc), target_rc, rc, callid, desc); } else if (desc) { crm_info("Transition %d action %d (%s on %s): %s " CRM_XS " rc=%d target-rc=%d call-id=%d", transition_num, action_num, id, uname, desc, rc, target_rc, callid); } else if (rc == target_rc) { crm_info("Transition %d action %d (%s on %s) confirmed: %s " CRM_XS " rc=%d call-id=%d", transition_num, action_num, id, uname, services_ocf_exitcode_str(rc), rc, callid); } else { update_failcount(event, event_node, rc, target_rc, (transition_num == -1), ignore_failures); crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' " CRM_XS " target-rc=%d rc=%d call-id=%d", transition_num, action_num, id, uname, services_ocf_exitcode_str(target_rc), services_ocf_exitcode_str(rc), target_rc, rc, callid); } bail: free(update_te_uuid); } diff --git a/include/pcmki/pcmki_transition.h b/include/pcmki/pcmki_transition.h index 2f71da3072..c20cda4766 100644 --- a/include/pcmki/pcmki_transition.h +++ b/include/pcmki/pcmki_transition.h @@ -1,166 +1,166 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__PCMKI_PCMKI_TRANSITION__H # define PCMK__PCMKI_PCMKI_TRANSITION__H # include # include # include # include #ifdef __cplusplus extern "C" { #endif enum pcmk__graph_action_type { pcmk__pseudo_graph_action, pcmk__rsc_graph_action, pcmk__cluster_graph_action, }; enum pcmk__synapse_flags { pcmk__synapse_ready = (1 << 0), pcmk__synapse_failed = (1 << 1), pcmk__synapse_executed = (1 << 2), pcmk__synapse_confirmed = (1 << 3), }; -typedef struct synapse_s { +typedef struct { int id; int priority; uint32_t flags; // Group of pcmk__synapse_flags GList *actions; /* pcmk__graph_action_t* */ GList *inputs; /* pcmk__graph_action_t* */ -} synapse_t; +} pcmk__graph_synapse_t; -const char *synapse_state_str(synapse_t *synapse); +const char *synapse_state_str(pcmk__graph_synapse_t *synapse); #define pcmk__set_synapse_flags(synapse, flags_to_set) do { \ (synapse)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Synapse", "synapse", \ (synapse)->flags, (flags_to_set), #flags_to_set); \ } while (0) #define pcmk__clear_synapse_flags(synapse, flags_to_clear) do { \ (synapse)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Synapse", "synapse", \ (synapse)->flags, (flags_to_clear), #flags_to_clear); \ } while (0) enum pcmk__graph_action_flags { pcmk__graph_action_sent_update = (1 << 0), /* sent to the CIB */ pcmk__graph_action_executed = (1 << 1), /* sent to the CRM */ pcmk__graph_action_confirmed = (1 << 2), pcmk__graph_action_failed = (1 << 3), pcmk__graph_action_can_fail = (1 << 4), //! \deprecated Will be removed in a future release }; typedef struct { int id; int timeout; int timer; guint interval_ms; GHashTable *params; enum pcmk__graph_action_type type; - synapse_t *synapse; + pcmk__graph_synapse_t *synapse; uint32_t flags; // Group of pcmk__graph_action_flags xmlNode *xml; } pcmk__graph_action_t; #define crm__set_graph_action_flags(action, flags_to_set) do { \ (action)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", "action", \ (action)->flags, (flags_to_set), #flags_to_set); \ } while (0) #define crm__clear_graph_action_flags(action, flags_to_clear) do { \ (action)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", "action", \ (action)->flags, (flags_to_clear), #flags_to_clear); \ } while (0) /* order matters here */ enum transition_action { tg_done, tg_stop, tg_restart, tg_shutdown, }; typedef struct { int id; char *source; int abort_priority; gboolean complete; const char *abort_reason; enum transition_action completion_action; int num_actions; int num_synapses; int batch_limit; guint network_delay; guint stonith_timeout; int fired; int pending; int skipped; int completed; int incomplete; - GList *synapses; /* synapse_t* */ + GList *synapses; /* pcmk__graph_synapse_t* */ int migration_limit; } pcmk__graph_t; typedef struct crm_graph_functions_s { gboolean (*pseudo) (pcmk__graph_t *graph, pcmk__graph_action_t *action); gboolean (*rsc) (pcmk__graph_t *graph, pcmk__graph_action_t *action); gboolean (*crmd) (pcmk__graph_t *graph, pcmk__graph_action_t *action); gboolean (*stonith) (pcmk__graph_t *graph, pcmk__graph_action_t *action); gboolean (*allowed) (pcmk__graph_t *graph, pcmk__graph_action_t *action); } crm_graph_functions_t; enum transition_status { transition_active, transition_pending, /* active but no actions performed this time */ transition_complete, transition_terminated, }; void pcmk__set_graph_functions(crm_graph_functions_t *fns); pcmk__graph_t *pcmk__unpack_graph(xmlNode *xml_graph, const char *reference); enum transition_status pcmk__execute_graph(pcmk__graph_t *graph); void pcmk__update_graph(pcmk__graph_t *graph, pcmk__graph_action_t *action); void pcmk__free_graph(pcmk__graph_t *graph); const char *pcmk__graph_status2text(enum transition_status state); void pcmk__log_graph(unsigned int log_level, pcmk__graph_t *graph); void pcmk__log_graph_action(int log_level, pcmk__graph_action_t *action); lrmd_event_data_t *pcmk__event_from_graph_action(xmlNode *resource, pcmk__graph_action_t *action, int status, int rc, const char *exit_reason); #ifdef __cplusplus } #endif #endif diff --git a/lib/pacemaker/pcmk_graph_consumer.c b/lib/pacemaker/pcmk_graph_consumer.c index 00817bc17c..6975ef380b 100644 --- a/lib/pacemaker/pcmk_graph_consumer.c +++ b/lib/pacemaker/pcmk_graph_consumer.c @@ -1,855 +1,856 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include /* * Functions for updating graph */ /*! * \internal * \brief Update synapse after completed prerequisite * * A synapse is ready to be executed once all its prerequisite actions (inputs) * complete. Given a completed action, check whether it is an input for a given * synapse, and if so, mark the input as confirmed, and mark the synapse as * ready if appropriate. * * \param[in] synapse Transition graph synapse to update * \param[in] action_id ID of an action that completed * * \note The only substantial effect here is confirming synapse inputs. * should_fire_synapse() will recalculate pcmk__synapse_ready, so the only * thing that uses the pcmk__synapse_ready from here is * synapse_state_str(). */ static void -update_synapse_ready(synapse_t *synapse, int action_id) +update_synapse_ready(pcmk__graph_synapse_t *synapse, int action_id) { if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) { return; // All inputs have already been confirmed } pcmk__set_synapse_flags(synapse, pcmk__synapse_ready); // Presume ready until proven otherwise for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { pcmk__graph_action_t *prereq = (pcmk__graph_action_t *) lpc->data; if (prereq->id == action_id) { crm_trace("Confirming input %d of synapse %d", action_id, synapse->id); crm__set_graph_action_flags(prereq, pcmk__graph_action_confirmed); } else if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) { pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready); crm_trace("Synapse %d still not ready after action %d", synapse->id, action_id); } } if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) { crm_trace("Synapse %d is now ready to execute", synapse->id); } } /*! * \internal * \brief Update action and synapse confirmation after action completion * * \param[in] synapse Transition graph synapse that action belongs to * \param[in] action_id ID of action that completed */ static void -update_synapse_confirmed(synapse_t *synapse, int action_id) +update_synapse_confirmed(pcmk__graph_synapse_t *synapse, int action_id) { bool all_confirmed = true; for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc->data; if (action->id == action_id) { crm_trace("Confirmed action %d of synapse %d", action_id, synapse->id); crm__set_graph_action_flags(action, pcmk__graph_action_confirmed); } else if (all_confirmed && !(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) { all_confirmed = false; crm_trace("Synapse %d still not confirmed after action %d", synapse->id, action_id); } } if (all_confirmed && !(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) { crm_trace("Confirmed synapse %d", synapse->id); pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed); } } /*! * \internal * \brief Update the transition graph with a completed action result * * \param[in,out] graph Transition graph to update * \param[in] action Action that completed */ void pcmk__update_graph(pcmk__graph_t *graph, pcmk__graph_action_t *action) { for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { - synapse_t *synapse = (synapse_t *) lpc->data; + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) { continue; // This synapse already completed } else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { update_synapse_confirmed(synapse, action->id); } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_failed)) || (synapse->priority == INFINITY)) { update_synapse_ready(synapse, action->id); } } } /* * Functions for executing graph */ /* A transition graph consists of various types of actions. The library caller * registers execution functions for each action type, which will be stored * here. */ static crm_graph_functions_t *graph_fns = NULL; /*! * \internal * \brief Set transition graph execution functions * * \param[in] Execution functions to use */ void pcmk__set_graph_functions(crm_graph_functions_t *fns) { crm_debug("Setting custom functions for executing transition graphs"); graph_fns = fns; CRM_ASSERT(graph_fns != NULL); CRM_ASSERT(graph_fns->rsc != NULL); CRM_ASSERT(graph_fns->crmd != NULL); CRM_ASSERT(graph_fns->pseudo != NULL); CRM_ASSERT(graph_fns->stonith != NULL); } /*! * \internal * \brief Check whether a graph synapse is ready to be executed * * \param[in] graph Transition graph that synapse is part of * \param[in] synapse Synapse to check * * \return true if synapse is ready, false otherwise */ static bool -should_fire_synapse(pcmk__graph_t *graph, synapse_t *synapse) +should_fire_synapse(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse) { GList *lpc = NULL; pcmk__set_synapse_flags(synapse, pcmk__synapse_ready); for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { pcmk__graph_action_t *prereq = (pcmk__graph_action_t *) lpc->data; if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) { crm_trace("Input %d for synapse %d not yet confirmed", prereq->id, synapse->id); pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready); break; } else if (pcmk_is_set(prereq->flags, pcmk__graph_action_failed) && !(pcmk_is_set(prereq->flags, pcmk__graph_action_can_fail))) { crm_trace("Input %d for synapse %d confirmed but failed", prereq->id, synapse->id); pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready); break; } } if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) { crm_trace("Synapse %d is ready to execute", synapse->id); } else { return false; } for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { pcmk__graph_action_t *a = (pcmk__graph_action_t *) lpc->data; if (a->type == pcmk__pseudo_graph_action) { /* None of the below applies to pseudo ops */ } else if (synapse->priority < graph->abort_priority) { crm_trace("Skipping synapse %d: priority %d is less than " "abort priority %d", synapse->id, synapse->priority, graph->abort_priority); graph->skipped++; return false; } else if (graph_fns->allowed && !(graph_fns->allowed(graph, a))) { crm_trace("Deferring synapse %d: not allowed", synapse->id); return false; } } return true; } /*! * \internal * \brief Initiate an action from a transition graph * * \param[in] graph Transition graph containing action * \param[in] action Action to execute * * \return Standard Pacemaker return code */ static int initiate_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *id = ID(action->xml); CRM_CHECK(id != NULL, return EINVAL); CRM_CHECK(!pcmk_is_set(action->flags, pcmk__graph_action_executed), return pcmk_rc_already); crm__set_graph_action_flags(action, pcmk__graph_action_executed); switch (action->type) { case pcmk__pseudo_graph_action: crm_trace("Executing pseudo-action %d (%s)", action->id, id); return graph_fns->pseudo(graph, action)? pcmk_rc_ok : pcmk_rc_error; case pcmk__rsc_graph_action: crm_trace("Executing resource action %d (%s)", action->id, id); return graph_fns->rsc(graph, action)? pcmk_rc_ok : pcmk_rc_error; case pcmk__cluster_graph_action: if (pcmk__str_eq(crm_element_value(action->xml, XML_LRM_ATTR_TASK), CRM_OP_FENCE, pcmk__str_casei)) { crm_trace("Executing fencing action %d (%s)", action->id, id); return graph_fns->stonith(graph, action)? pcmk_rc_ok : pcmk_rc_error; } crm_trace("Executing control action %d (%s)", action->id, id); return graph_fns->crmd(graph, action)? pcmk_rc_ok : pcmk_rc_error; default: crm_err("Unsupported graph action type <%s id='%s'> (bug?)", crm_element_name(action->xml), id); return EINVAL; } } /*! * \internal * \brief Execute a graph synapse * * \param[in] graph Transition graph with synapse to execute * \param[in] synapse Synapse to execute * * \return Standard Pacemaker return value */ static int -fire_synapse(pcmk__graph_t *graph, synapse_t *synapse) +fire_synapse(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse) { pcmk__set_synapse_flags(synapse, pcmk__synapse_executed); for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc->data; if (initiate_action(graph, action) != pcmk_rc_ok) { crm_err("Failed initiating <%s id=%d> in synapse %d", crm_element_name(action->xml), action->id, synapse->id); pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed); crm__set_graph_action_flags(action, pcmk__graph_action_confirmed | pcmk__graph_action_failed); return pcmk_rc_error; } } return pcmk_rc_ok; } /*! * \internal * \brief Dummy graph method that can be used with simulations * * \param[in] graph Transition graph containing action * \param[in] action Action to be initiated * * \retval TRUE Action initiation was (simulated to be) successful * \retval FALSE Action initiation was (simulated to be) failed (due to the * PE_fail environment variable being set to the action ID) */ static gboolean pseudo_action_dummy(pcmk__graph_t * graph, pcmk__graph_action_t *action) { static int fail = -1; if (fail < 0) { long long fail_ll; if ((pcmk__scan_ll(getenv("PE_fail"), &fail_ll, 0LL) == pcmk_rc_ok) && (fail_ll > 0LL) && (fail_ll <= INT_MAX)) { fail = (int) fail_ll; } else { fail = 0; } } if (action->id == fail) { crm_err("Dummy event handler: pretending action %d failed", action->id); crm__set_graph_action_flags(action, pcmk__graph_action_failed); graph->abort_priority = INFINITY; } else { crm_trace("Dummy event handler: action %d initiated", action->id); } crm__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(graph, action); return TRUE; } static crm_graph_functions_t default_fns = { pseudo_action_dummy, pseudo_action_dummy, pseudo_action_dummy, pseudo_action_dummy }; /*! * \internal * \brief Execute all actions in a transition graph * * \param[in] graph Transition graph to execute * * \return Status of transition after execution */ enum transition_status pcmk__execute_graph(pcmk__graph_t *graph) { GList *lpc = NULL; int log_level = LOG_DEBUG; enum transition_status pass_result = transition_active; const char *status = "In progress"; if (graph_fns == NULL) { graph_fns = &default_fns; } if (graph == NULL) { return transition_complete; } graph->fired = 0; graph->pending = 0; graph->skipped = 0; graph->completed = 0; graph->incomplete = 0; // Count completed and in-flight synapses for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { - synapse_t *synapse = (synapse_t *) lpc->data; + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) { graph->completed++; } else if (!(pcmk_is_set(synapse->flags, pcmk__synapse_failed)) && pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { graph->pending++; } } crm_trace("Executing graph %d (%d synapses already completed, %d pending)", graph->id, graph->completed, graph->pending); // Execute any synapses that are ready for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { - synapse_t *synapse = (synapse_t *) lpc->data; + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; if ((graph->batch_limit > 0) && (graph->pending >= graph->batch_limit)) { crm_debug("Throttling graph execution: batch limit (%d) reached", graph->batch_limit); break; } else if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) { graph->skipped++; continue; } else if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_executed)) { continue; // Already handled } else if (should_fire_synapse(graph, synapse)) { graph->fired++; if (fire_synapse(graph, synapse) != pcmk_rc_ok) { crm_err("Synapse %d failed to fire", synapse->id); log_level = LOG_ERR; graph->abort_priority = INFINITY; graph->incomplete++; graph->fired--; } if (!(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) { graph->pending++; } } else { crm_trace("Synapse %d cannot fire", synapse->id); graph->incomplete++; } } if ((graph->pending == 0) && (graph->fired == 0)) { graph->complete = TRUE; if ((graph->incomplete != 0) && (graph->abort_priority <= 0)) { log_level = LOG_WARNING; pass_result = transition_terminated; status = "Terminated"; } else if (graph->skipped != 0) { log_level = LOG_NOTICE; pass_result = transition_complete; status = "Stopped"; } else { log_level = LOG_NOTICE; pass_result = transition_complete; status = "Complete"; } } else if (graph->fired == 0) { pass_result = transition_pending; } do_crm_log(log_level, "Transition %d (Complete=%d, Pending=%d," " Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s", graph->id, graph->completed, graph->pending, graph->fired, graph->skipped, graph->incomplete, graph->source, status); return pass_result; } /* * Functions for unpacking transition graph XML into structs */ /*! * \internal * \brief Unpack a transition graph action from XML * * \param[in] parent Synapse that action is part of * \param[in] xml_action Action XML to unparse * * \return Newly allocated action on success, or NULL otherwise */ static pcmk__graph_action_t * -unpack_action(synapse_t *parent, xmlNode *xml_action) +unpack_action(pcmk__graph_synapse_t *parent, xmlNode *xml_action) { enum pcmk__graph_action_type action_type; pcmk__graph_action_t *action = NULL; const char *element = TYPE(xml_action); const char *value = ID(xml_action); if (value == NULL) { crm_err("Ignoring transition graph action without id (bug?)"); crm_log_xml_trace(xml_action, "invalid"); return NULL; } if (pcmk__str_eq(element, XML_GRAPH_TAG_RSC_OP, pcmk__str_casei)) { action_type = pcmk__rsc_graph_action; } else if (pcmk__str_eq(element, XML_GRAPH_TAG_PSEUDO_EVENT, pcmk__str_casei)) { action_type = pcmk__pseudo_graph_action; } else if (pcmk__str_eq(element, XML_GRAPH_TAG_CRM_EVENT, pcmk__str_casei)) { action_type = pcmk__cluster_graph_action; } else { crm_err("Ignoring transition graph action of unknown type '%s' (bug?)", element); crm_log_xml_trace(xml_action, "invalid"); return NULL; } action = calloc(1, sizeof(pcmk__graph_action_t)); if (action == NULL) { crm_perror(LOG_CRIT, "Cannot unpack transition graph action"); crm_log_xml_trace(xml_action, "lost"); return NULL; } pcmk__scan_min_int(value, &(action->id), -1); action->type = pcmk__rsc_graph_action; action->xml = copy_xml(xml_action); action->synapse = parent; action->type = action_type; action->params = xml2list(action->xml); value = g_hash_table_lookup(action->params, "CRM_meta_timeout"); pcmk__scan_min_int(value, &(action->timeout), 0); /* Take start-delay into account for the timeout of the action timer */ value = g_hash_table_lookup(action->params, "CRM_meta_start_delay"); { int start_delay; pcmk__scan_min_int(value, &start_delay, 0); action->timeout += start_delay; } if (pcmk__guint_from_hash(action->params, CRM_META "_" XML_LRM_ATTR_INTERVAL, 0, &(action->interval_ms)) != pcmk_rc_ok) { action->interval_ms = 0; } value = g_hash_table_lookup(action->params, "CRM_meta_can_fail"); if (value != NULL) { gboolean can_fail = FALSE; crm_str_to_boolean(value, &can_fail); if (can_fail) { crm__set_graph_action_flags(action, pcmk__graph_action_can_fail); } else { crm__clear_graph_action_flags(action, pcmk__graph_action_can_fail); } #ifndef PCMK__COMPAT_2_0 if (pcmk_is_set(action->flags, pcmk__graph_action_can_fail)) { crm_warn("Support for the can_fail meta-attribute is deprecated" " and will be removed in a future release"); } #endif } crm_trace("Action %d has timer set to %dms", action->id, action->timeout); return action; } /*! * \internal * \brief Unpack transition graph synapse from XML * * \param[in] new_graph Transition graph that synapse is part of * \param[in] xml_synapse Synapse XML * * \return Newly allocated synapse on success, or NULL otherwise */ -static synapse_t * +static pcmk__graph_synapse_t * unpack_synapse(pcmk__graph_t *new_graph, xmlNode *xml_synapse) { const char *value = NULL; xmlNode *action_set = NULL; - synapse_t *new_synapse = NULL; + pcmk__graph_synapse_t *new_synapse = NULL; crm_trace("Unpacking synapse %s", ID(xml_synapse)); - new_synapse = calloc(1, sizeof(synapse_t)); + new_synapse = calloc(1, sizeof(pcmk__graph_synapse_t)); if (new_synapse == NULL) { return NULL; } pcmk__scan_min_int(ID(xml_synapse), &(new_synapse->id), 0); value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY); pcmk__scan_min_int(value, &(new_synapse->priority), 0); CRM_CHECK(new_synapse->id >= 0, free(new_synapse); return NULL); new_graph->num_synapses++; crm_trace("Unpacking synapse %s action sets", crm_element_value(xml_synapse, XML_ATTR_ID)); for (action_set = first_named_child(xml_synapse, "action_set"); action_set != NULL; action_set = crm_next_same_xml(action_set)) { for (xmlNode *action = pcmk__xml_first_child(action_set); action != NULL; action = pcmk__xml_next(action)) { pcmk__graph_action_t *new_action = unpack_action(new_synapse, action); if (new_action == NULL) { continue; } crm_trace("Adding action %d to synapse %d", new_action->id, new_synapse->id); new_graph->num_actions++; new_synapse->actions = g_list_append(new_synapse->actions, new_action); } } crm_trace("Unpacking synapse %s inputs", ID(xml_synapse)); for (xmlNode *inputs = first_named_child(xml_synapse, "inputs"); inputs != NULL; inputs = crm_next_same_xml(inputs)) { for (xmlNode *trigger = first_named_child(inputs, "trigger"); trigger != NULL; trigger = crm_next_same_xml(trigger)) { for (xmlNode *input = pcmk__xml_first_child(trigger); input != NULL; input = pcmk__xml_next(input)) { pcmk__graph_action_t *new_input = unpack_action(new_synapse, input); if (new_input == NULL) { continue; } crm_trace("Adding input %d to synapse %d", new_input->id, new_synapse->id); new_synapse->inputs = g_list_append(new_synapse->inputs, new_input); } } } return new_synapse; } /*! * \internal * \brief Unpack transition graph XML * * \param[in] xml_graph Transition graph XML to unpack * \param[in] reference Where the XML came from (for logging) * * \return Newly allocated transition graph on success, NULL otherwise * \note The caller is responsible for freeing the return value using * pcmk__free_graph(). * \note The XML is expected to be structured like: ... ... */ pcmk__graph_t * pcmk__unpack_graph(xmlNode *xml_graph, const char *reference) { pcmk__graph_t *new_graph = NULL; const char *t_id = NULL; const char *time = NULL; new_graph = calloc(1, sizeof(pcmk__graph_t)); if (new_graph == NULL) { return NULL; } new_graph->source = strdup((reference == NULL)? "unknown" : reference); if (new_graph->source == NULL) { free(new_graph); return NULL; } new_graph->id = -1; new_graph->abort_priority = 0; new_graph->network_delay = 0; new_graph->stonith_timeout = 0; new_graph->completion_action = tg_done; // Parse top-level attributes from if (xml_graph != NULL) { t_id = crm_element_value(xml_graph, "transition_id"); CRM_CHECK(t_id != NULL, free(new_graph); return NULL); pcmk__scan_min_int(t_id, &(new_graph->id), -1); time = crm_element_value(xml_graph, "cluster-delay"); CRM_CHECK(time != NULL, free(new_graph); return NULL); new_graph->network_delay = crm_parse_interval_spec(time); time = crm_element_value(xml_graph, "stonith-timeout"); if (time == NULL) { new_graph->stonith_timeout = new_graph->network_delay; } else { new_graph->stonith_timeout = crm_parse_interval_spec(time); } // Use 0 (dynamic limit) as default/invalid, -1 (no limit) as minimum t_id = crm_element_value(xml_graph, "batch-limit"); if ((t_id == NULL) || (pcmk__scan_min_int(t_id, &(new_graph->batch_limit), -1) != pcmk_rc_ok)) { new_graph->batch_limit = 0; } t_id = crm_element_value(xml_graph, "migration-limit"); pcmk__scan_min_int(t_id, &(new_graph->migration_limit), -1); } // Unpack each child element for (xmlNode *synapse_xml = first_named_child(xml_graph, "synapse"); synapse_xml != NULL; synapse_xml = crm_next_same_xml(synapse_xml)) { - synapse_t *new_synapse = unpack_synapse(new_graph, synapse_xml); + pcmk__graph_synapse_t *new_synapse = unpack_synapse(new_graph, + synapse_xml); if (new_synapse != NULL) { new_graph->synapses = g_list_append(new_graph->synapses, new_synapse); } } crm_debug("Unpacked transition %d from %s: %d actions in %d synapses", new_graph->id, new_graph->source, new_graph->num_actions, new_graph->num_synapses); return new_graph; } /* * Functions for freeing transition graph objects */ /*! * \internal * \brief Free a transition graph action object * * \param[in] user_data Action to free */ static void free_graph_action(gpointer user_data) { pcmk__graph_action_t *action = user_data; if (action->timer != 0) { crm_warn("Cancelling timer for graph action %d", action->id); g_source_remove(action->timer); } if (action->params != NULL) { g_hash_table_destroy(action->params); } free_xml(action->xml); free(action); } /*! * \internal * \brief Free a transition graph synapse object * * \param[in] user_data Synapse to free */ static void free_graph_synapse(gpointer user_data) { - synapse_t *synapse = user_data; + pcmk__graph_synapse_t *synapse = user_data; g_list_free_full(synapse->actions, free_graph_action); g_list_free_full(synapse->inputs, free_graph_action); free(synapse); } /*! * \internal * \brief Free a transition graph object * * \param[in] graph Transition graph to free */ void pcmk__free_graph(pcmk__graph_t *graph) { if (graph != NULL) { g_list_free_full(graph->synapses, free_graph_synapse); free(graph->source); free(graph); } } /* * Other transition graph utilities */ /*! * \internal * \brief Synthesize an executor event from a graph action * * \param[in] resource If not NULL, use greater call ID than in this XML * \param[in] action Graph action * \param[in] status What to use as event execution status * \param[in] rc What to use as event exit status * \param[in] exit_reason What to use as event exit reason * * \return Newly allocated executor event on success, or NULL otherwise */ lrmd_event_data_t * pcmk__event_from_graph_action(xmlNode *resource, pcmk__graph_action_t *action, int status, int rc, const char *exit_reason) { lrmd_event_data_t *op = NULL; GHashTableIter iter; const char *name = NULL; const char *value = NULL; xmlNode *action_resource = NULL; CRM_CHECK(action != NULL, return NULL); CRM_CHECK(action->type == pcmk__rsc_graph_action, return NULL); action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "invalid"); return NULL); op = lrmd_new_event(ID(action_resource), crm_element_value(action->xml, XML_LRM_ATTR_TASK), action->interval_ms); lrmd__set_result(op, rc, status, exit_reason); op->t_run = time(NULL); op->t_rcchange = op->t_run; op->params = pcmk__strkey_table(free, free); g_hash_table_iter_init(&iter, action->params); while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) { g_hash_table_insert(op->params, strdup(name), strdup(value)); } for (xmlNode *xop = pcmk__xml_first_child(resource); xop != NULL; xop = pcmk__xml_next(xop)) { int tmp = 0; crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); crm_debug("Got call_id=%d for %s", tmp, ID(resource)); if (tmp > op->call_id) { op->call_id = tmp; } } op->call_id++; return op; } diff --git a/lib/pacemaker/pcmk_graph_logging.c b/lib/pacemaker/pcmk_graph_logging.c index 486b9c9e26..fedc9ead59 100644 --- a/lib/pacemaker/pcmk_graph_logging.c +++ b/lib/pacemaker/pcmk_graph_logging.c @@ -1,210 +1,211 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include /*! * \internal * \brief Return text equivalent of an enum transition_status for logging * * \param[in] state Transition status * * \return Human-readable text equivalent of \p state */ const char * pcmk__graph_status2text(enum transition_status state) { switch (state) { case transition_active: return "active"; case transition_pending: return "pending"; case transition_complete: return "complete"; case transition_terminated: return "terminated"; } return "unknown"; } static const char * actiontype2text(enum pcmk__graph_action_type type) { switch (type) { case pcmk__pseudo_graph_action: return "pseudo"; case pcmk__rsc_graph_action: return "resource"; case pcmk__cluster_graph_action: return "cluster"; } return "invalid"; } /*! * \internal * \brief Find a transition graph action by ID * * \param[in] graph Transition graph to search * \param[in] id Action ID to search for * * \return Transition graph action corresponding to \p id, or NULL if none */ static pcmk__graph_action_t * find_graph_action_by_id(pcmk__graph_t *graph, int id) { if (graph == NULL) { return NULL; } for (GList *sIter = graph->synapses; sIter != NULL; sIter = sIter->next) { - synapse_t *synapse = (synapse_t *) sIter->data; + pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) sIter->data; for (GList *aIter = synapse->actions; aIter != NULL; aIter = aIter->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) aIter->data; if (action->id == id) { return action; } } } return NULL; } const char * -synapse_state_str(synapse_t *synapse) +synapse_state_str(pcmk__graph_synapse_t *synapse) { if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) { return "Failed"; } else if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) { return "Completed"; } else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { return "In-flight"; } else if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) { return "Ready"; } return "Pending"; } // List action IDs of inputs in graph that haven't completed successfully static char * -synapse_pending_inputs(pcmk__graph_t *graph, synapse_t *synapse) +synapse_pending_inputs(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse) { char *pending = NULL; size_t pending_len = 0; for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { pcmk__graph_action_t *input = (pcmk__graph_action_t *) lpc->data; if (pcmk_is_set(input->flags, pcmk__graph_action_failed)) { pcmk__add_word(&pending, &pending_len, ID(input->xml)); } else if (pcmk_is_set(input->flags, pcmk__graph_action_confirmed)) { // Confirmed successful inputs are not pending } else if (find_graph_action_by_id(graph, input->id) != NULL) { // In-flight or pending pcmk__add_word(&pending, &pending_len, ID(input->xml)); } } if (pending == NULL) { pending = strdup("none"); } return pending; } // Log synapse inputs that aren't in graph static void log_unresolved_inputs(unsigned int log_level, pcmk__graph_t *graph, - synapse_t *synapse) + pcmk__graph_synapse_t *synapse) { for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { pcmk__graph_action_t *input = (pcmk__graph_action_t *) lpc->data; const char *key = crm_element_value(input->xml, XML_LRM_ATTR_TASK_KEY); const char *host = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); if (find_graph_action_by_id(graph, input->id) == NULL) { do_crm_log(log_level, " * [Input %2d]: Unresolved dependency %s op %s%s%s", input->id, actiontype2text(input->type), key, (host? " on " : ""), (host? host : "")); } } } static void -log_synapse_action(unsigned int log_level, synapse_t *synapse, +log_synapse_action(unsigned int log_level, pcmk__graph_synapse_t *synapse, pcmk__graph_action_t *action, const char *pending_inputs) { const char *key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *host = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); char *desc = crm_strdup_printf("%s %s op %s", synapse_state_str(synapse), actiontype2text(action->type), key); do_crm_log(log_level, "[Action %4d]: %-50s%s%s (priority: %d, waiting: %s)", action->id, desc, (host? " on " : ""), (host? host : ""), synapse->priority, pending_inputs); free(desc); } static void -log_synapse(unsigned int log_level, pcmk__graph_t *graph, synapse_t *synapse) +log_synapse(unsigned int log_level, pcmk__graph_t *graph, + pcmk__graph_synapse_t *synapse) { char *pending = NULL; if (!pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { pending = synapse_pending_inputs(graph, synapse); } for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { log_synapse_action(log_level, synapse, (pcmk__graph_action_t *) lpc->data, pending); } free(pending); if (!pcmk_is_set(synapse->flags, pcmk__synapse_executed)) { log_unresolved_inputs(log_level, graph, synapse); } } void pcmk__log_graph_action(int log_level, pcmk__graph_action_t *action) { log_synapse(log_level, NULL, action->synapse); } void pcmk__log_graph(unsigned int log_level, pcmk__graph_t *graph) { if ((graph == NULL) || (graph->num_actions == 0)) { if (log_level == LOG_TRACE) { crm_debug("Empty transition graph"); } return; } do_crm_log(log_level, "Graph %d with %d actions:" " batch-limit=%d jobs, network-delay=%ums", graph->id, graph->num_actions, graph->batch_limit, graph->network_delay); for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { - log_synapse(log_level, graph, (synapse_t *) lpc->data); + log_synapse(log_level, graph, (pcmk__graph_synapse_t *) lpc->data); } }