diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c index e9dbbdc6c8..1a42983135 100644 --- a/daemons/controld/controld_callbacks.c +++ b/daemons/controld/controld_callbacks.c @@ -1,361 +1,361 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include /* From join_dc... */ extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); void crmd_ha_msg_filter(xmlNode * msg) { if (AM_I_DC) { const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) { const char *from = crm_element_value(msg, F_ORIG); if (!pcmk__str_eq(from, fsa_our_uname, pcmk__str_casei)) { int level = LOG_INFO; const char *op = crm_element_value(msg, F_CRM_TASK); /* make sure the election happens NOW */ if (fsa_state != S_ELECTION) { ha_msg_input_t new_input; level = LOG_WARNING; new_input.msg = msg; register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input, __func__); } do_crm_log(level, "Another DC detected: %s (op=%s)", from, op); goto done; } } } else { const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) { return; } } /* crm_log_xml_trace("HA[inbound]", msg); */ route_message(C_HA_MESSAGE, msg); done: trigger_fsa(); } /*! * \internal * \brief Check whether a node is online * * \param[in] node Node to check * * \retval -1 if completely dead * \retval 0 if partially alive * \retval 1 if completely alive */ static int node_alive(const crm_node_t *node) { if (pcmk_is_set(node->flags, crm_remote_node)) { // Pacemaker Remote nodes can't be partially alive return pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei) ? 1: -1; } else if (crm_is_peer_active(node)) { // Completely up cluster node: both cluster member and peer return 1; } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc()) && !pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) { // Completely down cluster node: neither cluster member nor peer return -1; } // Partially up cluster node: only cluster member or only peer return 0; } #define state_text(state) ((state)? (const char *)(state) : "in unknown state") bool controld_dc_left = false; void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { uint32_t old = 0; bool appeared = FALSE; bool is_remote = pcmk_is_set(node->flags, crm_remote_node); /* The controller waits to receive some information from the membership * layer before declaring itself operational. If this is being called for a * cluster node, indicate that we have it. 
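node_alive() above collapses two independent facts (cluster membership and an active peer process) into a single tri-state result. A minimal standalone sketch of how a caller can branch on that result, mirroring the "alive"/"dead"/"partially alive" wording used later in this file; the enum and helper are illustrative only, not part of the Pacemaker API:

#include <stdio.h>

/* Illustrative only: the real function works on crm_node_t, not an enum */
enum alive_result { ALIVE_DEAD = -1, ALIVE_PARTIAL = 0, ALIVE_FULL = 1 };

static const char *alive_text(int alive)
{
    /* Same ternary shape as the logging in peer_update_callback() */
    return (alive > 0)? "alive" : ((alive < 0)? "dead" : "partially alive");
}

int main(void)
{
    for (int a = ALIVE_DEAD; a <= ALIVE_FULL; a++) {
        printf("node_alive() == %d -> %s\n", a, alive_text(a));
    }
    return 0;
}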
*/ if (!is_remote) { controld_set_fsa_input_flags(R_PEER_DATA); } if (type == crm_status_processes && pcmk_is_set(node->processes, crm_get_cluster_proc()) && !AM_I_DC && !is_remote) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in crmd_ha_msg_filter() */ xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); crm_debug("Sending hello to node %u so that it learns our node name", node->id); send_cluster_message(node, crm_msg_crmd, query, FALSE); free_xml(query); } if (node->uname == NULL) { return; } switch (type) { case crm_status_uname: /* If we've never seen the node, then it also won't be in the status section */ crm_info("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), node->uname, state_text(node->state)); return; case crm_status_nstate: /* This callback should not be called unless the state actually * changed, but here's a failsafe just in case. */ CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei), return); crm_info("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), node->uname, state_text(node->state), state_text(data)); if (pcmk__str_eq(CRM_NODE_MEMBER, node->state, pcmk__str_casei)) { appeared = TRUE; if (!is_remote) { remove_stonith_cleanup(node->uname); } } else { controld_remove_voter(node->uname); } crmd_alert_node_event(node); break; case crm_status_processes: CRM_CHECK(data != NULL, return); old = *(const uint32_t *)data; appeared = pcmk_is_set(node->processes, crm_get_cluster_proc()); crm_info("Node %s is %s a peer " CRM_XS " DC=%s old=0x%07x new=0x%07x", node->uname, (appeared? "now" : "no longer"), (AM_I_DC? "true" : (fsa_our_dc? fsa_our_dc : "")), old, node->processes); if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) { /* Peer status did not change. This should not be possible, * since we don't track process flags other than peer status. */ crm_trace("Process flag 0x%7x did not change from 0x%7x to 0x%7x", crm_get_cluster_proc(), old, node->processes); return; } if (!appeared) { controld_remove_voter(node->uname); } if (!pcmk_is_set(fsa_input_register, R_CIB_CONNECTED)) { crm_trace("Ignoring peer status change because not connected to CIB"); return; } else if (fsa_state == S_STOPPING) { crm_trace("Ignoring peer status change because stopping"); return; } if (pcmk__str_eq(node->uname, fsa_our_uname, pcmk__str_casei) && !appeared) { /* Did we get evicted? */ crm_notice("Our peer connection failed"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL); } else if (pcmk__str_eq(node->uname, fsa_our_dc, pcmk__str_casei) && crm_is_peer_active(node) == FALSE) { /* Did the DC leave us? */ crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't * want to fence it. Newer DCs will send their shutdown request * to all peers, who will update the DC's expected state to * down, thus avoiding fencing. We can safely erase the DC's * transient attributes when it leaves in that case. However, * the only way to avoid fencing older DCs is to leave the * transient attributes intact until it rejoins. */ if (compare_version(fsa_our_dc_version, "3.0.9") > 0) { controld_delete_node_state(node->uname, controld_section_attrs, cib_scope_local); } } else if (AM_I_DC || controld_dc_left || (fsa_our_dc == NULL)) { /* This only needs to be done once, so normally the DC should do * it. 
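The crm_status_processes branch above only cares whether the peer bit actually flipped, which it tests by XOR-ing the old and new flag words and masking with crm_get_cluster_proc(). A standalone sketch of that bit test, with made-up flag values standing in for the real process flags:

#include <stdint.h>
#include <stdio.h>

#define FAKE_PEER_PROC  0x00000002U   /* stand-in for crm_get_cluster_proc() */
#define FAKE_OTHER_PROC 0x00000100U   /* unrelated flag, should be ignored   */

/* Return nonzero only if the peer bit differs between the two flag words */
static int peer_bit_changed(uint32_t old_procs, uint32_t new_procs)
{
    return ((old_procs ^ new_procs) & FAKE_PEER_PROC) != 0;
}

int main(void)
{
    printf("%d\n", peer_bit_changed(0, FAKE_PEER_PROC));  /* 1: peer appeared */
    printf("%d\n", peer_bit_changed(FAKE_PEER_PROC, 0));  /* 1: peer left */
    printf("%d\n", peer_bit_changed(FAKE_PEER_PROC,
                                    FAKE_PEER_PROC | FAKE_OTHER_PROC));
                                                          /* 0: only an unrelated bit changed */
    return 0;
}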
However if there is no DC, every node must do it, since * there is no other way to ensure some one node does it. */ if (appeared) { te_trigger_stonith_history_sync(FALSE); } else { controld_delete_node_state(node->uname, controld_section_attrs, cib_scope_local); } } break; } if (AM_I_DC) { xmlNode *update = NULL; int flags = node_update_peer; int alive = node_alive(node); crm_action_t *down = match_down_event(node->uuid); crm_trace("Alive=%d, appeared=%d, down=%d", alive, appeared, (down? down->id : -1)); if (appeared && (alive > 0) && !is_remote) { register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } if (down) { const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK); if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { /* tengine_stonith_callback() confirms fence actions */ crm_trace("Updating CIB %s fencer reported fencing of %s complete", (down->confirmed? "after" : "before"), node->uname); } else if (!appeared && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { // Shutdown actions are immediately confirmed (i.e. no_wait) if (!is_remote) { flags |= node_update_join | node_update_expected; crmd_peer_down(node, FALSE); check_join_state(fsa_state, __func__); } if (alive >= 0) { crm_info("%s of peer %s is in progress " CRM_XS " action=%d", task, node->uname, down->id); } else { crm_notice("%s of peer %s is complete " CRM_XS " action=%d", task, node->uname, down->id); - update_graph(transition_graph, down); + pcmk__update_graph(transition_graph, down); trigger_graph(); } } else { crm_trace("Node %s is %s, was expected to %s (op %d)", node->uname, ((alive > 0)? "alive" : ((alive < 0)? "dead" : "partially alive")), task, down->id); } } else if (appeared == FALSE) { crm_warn("Stonith/shutdown of node %s was not expected", node->uname); if (!is_remote) { crm_update_peer_join(__func__, node, crm_join_none); check_join_state(fsa_state, __func__); } abort_transition(INFINITY, tg_restart, "Node failure", NULL); fail_incompletable_actions(transition_graph, node->uuid); } else { crm_trace("Node %s came up, was not expected to be down", node->uname); } if (is_remote) { /* A pacemaker_remote node won't have its cluster status updated * in the CIB by membership-layer callbacks, so do it here. 
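The @COMPAT branch above gates on compare_version(fsa_our_dc_version, "3.0.9") > 0 to decide whether the departed DC is new enough to have broadcast its shutdown request. Purely as a rough standalone illustration of dotted-decimal comparison (this is not Pacemaker's actual compare_version() implementation), with missing components treated as zero and malformed input treated as equal:

#include <stdio.h>
#include <stdlib.h>

static int version_cmp(const char *a, const char *b)
{
    while ((*a != '\0') || (*b != '\0')) {
        char *end = NULL;
        long va = 0;
        long vb = 0;

        if (*a != '\0') {
            va = strtol(a, &end, 10);
            if (end == a) {
                break;      /* non-numeric component: give up */
            }
            a = (*end == '.')? (end + 1) : end;
        }
        if (*b != '\0') {
            vb = strtol(b, &end, 10);
            if (end == b) {
                break;
            }
            b = (*end == '.')? (end + 1) : end;
        }
        if (va != vb) {
            return (va < vb)? -1 : 1;
        }
    }
    return 0;
}

int main(void)
{
    printf("%d\n", version_cmp("3.0.10", "3.0.9"));   /* 1  */
    printf("%d\n", version_cmp("3.0.9", "3.0.9"));    /* 0  */
    printf("%d\n", version_cmp("1.1.12", "3.0.9"));   /* -1 */
    return 0;
}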
*/ flags |= node_update_cluster; /* Trigger resource placement on newly integrated nodes */ if (appeared) { abort_transition(INFINITY, tg_restart, "pacemaker_remote node integrated", NULL); } } /* Update the CIB node state */ update = create_node_state_update(node, flags, NULL, __func__); if (update == NULL) { crm_debug("Node state update not yet possible for %s", node->uname); } else { fsa_cib_anon_update(XML_CIB_TAG_STATUS, update); } free_xml(update); } trigger_fsa(); } void crmd_cib_connection_destroy(gpointer user_data) { CRM_CHECK(user_data == fsa_cib_conn,;); crm_trace("Invoked"); trigger_fsa(); fsa_cib_conn->state = cib_disconnected; if (!pcmk_is_set(fsa_input_register, R_CIB_CONNECTED)) { crm_info("Connection to the CIB manager terminated"); return; } // @TODO This should trigger a reconnect, not a shutdown crm_crit("Lost connection to the CIB manager, shutting down"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_CIB_CONNECTED); return; } gboolean crm_fsa_trigger(gpointer user_data) { crm_trace("Invoked (queue len: %d)", g_list_length(fsa_message_queue)); s_crmd_fsa(C_FSA_INTERNAL); crm_trace("Exited (queue len: %d)", g_list_length(fsa_message_queue)); return TRUE; } diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 6c2a6c5508..fd3f5b67f6 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -1,997 +1,997 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event); /* * stonith failure counting * * We don't want to get stuck in a permanent fencing loop. Keep track of the * number of fencing failures for each target node, and the most we'll restart a * transition for. */ struct st_fail_rec { int count; }; static bool fence_reaction_panic = FALSE; static unsigned long int stonith_max_attempts = 10; static GHashTable *stonith_failures = NULL; void update_stonith_max_attempts(const char *value) { stonith_max_attempts = char2score(value); if (stonith_max_attempts < 1UL) { stonith_max_attempts = 10UL; } } void set_fence_reaction(const char *reaction_s) { if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) { fence_reaction_panic = TRUE; } else { if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) { crm_warn("Invalid value '%s' for %s, using 'stop'", reaction_s, XML_CONFIG_ATTR_FENCE_REACTION); } fence_reaction_panic = FALSE; } } static gboolean too_many_st_failures(const char *target) { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *value = NULL; if (stonith_failures == NULL) { return FALSE; } if (target == NULL) { g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { if (value->count >= stonith_max_attempts) { target = (const char*)key; goto too_many; } } } else { value = g_hash_table_lookup(stonith_failures, target); if ((value != NULL) && (value->count >= stonith_max_attempts)) { goto too_many; } } return FALSE; too_many: crm_warn("Too many failures (%d) to fence %s, giving up", value->count, target); return TRUE; } /*! 
* \internal * \brief Reset a stonith fail count * * \param[in] target Name of node to reset, or NULL for all */ void st_fail_count_reset(const char *target) { if (stonith_failures == NULL) { return; } if (target) { struct st_fail_rec *rec = NULL; rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count = 0; } } else { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *rec = NULL; g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rec)) { rec->count = 0; } } } static void st_fail_count_increment(const char *target) { struct st_fail_rec *rec = NULL; if (stonith_failures == NULL) { stonith_failures = pcmk__strkey_table(free, free); } rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count++; } else { rec = malloc(sizeof(struct st_fail_rec)); if(rec == NULL) { return; } rec->count = 1; g_hash_table_insert(stonith_failures, strdup(target), rec); } } /* end stonith fail count functions */ static void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if (rc < pcmk_ok) { crm_err("Fencing update %d for %s: failed - %s (%d)", call_id, (char *)user_data, pcmk_strerror(rc), rc); crm_log_xml_warn(msg, "Failed update"); abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL); } else { crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); } } static void send_stonith_update(crm_action_t *action, const char *target, const char *uuid) { int rc = pcmk_ok; crm_node_t *peer = NULL; /* We (usually) rely on the membership layer to do node_update_cluster, * and the peer status callback to do node_update_peer, because the node * might have already rejoined before we get the stonith result here. */ int flags = node_update_join | node_update_expected; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = NULL; CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); /* Make sure the membership and join caches are accurate */ peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { /* Usually, we rely on the membership layer to update the cluster state * in the CIB. However, if the node has never been seen, do it here, so * the node is not considered unclean. */ flags |= node_update_cluster; } if (peer->uuid == NULL) { crm_info("Recording uuid '%s' for node '%s'", uuid, target); peer->uuid = strdup(uuid); } crmd_peer_down(peer, TRUE); /* Generate a node state update for the CIB */ node_state = create_node_state_update(peer, flags, NULL, __func__); /* we have to mark whether or not remote nodes have already been fenced */ if (peer->flags & crm_remote_node) { char *now_s = pcmk__ttoa(time(NULL)); crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s); free(now_s); } /* Force our known ID */ crm_xml_add(node_state, XML_ATTR_UUID, uuid); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override | cib_scope_local | cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated); /* Make sure it sticks */ /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ controld_delete_node_state(peer->uname, controld_section_all, cib_scope_local); free_xml(node_state); return; } /*! 
* \internal * \brief Abort transition due to stonith failure * * \param[in] abort_action Whether to restart or stop transition * \param[in] target Don't restart if this (NULL for any) has too many failures * \param[in] reason Log this stonith action XML as abort reason (or NULL) */ static void abort_for_stonith_failure(enum transition_action abort_action, const char *target, xmlNode *reason) { /* If stonith repeatedly fails, we eventually give up on starting a new * transition for that reason. */ if ((abort_action != tg_stop) && too_many_st_failures(target)) { abort_action = tg_stop; } abort_transition(INFINITY, abort_action, "Stonith failed", reason); } /* * stonith cleanup list * * If the DC is shot, proper notifications might not go out. * The stonith cleanup list allows the cluster to (re-)send * notifications once a new DC is elected. */ static GList *stonith_cleanup_list = NULL; /*! * \internal * \brief Add a node to the stonith cleanup list * * \param[in] target Name of node to add */ void add_stonith_cleanup(const char *target) { stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target)); } /*! * \internal * \brief Remove a node from the stonith cleanup list * * \param[in] Name of node to remove */ void remove_stonith_cleanup(const char *target) { GList *iter = stonith_cleanup_list; while (iter != NULL) { GList *tmp = iter; char *iter_name = tmp->data; iter = iter->next; if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) { crm_trace("Removing %s from the cleanup list", iter_name); stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); free(iter_name); } } } /*! * \internal * \brief Purge all entries from the stonith cleanup list */ void purge_stonith_cleanup() { if (stonith_cleanup_list) { GList *iter = NULL; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_info("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } } /*! 
* \internal * \brief Send stonith updates for all entries in cleanup list, then purge it */ void execute_stonith_cleanup() { GList *iter; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_node_t *target_node = crm_get_peer(0, target); const char *uuid = crm_peer_uuid(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } /* end stonith cleanup list functions */ /* stonith API client * * Functions that need to interact directly with the fencer via its API */ static stonith_t *stonith_api = NULL; static crm_trigger_t *stonith_reconnect = NULL; static char *te_client_id = NULL; static gboolean fail_incompletable_stonith(crm_graph_t *graph) { GList *lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GList *lpc2 = NULL; synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { crm_action_t *action = (crm_action_t *) lpc2->data; if (action->type != action_type_crm || action->confirmed) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { action->failed = TRUE; last_action = action->xml; - update_graph(graph, action); + pcmk__update_graph(graph, action); crm_notice("Failing action %d (%s): fencer terminated", action->id, ID(action->xml)); } } } if (last_action != NULL) { crm_warn("Fencer failure resulted in unrunnable actions"); abort_for_stonith_failure(tg_restart, NULL, last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e) { te_cleanup_stonith_history_sync(st, FALSE); if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencing daemon disconnected"); } if (stonith_api) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(st); } stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE); stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED); } if (AM_I_DC) { fail_incompletable_stonith(transition_graph); trigger_graph(); } } static void tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event) { if (te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } crmd_alert_fencing_op(st_event); if ((st_event->result == pcmk_ok) && pcmk__str_eq("on", st_event->action, pcmk__str_casei)) { crm_notice("%s was successfully unfenced by %s (at the request of %s)", st_event->target, st_event->executioner? st_event->executioner : "", st_event->origin); /* TODO: Hook up st_event->device */ return; } else if (pcmk__str_eq("on", st_event->action, pcmk__str_casei)) { crm_err("Unfencing of %s by %s failed: %s (%d)", st_event->target, st_event->executioner? 
st_event->executioner : "", pcmk_strerror(st_event->result), st_event->result); return; } else if ((st_event->result == pcmk_ok) && pcmk__str_eq(st_event->target, fsa_our_uname, pcmk__str_none)) { /* We were notified of our own fencing. Most likely, either fencing was * misconfigured, or fabric fencing that doesn't cut cluster * communication is in use. * * Either way, shutting down the local host is a good idea, to require * administrator intervention. Also, other nodes would otherwise likely * set our status to lost because of the fencing callback and discard * our subsequent election votes as "not part of our cluster". */ crm_crit("We were allegedly just fenced by %s for %s!", st_event->executioner? st_event->executioner : "the cluster", st_event->origin); /* Dumps blackbox if enabled */ if (fence_reaction_panic) { pcmk__panic(__func__); } else { crm_exit(CRM_EX_FATAL); } return; } /* Update the count of stonith failures for this target, in case we become * DC later. The current DC has already updated its fail count in * tengine_stonith_callback(). */ if (!AM_I_DC && pcmk__str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) { if (st_event->result == pcmk_ok) { st_fail_count_reset(st_event->target); } else { st_fail_count_increment(st_event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s " CRM_XS " initiator=%s ref=%s", st_event->target, st_event->result == pcmk_ok ? "" : " not", st_event->action, st_event->executioner ? st_event->executioner : "", (st_event->client_origin? st_event->client_origin : ""), pcmk_strerror(st_event->result), st_event->origin, st_event->id); if (st_event->result == pcmk_ok) { crm_node_t *peer = pcmk__search_known_node_cache(0, st_event->target, CRM_GET_PEER_ANY); const char *uuid = NULL; gboolean we_are_executioner = pcmk__str_eq(st_event->executioner, fsa_our_uname, pcmk__str_casei); if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. * Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the * peer cache via crm_remote_peer_cache_refresh(). For now, we rely * on the scheduler creating fence pseudo-events for the guests. */ if (st_event->client_origin && !pcmk__str_eq(st_event->client_origin, te_client_id, pcmk__str_casei)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we don't currently have one */ } else if (pcmk__str_eq(fsa_our_dc, st_event->target, pcmk__str_null_matches | pcmk__str_casei) && !pcmk_is_set(peer->flags, crm_remote_node)) { crm_notice("Fencing target %s %s our leader", st_event->target, (fsa_our_dc? 
"was" : "may have been")); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } add_stonith_cleanup(st_event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (pcmk_is_set(peer->flags, crm_remote_node)) { remote_ra_fail(st_event->target); } crmd_peer_down(peer, TRUE); } } /*! * \brief Connect to fencer * * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop * * \return TRUE * \note If user_data is NULL, this will wait 2s between attempts, for up to * 30 attempts, meaning the controller could be blocked as long as 58s. */ static gboolean te_connect_stonith(gpointer user_data) { int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return TRUE; } } if (stonith_api->state != stonith_disconnected) { crm_trace("Already connected to fencer, no need to retry"); return TRUE; } if (user_data == NULL) { // Blocking (retry failures now until successful) rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 30 attempts: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } } else { // Non-blocking (retry failures later in main loop) rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (rc != pcmk_ok) { if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) { crm_notice("Fencer connection failed (will retry): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencer connection failed (ignoring because no longer required): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } return TRUE; } } if (rc == pcmk_ok) { stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE, tengine_stonith_notify); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED, tengine_stonith_history_synced); te_trigger_stonith_history_sync(TRUE); crm_notice("Fencer successfully connected"); } return TRUE; } /*! 
\internal \brief Schedule fencer connection attempt in main loop */ void controld_trigger_fencer_connect() { if (stonith_reconnect == NULL) { stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, GINT_TO_POINTER(TRUE)); } controld_set_fsa_input_flags(R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); } void controld_disconnect_fencer(bool destroy) { if (stonith_api) { // Prevent fencer connection from coming up again controld_clear_fsa_input_flags(R_ST_REQUIRED); if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(stonith_api); } stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE); stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED); } if (destroy) { if (stonith_api) { stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (stonith_reconnect) { mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; } if (te_client_id) { free(te_client_id); te_client_id = NULL; } } } static gboolean do_stonith_history_sync(gpointer user_data) { if (stonith_api && (stonith_api->state != stonith_disconnected)) { stonith_history_t *history = NULL; te_cleanup_stonith_history_sync(stonith_api, FALSE); stonith_api->cmds->history(stonith_api, st_opt_sync_call | st_opt_broadcast, NULL, &history, 5); stonith_history_free(history); return TRUE; } else { crm_info("Skip triggering stonith history-sync as stonith is disconnected"); return FALSE; } } static void tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) { char *uuid = NULL; int stonith_id = -1; int transition_id = -1; crm_action_t *action = NULL; int call_id = data->call_id; int rc = data->rc; char *userdata = data->userdata; CRM_CHECK(userdata != NULL, return); crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata, pcmk_strerror(rc), rc); if (AM_I_DC == FALSE) { return; } /* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */ /* op->call_id, op->optype, op->node_name, op->op_result, */ /* (char *)op->node_list, op->private_data); */ /* filter out old STONITH actions */ CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL), goto bail); if (transition_graph->complete || stonith_id < 0 || !pcmk__str_eq(uuid, te_uuid, pcmk__str_casei) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside of the current transition"); goto bail; } action = controld_get_action(stonith_id); if (action == NULL) { crm_err("Stonith action not matched"); goto bail; } stop_te_timer(action->timer); if (rc == pcmk_ok) { const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); const char *op = crm_meta_value(action->params, "stonith_action"); crm_info("Stonith operation %d for %s passed", call_id, target); if (action->confirmed == FALSE) { te_action_confirmed(action, NULL); if (pcmk__str_eq("on", op, pcmk__str_casei)) { const char *value = NULL; char *now = pcmk__ttoa(time(NULL)); gboolean is_remote_node = FALSE; /* This check is not 100% reliable, since this node is not * guaranteed to have the remote node cached. However, it * doesn't have to be reliable, since the attribute manager can * learn a node's "remoteness" by other means sooner or later. * This allows it to learn more quickly if this node does have * the information. 
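When unfencing succeeds, the branch just below stamps the target's CRM_ATTR_UNFENCED attribute with pcmk__ttoa(time(NULL)), and send_stonith_update() above does the same for XML_NODE_IS_FENCED on remote nodes. A standalone sketch of rendering an epoch timestamp as the kind of decimal string such an attribute update needs; the helper name is made up, not the Pacemaker API:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Hypothetical helper: render a time_t as a heap-allocated decimal string */
static char *epoch_to_string(time_t when)
{
    char buf[32];

    snprintf(buf, sizeof(buf), "%lld", (long long) when);
    return strdup(buf);
}

int main(void)
{
    char *now_s = epoch_to_string(time(NULL));

    printf("node was unfenced at %s\n", now_s);
    free(now_s);
    return 0;
}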
*/ if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) { is_remote_node = TRUE; } update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, is_remote_node); free(now); value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL); update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, is_remote_node); value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE); update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, is_remote_node); } else if (action->sent_update == FALSE) { send_stonith_update(action, target, uuid); action->sent_update = TRUE; } } st_fail_count_reset(target); } else { const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); enum transition_action abort_action = tg_restart; action->failed = TRUE; crm_notice("Stonith operation %d for %s failed (%s): aborting transition.", call_id, target, pcmk_strerror(rc)); /* If no fence devices were available, there's no use in immediately * checking again, so don't start a new transition in that case. */ if (rc == -ENODEV) { crm_warn("No devices found in cluster to fence %s, giving up", target); abort_action = tg_stop; } /* Increment the fail count now, so abort_for_stonith_failure() can * check it. Non-DC nodes will increment it in tengine_stonith_notify(). */ st_fail_count_increment(target); abort_for_stonith_failure(abort_action, target, NULL); } - update_graph(transition_graph, action); + pcmk__update_graph(transition_graph, action); trigger_graph(); bail: free(userdata); free(uuid); return; } static int fence_with_delay(const char *target, const char *type, const char *delay) { uint32_t options = st_opt_none; // Group of enum stonith_call_options int timeout_sec = (int) (transition_graph->stonith_timeout / 1000); int delay_i; if (crmd_join_phase_count(crm_join_confirmed) == 1) { stonith__set_call_options(options, target, st_opt_allow_suicide); } pcmk__scan_min_int(delay, &delay_i, 0); return stonith_api->cmds->fence_with_delay(stonith_api, options, target, type, timeout_sec, 0, delay_i); } gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action) { int rc = 0; const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; char *transition_key = NULL; const char *priority_delay = NULL; gboolean invalid_action = FALSE; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = crm_meta_value(action->params, "stonith_action"); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return FALSE; } priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); crm_notice("Requesting fencing (%s) of node %s " CRM_XS " action=%s timeout=%u%s%s", type, target, id, transition_graph->stonith_timeout, priority_delay ? " priority_delay=" : "", priority_delay ? priority_delay : ""); /* Passing NULL means block until we can connect... 
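te_fence_node() passes NULL to te_connect_stonith(), which (per the note on its definition above) blocks and retries the fencer connection, waiting 2s between attempts for up to 30 attempts, i.e. roughly 58s in the worst case. A generic standalone sketch of that kind of bounded retry loop; connect_once() is a placeholder, not the fencer API:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Placeholder for a real connection attempt (e.g. to a local daemon) */
static bool connect_once(void)
{
    return false;   /* pretend the daemon is not up yet */
}

/* Retry up to max_attempts times, sleeping delay_s between attempts:
 * with 30 attempts and 2s delays that is at most 29 * 2 = 58s of waiting */
static bool connect_retry(unsigned int max_attempts, unsigned int delay_s)
{
    for (unsigned int attempt = 1; attempt <= max_attempts; attempt++) {
        if (connect_once()) {
            printf("connected on attempt %u\n", attempt);
            return true;
        }
        if (attempt < max_attempts) {
            sleep(delay_s);     /* only sleep if another attempt follows */
        }
    }
    return false;
}

int main(void)
{
    return connect_retry(3, 1)? 0 : 1;   /* small numbers for the demo */
}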
*/ te_connect_stonith(NULL); rc = fence_with_delay(target, type, priority_delay); transition_key = pcmk__transition_key(transition_graph->id, action->id, 0, te_uuid), stonith_api->cmds->register_callback(stonith_api, rc, (int) (transition_graph->stonith_timeout / 1000), st_opt_timeout_updates, transition_key, "tengine_stonith_callback", tengine_stonith_callback); return TRUE; } bool controld_verify_stonith_watchdog_timeout(const char *value) { gboolean rv = TRUE; if (stonith_api && (stonith_api->state != stonith_disconnected) && stonith__watchdog_fencing_enabled_for_node_api(stonith_api, fsa_our_uname)) { rv = pcmk__valid_sbd_timeout(value); } return rv; } /* end stonith API client functions */ /* * stonith history synchronization * * Each node's fencer keeps track of a cluster-wide fencing history. When a node * joins or leaves, we need to synchronize the history across all nodes. */ static crm_trigger_t *stonith_history_sync_trigger = NULL; static mainloop_timer_t *stonith_history_sync_timer_short = NULL; static mainloop_timer_t *stonith_history_sync_timer_long = NULL; void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers) { if (free_timers) { mainloop_timer_del(stonith_history_sync_timer_short); stonith_history_sync_timer_short = NULL; mainloop_timer_del(stonith_history_sync_timer_long); stonith_history_sync_timer_long = NULL; } else { mainloop_timer_stop(stonith_history_sync_timer_short); mainloop_timer_stop(stonith_history_sync_timer_long); } if (st) { st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED); } } static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) { te_cleanup_stonith_history_sync(st, FALSE); crm_debug("Fence-history synced - cancel all timers"); } static gboolean stonith_history_sync_set_trigger(gpointer user_data) { mainloop_set_trigger(stonith_history_sync_trigger); return FALSE; } void te_trigger_stonith_history_sync(bool long_timeout) { /* trigger a sync in 5s to give more nodes the * chance to show up so that we don't create * unnecessary stonith-history-sync traffic * * the long timeout of 30s is there as a fallback * so that after a successful connection to fenced * we will wait for 30s for the DC to trigger a * history-sync * if this doesn't happen we trigger a sync locally * (e.g. 
fenced segfaults and is restarted by pacemakerd) */ /* as we are finally checking the stonith-connection * in do_stonith_history_sync we should be fine * leaving stonith_history_sync_time & stonith_history_sync_trigger * around */ if (stonith_history_sync_trigger == NULL) { stonith_history_sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW, do_stonith_history_sync, NULL); } if (long_timeout) { if(stonith_history_sync_timer_long == NULL) { stonith_history_sync_timer_long = mainloop_timer_add("history_sync_long", 30000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); mainloop_timer_start(stonith_history_sync_timer_long); } else { if(stonith_history_sync_timer_short == NULL) { stonith_history_sync_timer_short = mainloop_timer_add("history_sync_short", 5000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); mainloop_timer_start(stonith_history_sync_timer_short); } } /* end stonith history synchronization functions */ diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c index 2204b6ef34..38505bf2a8 100644 --- a/daemons/controld/controld_te_actions.c +++ b/daemons/controld/controld_te_actions.c @@ -1,643 +1,643 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // lrmd_event_data_t, lrmd_free_event() #include #include #include #include #include char *te_uuid = NULL; GHashTable *te_targets = NULL; void send_rsc_command(crm_action_t * action); static void te_update_job_count(crm_action_t * action, int offset); static void te_start_action_timer(crm_graph_t * graph, crm_action_t * action) { action->timer = calloc(1, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->action = action; action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay, action_timer_callback, (void *)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo) { const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK); /* send to peers as well? 
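Both the history-sync timers above (5s short, 30s fallback) and te_start_action_timer() below ultimately rest on GLib timeout sources; te_start_action_timer() arms one for action->timeout plus the graph's network_delay. A minimal self-contained GLib example of a one-shot timeout firing in a main loop (build against glib-2.0); the callback name and interval are illustrative:

#include <glib.h>

static gboolean on_timeout(gpointer user_data)
{
    GMainLoop *loop = user_data;

    g_print("timer fired\n");
    g_main_loop_quit(loop);
    return G_SOURCE_REMOVE;     /* one-shot: do not reschedule */
}

int main(void)
{
    GMainLoop *loop = g_main_loop_new(NULL, FALSE);

    /* e.g. a 2000ms action timeout plus a 500ms network delay */
    g_timeout_add(2000 + 500, on_timeout, loop);
    g_main_loop_run(loop);
    g_main_loop_unref(loop);
    return 0;
}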
*/ if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) { GHashTableIter iter; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { xmlNode *cmd = NULL; if (pcmk__str_eq(fsa_our_uname, node->uname, pcmk__str_casei)) { continue; } cmd = create_request(task, pseudo->xml, node->uname, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); send_cluster_message(node, crm_msg_crmd, cmd, FALSE); free_xml(cmd); } remote_ra_process_maintenance_nodes(pseudo->xml); } else { /* Check action for Pacemaker Remote node side effects */ remote_ra_process_pseudo(pseudo->xml); } crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id, crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY)); te_action_confirmed(pseudo, graph); return TRUE; } static int get_target_rc(crm_action_t * action) { int exit_status; pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC), &exit_status, 0); return exit_status; } static gboolean te_crm_command(crm_graph_t * graph, crm_action_t * action) { char *counter = NULL; xmlNode *cmd = NULL; gboolean is_local = FALSE; const char *id = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { const char *mode = crm_element_value(action->xml, PCMK__XA_MODE); if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) { router_node = fsa_our_uname; } } } CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_info("Executing crm-event (%s)%s%s: %s on %s", crm_str(id), (is_local? " locally" : ""), (no_wait? " without waiting" : ""), crm_str(task), on_node); if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { /* defer until everything else completes */ crm_info("crm-event (%s) is a local shutdown", crm_str(id)); graph->completion_action = tg_shutdown; graph->abort_reason = "local shutdown"; te_action_confirmed(action, graph); return TRUE; } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { crm_node_t *peer = crm_get_peer(0, router_node); pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); } cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = pcmk__transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE); free(counter); free_xml(cmd); if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { te_action_confirmed(action, graph); } else { if (action->timeout <= 0) { crm_err("Action %d: %s on %s had an invalid timeout (%dms). 
Using %ums instead", action->id, task, on_node, action->timeout, graph->network_delay); action->timeout = (int) graph->network_delay; } te_start_action_timer(graph, action); } return TRUE; } void controld_record_action_timeout(crm_action_t *action) { lrmd_event_data_t *op = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; int rc = pcmk_ok; const char *rsc_id = NULL; const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override | cib_scope_local; int target_rc = get_target_rc(action); crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if (action_rsc == NULL) { return; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE); crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS); crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER); /* If the executor gets a timeout while waiting for the action to complete, * that will be reported via the usual callback. This timeout means that we * didn't hear from the executor or the controller that relayed the action * to the executor. * * @TODO Using PCMK_OCF_UNKNOWN_ERROR instead of PCMK_OCF_TIMEOUT is one way * to distinguish those situations, but perhaps PCMK_OCF_TIMEOUT would be * preferable anyway. 
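controld_record_action_timeout() above assembles a node_state fragment with nested lrm/lrm_resources/lrm_resource elements before pushing it to the CIB. Pacemaker uses its own wrappers (create_xml_node(), crm_xml_add()); purely as an illustration of the same nesting with plain libxml2, where the element and attribute names are guesses at what the XML_* constants above expand to and the values are invented:

#include <libxml/tree.h>
#include <stdio.h>

int main(void)
{
    xmlDoc *doc = xmlNewDoc(BAD_CAST "1.0");
    xmlNode *state = xmlNewNode(NULL, BAD_CAST "node_state");
    xmlNode *lrm = NULL, *rscs = NULL, *rsc = NULL;
    xmlChar *dump = NULL;
    int len = 0;

    xmlDocSetRootElement(doc, state);
    xmlNewProp(state, BAD_CAST "id", BAD_CAST "1");          /* target UUID */
    xmlNewProp(state, BAD_CAST "uname", BAD_CAST "node1");   /* target name */

    lrm = xmlNewChild(state, NULL, BAD_CAST "lrm", NULL);
    xmlNewProp(lrm, BAD_CAST "id", BAD_CAST "1");
    rscs = xmlNewChild(lrm, NULL, BAD_CAST "lrm_resources", NULL);
    rsc = xmlNewChild(rscs, NULL, BAD_CAST "lrm_resource", NULL);
    xmlNewProp(rsc, BAD_CAST "id", BAD_CAST "dummy-rsc");

    xmlDocDumpFormatMemory(doc, &dump, &len, 1);
    printf("%s", (char *) dump);

    xmlFree(dump);
    xmlFreeDoc(doc);
    return 0;
}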
*/ - op = convert_graph_action(NULL, action, PCMK_LRM_OP_TIMEOUT, - PCMK_OCF_UNKNOWN_ERROR); + op = pcmk__event_from_graph_action(NULL, action, PCMK_LRM_OP_TIMEOUT, + PCMK_OCF_UNKNOWN_ERROR); op->call_id = -1; op->user_data = pcmk__transition_key(transition_graph->id, action->id, target_rc, te_uuid); xml_op = pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target, __func__, LOG_INFO); lrmd_free_event(op); crm_log_xml_trace(xml_op, "Action timeout"); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options); fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated); free_xml(state); crm_trace("Sent CIB update (call ID %d) for timeout of action %d (%s on %s)", rc, action->id, task_uuid, target); action->sent_update = TRUE; } static gboolean te_rsc_command(crm_graph_t * graph, crm_action_t * action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; gboolean is_local = FALSE; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; } counter = pcmk__transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d", task, task_uuid, (is_local? " locally" : ""), on_node, (no_wait? " without waiting" : ""), action->id); cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); if (is_local) { /* shortcut local resource commands */ ha_msg_input_t data = { .msg = cmd, .xml = rsc_op, }; fsa_data_t msg = { .id = 0, .data = &data, .data_type = fsa_dt_ha_msg, .fsa_input = I_NULL, .fsa_cause = C_FSA_INTERNAL, .actions = A_LRM_INVOKE, .origin = __func__, }; do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg); } else { rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE); } free(counter); free_xml(cmd); action->executed = TRUE; if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { crm_info("Action %d confirmed - no wait", action->id); action->confirmed = TRUE; /* Just mark confirmed. 
* Don't bump the job count only to immediately decrement it */ - update_graph(transition_graph, action); + pcmk__update_graph(transition_graph, action); trigger_graph(); } else if (action->confirmed == TRUE) { crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.", action->id, task, task_uuid, on_node, action->timeout); } else { if (action->timeout <= 0) { crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead", action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); action->timeout = (int) graph->network_delay; } te_update_job_count(action, 1); te_start_action_timer(graph, action); } return TRUE; } struct te_peer_s { char *name; int jobs; int migrate_jobs; }; static void te_peer_free(gpointer p) { struct te_peer_s *peer = p; free(peer->name); free(peer); } void te_reset_job_counts(void) { GHashTableIter iter; struct te_peer_s *peer = NULL; if(te_targets == NULL) { te_targets = pcmk__strkey_table(NULL, te_peer_free); } g_hash_table_iter_init(&iter, te_targets); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) { peer->jobs = 0; peer->migrate_jobs = 0; } } static void te_update_job_count_on(const char *target, int offset, bool migrate) { struct te_peer_s *r = NULL; if(target == NULL || te_targets == NULL) { return; } r = g_hash_table_lookup(te_targets, target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } r->jobs += offset; if(migrate) { r->migrate_jobs += offset; } crm_trace("jobs[%s] = %d", target, r->jobs); } static void te_update_job_count(crm_action_t * action, int offset) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if (action->type != action_type_rsc || target == NULL) { /* No limit on these */ return; } /* if we have a router node, this means the action is performing * on a remote node. 
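te_update_job_count_on() above keeps a per-node job counter in the te_targets hash table, and te_should_perform_action_on() (below) defers actions once a node reaches its throttle limit. A self-contained GLib sketch of that bookkeeping with a hard-coded limit; the struct and the limit value are stand-ins, not the controller's throttle_get_job_limit() logic:

#include <glib.h>

struct peer_jobs {
    char *name;
    int jobs;
};

static void peer_jobs_free(gpointer p)
{
    struct peer_jobs *peer = p;

    g_free(peer->name);
    g_free(peer);
}

/* Look up (or create) the per-node record and bump its job count */
static struct peer_jobs *adjust_jobs(GHashTable *targets, const char *node, int offset)
{
    struct peer_jobs *rec = g_hash_table_lookup(targets, node);

    if (rec == NULL) {
        rec = g_new0(struct peer_jobs, 1);
        rec->name = g_strdup(node);
        g_hash_table_insert(targets, rec->name, rec);
    }
    rec->jobs += offset;
    return rec;
}

int main(void)
{
    const int limit = 2;    /* stand-in for throttle_get_job_limit() */
    GHashTable *targets = g_hash_table_new_full(g_str_hash, g_str_equal,
                                                NULL, peer_jobs_free);

    for (int i = 0; i < 4; i++) {
        struct peer_jobs *rec = g_hash_table_lookup(targets, "node1");

        if ((rec != NULL) && (rec->jobs >= limit)) {
            g_print("deferring job %d: node1 is at its limit of %d\n", i, limit);
            continue;
        }
        adjust_jobs(targets, "node1", +1);
        g_print("starting job %d on node1\n", i);
    }
    g_hash_table_destroy(targets);
    return 0;
}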
For now, we count all actions occurring on a * remote node against the job list on the cluster node hosting * the connection resources */ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); te_update_job_count_on(t1, offset, TRUE); te_update_job_count_on(t2, offset, TRUE); return; } else if (target == NULL) { target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); } te_update_job_count_on(target, offset, FALSE); } static gboolean te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target) { int limit = 0; struct te_peer_s *r = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); if(target == NULL) { /* No limit on these */ return TRUE; } else if(te_targets == NULL) { return FALSE; } r = g_hash_table_lookup(te_targets, target); limit = throttle_get_job_limit(target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } if(limit <= r->jobs) { crm_trace("Peer %s is over their job limit of %d (%d): deferring %s", target, limit, r->jobs, id); return FALSE; } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) { if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s", target, graph->migration_limit, r->migrate_jobs, id); return FALSE; } } crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit); return TRUE; } static gboolean te_should_perform_action(crm_graph_t * graph, crm_action_t * action) { const char *target = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (action->type != action_type_rsc) { /* No limit on these */ return TRUE; } /* if we have a router node, this means the action is performing * on a remote node. For now, we count all actions occurring on a * remote node against the job list on the cluster node hosting * the connection resources */ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); if(te_should_perform_action_on(graph, action, target) == FALSE) { return FALSE; } target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); } else if (target == NULL) { target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); } return te_should_perform_action_on(graph, action, target); } /*! 
* \brief Confirm a graph action (and optionally update graph) * * \param[in] action Action to confirm * \param[in] graph Update and trigger this graph (if non-NULL) */ void te_action_confirmed(crm_action_t *action, crm_graph_t *graph) { if (action->confirmed == FALSE) { if ((action->type == action_type_rsc) && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) { te_update_job_count(action, -1); } action->confirmed = TRUE; } if (graph) { - update_graph(graph, action); + pcmk__update_graph(graph, action); trigger_graph(); } } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node, te_should_perform_action, }; void notify_crmd(crm_graph_t * graph) { const char *type = "unknown"; enum crmd_fsa_input event = I_NULL; crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state)); if (graph->complete == FALSE) { CRM_CHECK(graph->complete,); graph->complete = TRUE; } switch (graph->completion_action) { case tg_stop: type = "stop"; if (fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_done: type = "done"; if (fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_restart: type = "restart"; if (fsa_state == S_TRANSITION_ENGINE) { if (transition_timer->period_ms > 0) { controld_stop_timer(transition_timer); controld_start_timer(transition_timer); } else { event = I_PE_CALC; } } else if (fsa_state == S_POLICY_ENGINE) { controld_set_fsa_action_flags(A_PE_INVOKE); trigger_fsa(); } break; case tg_shutdown: type = "shutdown"; if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { event = I_STOP; } else { crm_err("We didn't ask to be shut down, yet the scheduler is telling us to"); event = I_TERMINATE; } } crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason)); graph->abort_reason = NULL; graph->completion_action = tg_done; controld_clear_fsa_input_flags(R_IN_TRANSITION); if (event != I_NULL) { register_fsa_input(C_FSA_INTERNAL, event, NULL); } else if (fsa_source) { mainloop_set_trigger(fsa_source); } } diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c index 4e3e4e641f..347cb8b029 100644 --- a/daemons/controld/controld_te_callbacks.c +++ b/daemons/controld/controld_te_callbacks.c @@ -1,700 +1,700 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
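te_graph_fns above hands the graph executor a table of callbacks, one per action type (pseudo, resource, cluster, fencing) plus an allowed-check. A toy standalone sketch of the same dispatch-table idea, simplified to three action types; the types here are stand-ins for crm_graph_functions_t and crm_action_t, not the real definitions:

#include <stdbool.h>
#include <stdio.h>

enum action_type { ACTION_PSEUDO, ACTION_RSC, ACTION_CRM };

struct action {
    enum action_type type;
    int id;
};

/* Simplified stand-in for crm_graph_functions_t */
struct graph_fns {
    bool (*pseudo)(struct action *a);
    bool (*rsc)(struct action *a);
    bool (*crm)(struct action *a);
};

static bool do_pseudo(struct action *a) { printf("pseudo %d\n", a->id); return true; }
static bool do_rsc(struct action *a)    { printf("rsc %d\n", a->id);    return true; }
static bool do_crm(struct action *a)    { printf("crm %d\n", a->id);    return true; }

static const struct graph_fns fns = { do_pseudo, do_rsc, do_crm };

static bool fire_action(struct action *a)
{
    switch (a->type) {
        case ACTION_PSEUDO: return fns.pseudo(a);
        case ACTION_RSC:    return fns.rsc(a);
        case ACTION_CRM:    return fns.crm(a);
    }
    return false;
}

int main(void)
{
    struct action a = { ACTION_RSC, 7 };

    return fire_action(&a)? 0 : 1;
}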
*/ #include #include #include #include #include #include #include /* For ONLINESTATUS etc */ #include void te_update_confirm(const char *event, xmlNode * msg); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; crm_trigger_t *transition_trigger = NULL; /* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */ #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']" // An explicit shutdown-lock of 0 means the lock has been cleared static bool shutdown_lock_cleared(xmlNode *lrm_resource) { time_t shutdown_lock = 0; return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &shutdown_lock) == pcmk_ok) && (shutdown_lock == 0); } static void te_update_diff_v1(const char *event, xmlNode *diff) { int lpc, max; xmlXPathObject *xpathObj = NULL; CRM_CHECK(diff != NULL, return); xml_log_patchset(LOG_TRACE, __func__, diff); if (cib_config_changed(NULL, NULL, &diff)) { abort_transition(INFINITY, tg_restart, "Non-status change", diff); goto bail; /* configuration changed */ } /* Tickets Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted); goto bail; } freeXpathObject(xpathObj); /* Tickets Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); /* Transient Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *attr = getXpathResult(xpathObj, lpc); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = NULL; if (pcmk__str_eq(CRM_OP_PROBED, name, pcmk__str_casei)) { value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); } if (crm_is_true(value) == FALSE) { abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); crm_log_xml_trace(attr, "Abort"); goto bail; } } freeXpathObject(xpathObj); /* Transient Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_TRANSIENT_NODEATTRS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); // Check for lrm_resource entries xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RESOURCE); max = numXpathResults(xpathObj); /* * Updates by, or in response to, graph actions will never affect more than * one resource at a time, so such updates indicate an LRM refresh. In that * case, start a new transition rather than check each result individually, * which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. 
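te_update_diff_v1() above decides how to react to a v1 CIB diff by running a series of XPath queries against it (xpath_search() and numXpathResults() are Pacemaker wrappers). A minimal self-contained libxml2 example of the same pattern, evaluating an XPath expression over an in-memory document and counting matches; the document and expression are invented for the demo:

#include <libxml/parser.h>
#include <libxml/xpath.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    const char *xml =
        "<diff-added><status>"
        "<lrm_rsc_op id='op1'/><lrm_rsc_op id='op2'/>"
        "</status></diff-added>";
    xmlDoc *doc = xmlReadMemory(xml, (int) strlen(xml), "diff.xml", NULL, 0);
    xmlXPathContext *ctx = xmlXPathNewContext(doc);
    xmlXPathObject *result = xmlXPathEvalExpression(BAD_CAST "//lrm_rsc_op", ctx);
    int matches = 0;

    if ((result != NULL) && (result->nodesetval != NULL)) {
        matches = result->nodesetval->nodeNr;
    }
    printf("found %d operation entries\n", matches);   /* prints 2 */

    xmlXPathFreeObject(result);
    xmlXPathFreeContext(ctx);
    xmlFreeDoc(doc);
    return 0;
}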
* Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if ((transition_graph->pending == 0) && (max > 1)) { crm_debug("Ignoring resource operation updates due to history refresh of %d resources", max); crm_log_xml_trace(diff, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "History refresh", NULL); goto bail; } if (max == 1) { xmlNode *lrm_resource = getXpathResult(xpathObj, 0); if (shutdown_lock_cleared(lrm_resource)) { // @TODO would be more efficient to abort once after transition done abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", lrm_resource); // Still process results, so we stop timers and update failcounts } } freeXpathObject(xpathObj); /* Process operation updates */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); if (max > 0) { int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } } freeXpathObject(xpathObj); /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { int path_max = 0; const char *op_id = NULL; char *rsc_op_xpath = NULL; xmlXPathObject *op_match = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; op_id = ID(match); path_max = strlen(RSC_OP_TEMPLATE) + strlen(op_id) + 1; rsc_op_xpath = calloc(1, path_max); snprintf(rsc_op_xpath, path_max, RSC_OP_TEMPLATE, op_id); op_match = xpath_search(diff, rsc_op_xpath); if (numXpathResults(op_match) == 0) { /* Prevent false positives by matching cancelations too */ const char *node = get_node_id(match); crm_action_t *cancelled = get_cancel_action(op_id, node); if (cancelled == NULL) { crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, node); abort_transition(INFINITY, tg_restart, "Resource op removal", match); freeXpathObject(op_match); free(rsc_op_xpath); goto bail; } else { crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", op_id, node, cancelled->id); } } freeXpathObject(op_match); free(rsc_op_xpath); } bail: freeXpathObject(xpathObj); } static void process_lrm_resource_diff(xmlNode *lrm_resource, const char *node) { for (xmlNode *rsc_op = pcmk__xml_first_child(lrm_resource); rsc_op != NULL; rsc_op = pcmk__xml_next(rsc_op)) { process_graph_event(rsc_op, node); } if (shutdown_lock_cleared(lrm_resource)) { // @TODO would be more efficient to abort once after transition done abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", lrm_resource); } } static void process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) { xmlNode *rsc = NULL; if (xml == NULL) { return; } else if (strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) { xml = first_named_child(xml, XML_LRM_TAG_RESOURCES); crm_trace("Got %p in %s", xml, XML_CIB_TAG_LRM); } CRM_ASSERT(strcmp((const char*)xml->name, XML_LRM_TAG_RESOURCES) == 0); /* * Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time, so such updates indicate an * LRM refresh. 
* * In that case, start a new transition rather than check each result * individually, which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if ((transition_graph->pending == 0) && xml->children && xml->children->next) { crm_log_xml_trace(change, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "History refresh", NULL); return; } for (rsc = pcmk__xml_first_child(xml); rsc != NULL; rsc = pcmk__xml_next(rsc)) { crm_trace("Processing %s", ID(rsc)); process_lrm_resource_diff(rsc, node); } } static char *extract_node_uuid(const char *xpath) { char *mutable_path = strdup(xpath); char *node_uuid = NULL; char *search = NULL; char *match = NULL; match = strstr(mutable_path, "node_state[@id=\'"); if (match == NULL) { free(mutable_path); return NULL; } match += strlen("node_state[@id=\'"); search = strchr(match, '\''); if (search == NULL) { free(mutable_path); return NULL; } search[0] = 0; node_uuid = strdup(match); free(mutable_path); return node_uuid; } static void abort_unless_down(const char *xpath, const char *op, xmlNode *change, const char *reason) { char *node_uuid = NULL; crm_action_t *down = NULL; if(!pcmk__str_eq(op, "delete", pcmk__str_casei)) { abort_transition(INFINITY, tg_restart, reason, change); return; } node_uuid = extract_node_uuid(xpath); if(node_uuid == NULL) { crm_err("Could not extract node ID from %s", xpath); abort_transition(INFINITY, tg_restart, reason, change); return; } down = match_down_event(node_uuid); if (down == NULL) { crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath); abort_transition(INFINITY, tg_restart, reason, change); } else { crm_trace("Expecting changes to %s (%s)", node_uuid, xpath); } free(node_uuid); } static void process_op_deletion(const char *xpath, xmlNode *change) { char *mutable_key = strdup(xpath); char *key; char *node_uuid; // Extract the part of xpath between last pair of single quotes key = strrchr(mutable_key, '\''); if (key != NULL) { *key = '\0'; key = strrchr(mutable_key, '\''); } if (key == NULL) { crm_warn("Ignoring malformed CIB update (resource deletion of %s)", xpath); free(mutable_key); return; } ++key; node_uuid = extract_node_uuid(xpath); if (confirm_cancel_action(key, node_uuid) == FALSE) { abort_transition(INFINITY, tg_restart, "Resource operation removal", change); } free(mutable_key); free(node_uuid); } static void process_delete_diff(const char *xpath, const char *op, xmlNode *change) { if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) { process_op_deletion(xpath, change); } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) { abort_unless_down(xpath, op, change, "Resource state removal"); } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) { abort_unless_down(xpath, op, change, "Node state removal"); } else { crm_trace("Ignoring delete of %s", xpath); } } static void process_node_state_diff(xmlNode *state, xmlNode *change, const char *op, const char *xpath) { xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM); process_resource_updates(ID(state), lrm, change, op, xpath); } static void process_status_diff(xmlNode *status, xmlNode *change, const char *op, const char *xpath) { for (xmlNode *state = pcmk__xml_first_child(status); state != NULL; state = pcmk__xml_next(state)) { process_node_state_diff(state, change, op, xpath); } } static void process_cib_diff(xmlNode *cib, xmlNode *change, const 
char *op, const char *xpath) { xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS); xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION); if (status) { process_status_diff(status, change, op, xpath); } if (config) { abort_transition(INFINITY, tg_restart, "Non-status-only change", change); } } static void te_update_diff_v2(xmlNode *diff) { crm_log_xml_trace(diff, "Patch:Raw"); for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL; change = pcmk__xml_next(change)) { xmlNode *match = NULL; const char *name = NULL; const char *xpath = crm_element_value(change, XML_DIFF_PATH); // Possible ops: create, modify, delete, move const char *op = crm_element_value(change, XML_DIFF_OP); // Ignore uninteresting updates if (op == NULL) { continue; } else if (xpath == NULL) { crm_trace("Ignoring %s change for version field", op); continue; } else if (strcmp(op, "move") == 0) { crm_trace("Ignoring move change at %s", xpath); continue; } // Find the result of create/modify ops if (strcmp(op, "create") == 0) { match = change->children; } else if (strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } else if (strcmp(op, "delete") != 0) { crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", op, xpath); continue; } if (match) { if (match->type == XML_COMMENT_NODE) { crm_trace("Ignoring %s operation for comment at %s", op, xpath); continue; } name = (const char *)match->name; } crm_trace("Handling %s operation for %s%s%s", op, (xpath? xpath : "CIB"), (name? " matched by " : ""), (name? name : "")); if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) { abort_transition(INFINITY, tg_restart, "Configuration change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS) || pcmk__str_eq(name, XML_CIB_TAG_TICKETS, pcmk__str_casei)) { abort_transition(INFINITY, tg_restart, "Ticket attribute change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[") || pcmk__str_eq(name, XML_TAG_TRANSIENT_NODEATTRS, pcmk__str_casei)) { abort_unless_down(xpath, op, change, "Transient attribute change"); break; // Won't be packaged with operation results we may be waiting for } else if (strcmp(op, "delete") == 0) { process_delete_diff(xpath, op, change); } else if (name == NULL) { crm_warn("Ignoring malformed CIB update (%s at %s has no result)", op, xpath); } else if (strcmp(name, XML_TAG_CIB) == 0) { process_cib_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) { process_status_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) { process_node_state_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) { process_resource_updates(ID(match), match, change, op, xpath); } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); process_resource_updates(local_node, match, change, op, xpath); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); process_lrm_resource_diff(match, local_node); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); process_graph_event(match, local_node); free(local_node); } else { crm_warn("Ignoring malformed CIB update (%s at %s 
has unrecognized result %s)", op, xpath, name); } } } void te_update_diff(const char *event, xmlNode * msg) { xmlNode *diff = NULL; const char *op = NULL; int rc = -EINVAL; int format = 1; int p_add[] = { 0, 0, 0 }; int p_del[] = { 0, 0, 0 }; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (transition_graph == NULL) { crm_trace("No graph"); return; } else if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } else if (transition_graph->complete && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_patch_versions(diff, p_add, p_del); crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op, p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], fsa_state2string(fsa_state)); crm_element_value_int(diff, "format", &format); switch (format) { case 1: te_update_diff_v1(event, diff); break; case 2: te_update_diff_v2(diff); break; default: crm_warn("Ignoring malformed CIB update (unknown patch format %d)", format); } } gboolean process_te_message(xmlNode * msg, xmlNode * xml_data) { const char *from = crm_element_value(msg, F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, F_CRM_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_trace("Processing %s (%s) message", op, ref); crm_log_xml_trace(msg, "ipc"); if (op == NULL) { /* error */ } else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_trace("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if (pcmk__str_eq(op, CRM_OP_INVOKE_LRM, pcmk__str_casei) && pcmk__str_eq(sys_from, CRM_SYSTEM_LRMD, pcmk__str_casei) /* && pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_casei) */ ) { xmlXPathObject *xpathObj = NULL; crm_log_xml_trace(msg, "Processing (N)ACK"); crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from); xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP); if (numXpathResults(xpathObj)) { int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } freeXpathObject(xpathObj); } else { crm_log_xml_err(msg, "Invalid (N)ACK"); freeXpathObject(xpathObj); return FALSE; } } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_trace("finished processing message"); return TRUE; } void cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc < pcmk_ok) { crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc)); } } /*! 
* \brief Handle a timeout in node-to-node communication * * \param[in] data Pointer to action timer * * \return FALSE (indicating that source should not be re-added) */ gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; const char *task = NULL; const char *on_node = NULL; const char *via_node = NULL; CRM_CHECK(data != NULL, return FALSE); timer = (crm_action_timer_t *) data; stop_te_timer(timer); CRM_CHECK(timer->action != NULL, return FALSE); task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_TARGET); via_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_ROUTER_NODE); if (transition_graph->complete) { crm_notice("Node %s did not send %s result (via %s) within %dms " "(ignoring because transition not in progress)", (on_node? on_node : ""), (task? task : "unknown action"), (via_node? via_node : "controller"), timer->timeout); } else { /* fail the action */ crm_err("Node %s did not send %s result (via %s) within %dms " "(action timeout plus cluster-delay)", (on_node? on_node : ""), (task? task : "unknown action"), (via_node? via_node : "controller"), timer->timeout + transition_graph->network_delay); - print_action(LOG_ERR, "Aborting transition, action lost: ", timer->action); + pcmk__log_graph_action(LOG_ERR, timer->action); timer->action->failed = TRUE; te_action_confirmed(timer->action, transition_graph); abort_transition(INFINITY, tg_restart, "Action lost", NULL); // Record timeout in the CIB if appropriate if ((timer->action->type == action_type_rsc) && controld_action_is_recordable(task)) { controld_record_action_timeout(timer->action); } } return FALSE; } diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c index 03d84e50e6..186844992f 100644 --- a/daemons/controld/controld_te_events.c +++ b/daemons/controld/controld_te_events.c @@ -1,507 +1,507 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY.
*/ #include #include #include #include #include #include #include char *failed_stop_offset = NULL; char *failed_start_offset = NULL; gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node) { const char *target_uuid = NULL; const char *router = NULL; const char *router_uuid = NULL; xmlNode *last_action = NULL; GList *gIter = NULL; GList *gIter2 = NULL; if (graph == NULL || graph->complete) { return FALSE; } gIter = graph->synapses; for (; gIter != NULL; gIter = gIter->next) { synapse_t *synapse = (synapse_t *) gIter->data; if (synapse->confirmed || synapse->failed) { /* We've already been here */ continue; } gIter2 = synapse->actions; for (; gIter2 != NULL; gIter2 = gIter2->next) { crm_action_t *action = (crm_action_t *) gIter2->data; if (action->type == action_type_pseudo || action->confirmed) { continue; } else if (action->type == action_type_crm) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { continue; } } target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (router) { crm_node_t *node = crm_get_peer(0, router); if (node) { router_uuid = node->uuid; } } if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) { action->failed = TRUE; synapse->failed = TRUE; last_action = action->xml; stop_te_timer(action->timer); - update_graph(graph, action); + pcmk__update_graph(graph, action); if (synapse->executed) { crm_notice("Action %d (%s) was pending on %s (offline)", action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node); } else { crm_info("Action %d (%s) is scheduled for %s (offline)", action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node); } } } } if (last_action != NULL) { crm_info("Node %s shutdown resulted in un-runnable actions", down_node); abort_transition(INFINITY, tg_restart, "Node failure", last_action); return TRUE; } return FALSE; } /*! 
* \internal * \brief Update failure-related node attributes if warranted * * \param[in] event XML describing operation that (maybe) failed * \param[in] event_node_uuid Node that event occurred on * \param[in] rc Actual operation return code * \param[in] target_rc Expected operation return code * \param[in] do_update If TRUE, do update regardless of operation type * \param[in] ignore_failures If TRUE, update last failure but not fail count * * \return TRUE if this was not a direct nack, success or lrm status refresh */ static gboolean update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int target_rc, gboolean do_update, gboolean ignore_failures) { guint interval_ms = 0; char *task = NULL; char *rsc_id = NULL; const char *value = NULL; const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); const char *on_uname = crm_peer_uname(event_node_uuid); const char *origin = crm_element_value(event, XML_ATTR_ORIGIN); // Nothing needs to be done for success or status refresh if (rc == target_rc) { return FALSE; } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) { crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh", id, rc, on_uname); return FALSE; } /* Sanity check */ CRM_CHECK(on_uname != NULL, return TRUE); CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms), crm_err("Couldn't parse: %s", ID(event)); goto bail); /* Decide whether update is necessary and what value to use */ if ((interval_ms > 0) || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei) || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) { do_update = TRUE; } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) { do_update = TRUE; if (failed_start_offset == NULL) { failed_start_offset = strdup(CRM_INFINITY_S); } value = failed_start_offset; } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) { do_update = TRUE; if (failed_stop_offset == NULL) { failed_stop_offset = strdup(CRM_INFINITY_S); } value = failed_stop_offset; } /* Fail count will be either incremented or set to infinity */ if (!pcmk_str_is_infinity(value)) { value = XML_NVPAIR_ATTR_VALUE "++"; } if (do_update) { char *now = pcmk__ttoa(time(NULL)); char *attr_name = NULL; gboolean is_remote_node = FALSE; if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) { is_remote_node = TRUE; } crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)", (ignore_failures? "last failure" : "failcount"), rsc_id, on_uname, task, rc, value, now); /* Update the fail count, if we're not ignoring failures */ if (!ignore_failures) { attr_name = pcmk__failcount_name(rsc_id, task, interval_ms); update_attrd(on_uname, attr_name, value, NULL, is_remote_node); free(attr_name); } /* Update the last failure time (even if we're ignoring failures, * so that failure can still be detected and shown, e.g. 
by crm_mon) */ attr_name = pcmk__lastfailure_name(rsc_id, task, interval_ms); update_attrd(on_uname, attr_name, now, NULL, is_remote_node); free(attr_name); free(now); } bail: free(rsc_id); free(task); return TRUE; } crm_action_t * controld_get_action(int id) { for (GList *item = transition_graph->synapses; item; item = item->next) { synapse_t *synapse = (synapse_t *) item->data; for (GList *item2 = synapse->actions; item2; item2 = item2->next) { crm_action_t *action = (crm_action_t *) item2->data; if (action->id == id) { return action; } } } return NULL; } crm_action_t * get_cancel_action(const char *id, const char *node) { GList *gIter = NULL; GList *gIter2 = NULL; gIter = transition_graph->synapses; for (; gIter != NULL; gIter = gIter->next) { synapse_t *synapse = (synapse_t *) gIter->data; gIter2 = synapse->actions; for (; gIter2 != NULL; gIter2 = gIter2->next) { const char *task = NULL; const char *target = NULL; crm_action_t *action = (crm_action_t *) gIter2->data; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); if (!pcmk__str_eq(task, id, pcmk__str_casei)) { crm_trace("Wrong key %s for %s on %s", task, id, node); continue; } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) { crm_trace("Wrong node %s for %s on %s", target, id, node); continue; } crm_trace("Found %s on %s", id, node); return action; } } return NULL; } bool confirm_cancel_action(const char *id, const char *node_id) { const char *op_key = NULL; const char *node_name = NULL; crm_action_t *cancel = get_cancel_action(id, node_id); if (cancel == NULL) { return FALSE; } op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY); node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET); stop_te_timer(cancel->timer); te_action_confirmed(cancel, transition_graph); crm_info("Cancellation of %s on %s confirmed (action %d)", op_key, node_name, cancel->id); return TRUE; } /* downed nodes are listed like: ... */ #define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \ "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']" /*! 
* \brief Find a transition event that would have made a specified node down * * \param[in] target UUID of node to match * * \return Matching event if found, NULL otherwise */ crm_action_t * match_down_event(const char *target) { crm_action_t *match = NULL; xmlXPathObjectPtr xpath_ret = NULL; GList *gIter, *gIter2; char *xpath = crm_strdup_printf(XPATH_DOWNED, target); for (gIter = transition_graph->synapses; gIter != NULL && match == NULL; gIter = gIter->next) { for (gIter2 = ((synapse_t*)gIter->data)->actions; gIter2 != NULL && match == NULL; gIter2 = gIter2->next) { match = (crm_action_t*)gIter2->data; if (match->executed) { xpath_ret = xpath_search(match->xml, xpath); if (numXpathResults(xpath_ret) < 1) { match = NULL; } freeXpathObject(xpath_ret); } else { // Only actions that were actually started can match match = NULL; } } } free(xpath); if (match != NULL) { crm_debug("Shutdown action %d (%s) found for node %s", match->id, crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target); } else { crm_debug("No reason to expect node %s to be down", target); } return match; } void process_graph_event(xmlNode *event, const char *event_node) { int rc = -1; // Actual result int target_rc = -1; // Expected result int status = -1; // Executor status int callid = -1; // Executor call ID int transition_num = -1; // Transition number int action_num = -1; // Action number within transition char *update_te_uuid = NULL; bool ignore_failures = FALSE; const char *id = NULL; const char *desc = NULL; const char *magic = NULL; const char *uname = NULL; CRM_ASSERT(event != NULL); /* */ magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY); if (magic == NULL) { /* non-change */ return; } crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status); if (status == PCMK_LRM_OP_PENDING) { return; } id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); crm_element_value_int(event, XML_LRM_ATTR_RC, &rc); crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid); rc = pcmk__effective_rc(rc); if (decode_transition_key(magic, &update_te_uuid, &transition_num, &action_num, &target_rc) == FALSE) { // decode_transition_key() already logged the bad key crm_err("Can't process action %s result: Incompatible versions? " CRM_XS " call-id=%d", id, callid); abort_transition(INFINITY, tg_restart, "Bad event", event); return; } if (transition_num == -1) { // E.g. crm_resource --fail desc = "initiated outside of the cluster"; abort_transition(INFINITY, tg_restart, "Unexpected event", event); } else if ((action_num < 0) || !pcmk__str_eq(update_te_uuid, te_uuid, pcmk__str_none)) { desc = "initiated by a different DC"; abort_transition(INFINITY, tg_restart, "Foreign event", event); } else if ((transition_graph->id != transition_num) || (transition_graph->complete)) { // Action is not from currently active transition guint interval_ms = 0; if (parse_op_key(id, NULL, NULL, &interval_ms) && (interval_ms != 0)) { /* Recurring actions have the transition number they were first * scheduled in. 
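 * A result for such an action can therefore arrive while a later
 * transition is active: a cancellation is simply confirmed below, while
 * any other change in a recurring result aborts the current transition.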
*/ if (status == PCMK_LRM_OP_CANCELLED) { confirm_cancel_action(id, get_node_id(event)); goto bail; } desc = "arrived after initial scheduling"; abort_transition(INFINITY, tg_restart, "Change in recurring result", event); } else if (transition_graph->id != transition_num) { desc = "arrived really late"; abort_transition(INFINITY, tg_restart, "Old event", event); } else { desc = "arrived late"; abort_transition(INFINITY, tg_restart, "Inactive graph", event); } } else { // Event is result of an action from currently active transition crm_action_t *action = controld_get_action(action_num); if (action == NULL) { // Should never happen desc = "unknown"; abort_transition(INFINITY, tg_restart, "Unknown event", event); } else if (action->confirmed == TRUE) { /* Nothing further needs to be done if the action has already been * confirmed. This can happen e.g. when processing both an * "xxx_last_0" or "xxx_last_failure_0" record as well as the main * history record, which would otherwise result in incorrectly * bumping the fail count twice. */ crm_log_xml_debug(event, "Event already confirmed:"); goto bail; } else { /* An action result needs to be confirmed. * (This is the only case where desc == NULL.) */ if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) { ignore_failures = TRUE; } else if (rc != target_rc) { action->failed = TRUE; } stop_te_timer(action->timer); te_action_confirmed(action, transition_graph); if (action->failed) { abort_transition(action->synapse->priority + 1, tg_restart, "Event failed", event); } } } if (id == NULL) { id = "unknown action"; } uname = crm_element_value(event, XML_LRM_ATTR_TARGET); if (uname == NULL) { uname = "unknown node"; } if (status == PCMK_LRM_OP_INVALID) { // We couldn't attempt the action crm_info("Transition %d action %d (%s on %s): %s", transition_num, action_num, id, uname, services_lrm_status_str(status)); } else if (desc && update_failcount(event, event_node, rc, target_rc, (transition_num == -1), FALSE)) { crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' " CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'", transition_num, action_num, id, uname, services_ocf_exitcode_str(target_rc), services_ocf_exitcode_str(rc), target_rc, rc, callid, desc); } else if (desc) { crm_info("Transition %d action %d (%s on %s): %s " CRM_XS " rc=%d target-rc=%d call-id=%d", transition_num, action_num, id, uname, desc, rc, target_rc, callid); } else if (rc == target_rc) { crm_info("Transition %d action %d (%s on %s) confirmed: %s " CRM_XS " rc=%d call-id=%d", transition_num, action_num, id, uname, services_ocf_exitcode_str(rc), rc, callid); } else { update_failcount(event, event_node, rc, target_rc, (transition_num == -1), ignore_failures); crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' " CRM_XS " target-rc=%d rc=%d call-id=%d", transition_num, action_num, id, uname, services_ocf_exitcode_str(target_rc), services_ocf_exitcode_str(rc), target_rc, rc, callid); } bail: free(update_te_uuid); } diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c index 54d81270a7..de85fd63ca 100644 --- a/daemons/controld/controld_te_utils.c +++ b/daemons/controld/controld_te_utils.c @@ -1,292 +1,335 @@ /* - * Copyright 2004-2020 the Pacemaker project contributors + * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. 
* * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include gboolean stop_te_timer(crm_action_timer_t * timer) { if (timer == NULL) { return FALSE; } if (timer->source_id != 0) { crm_trace("Stopping action timer"); g_source_remove(timer->source_id); timer->source_id = 0; } else { crm_trace("Action timer was already stopped"); return FALSE; } return TRUE; } gboolean te_graph_trigger(gpointer user_data) { - enum transition_status graph_rc = -1; - if (transition_graph == NULL) { crm_debug("Nothing to do"); return TRUE; } crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state)); switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: return TRUE; default: break; } if (transition_graph->complete == FALSE) { + enum transition_status graph_rc; int limit = transition_graph->batch_limit; transition_graph->batch_limit = throttle_get_total_job_limit(limit); - graph_rc = run_graph(transition_graph); + graph_rc = pcmk__execute_graph(transition_graph); transition_graph->batch_limit = limit; /* Restore the configured value */ - /* significant overhead... */ - /* print_graph(LOG_TRACE, transition_graph); */ - if (graph_rc == transition_active) { crm_trace("Transition not yet complete"); return TRUE; } else if (graph_rc == transition_pending) { crm_trace("Transition not yet complete - no actions fired"); return TRUE; } if (graph_rc != transition_complete) { - crm_warn("Transition failed: %s", transition_status(graph_rc)); - print_graph(LOG_NOTICE, transition_graph); + crm_warn("Transition failed: %s", + pcmk__graph_status2text(graph_rc)); + pcmk__log_graph(LOG_NOTICE, transition_graph); } } crm_debug("Transition %d is now complete", transition_graph->id); transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } void trigger_graph_processing(const char *fn, int line) { crm_trace("%s:%d - Triggered graph processing", fn, line); mainloop_set_trigger(transition_trigger); } static struct abort_timer_s { bool aborted; guint id; int priority; enum transition_action action; const char *text; } abort_timer = { 0, }; static gboolean abort_timer_popped(gpointer data) { if (AM_I_DC && (abort_timer.aborted == FALSE)) { abort_transition(abort_timer.priority, abort_timer.action, abort_timer.text, NULL); } abort_timer.id = 0; return FALSE; // do not immediately reschedule timer } /*! 
* \internal * \brief Abort transition after delay, if not already aborted in that time * * \param[in] abort_text Must be literal string */ void abort_after_delay(int abort_priority, enum transition_action abort_action, const char *abort_text, guint delay_ms) { if (abort_timer.id) { // Timer already in progress, stop and reschedule g_source_remove(abort_timer.id); } abort_timer.aborted = FALSE; abort_timer.priority = abort_priority; abort_timer.action = abort_action; abort_timer.text = abort_text; abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL); } +static const char * +abort2text(enum transition_action abort_action) +{ + switch (abort_action) { + case tg_done: + return "done"; + case tg_stop: + return "stop"; + case tg_restart: + return "restart"; + case tg_shutdown: + return "shutdown"; + } + return "unknown"; +} + +static bool +update_abort_priority(crm_graph_t *graph, int priority, + enum transition_action action, const char *abort_reason) +{ + bool change = FALSE; + + if (graph == NULL) { + return change; + } + + if (graph->abort_priority < priority) { + crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority); + graph->abort_priority = priority; + if (graph->abort_reason != NULL) { + crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason); + } + graph->abort_reason = abort_reason; + change = TRUE; + } + + if (graph->completion_action < action) { + crm_debug("Abort action %s superseded by %s: %s", + abort2text(graph->completion_action), abort2text(action), abort_reason); + graph->completion_action = action; + change = TRUE; + } + + return change; +} + void abort_transition_graph(int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode * reason, const char *fn, int line) { int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; int level = LOG_INFO; xmlNode *diff = NULL; xmlNode *change = NULL; CRM_CHECK(transition_graph != NULL, return); switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: crm_info("Abort %s suppressed: state=%s (complete=%d)", abort_text, fsa_state2string(fsa_state), transition_graph->complete); return; default: break; } abort_timer.aborted = TRUE; controld_expect_sched_reply(NULL); if (transition_graph->complete == FALSE) { if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) { level = LOG_NOTICE; } } if(reason) { xmlNode *search = NULL; for(search = reason; search; search = search->parent) { if (pcmk__str_eq(XML_TAG_DIFF, TYPE(search), pcmk__str_casei)) { diff = search; break; } } if(diff) { xml_patch_versions(diff, add, del); for(search = reason; search; search = search->parent) { if (pcmk__str_eq(XML_DIFF_CHANGE, TYPE(search), pcmk__str_casei)) { change = search; break; } } } } if(reason == NULL) { do_crm_log(level, "Transition %d aborted: %s "CRM_XS" source=%s:%d complete=%s", transition_graph->id, abort_text, fn, line, pcmk__btoa(transition_graph->complete)); } else if(change == NULL) { char *local_path = xml_get_path(reason); do_crm_log(level, "Transition %d aborted by %s.%s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", transition_graph->id, TYPE(reason), ID(reason), abort_text, add[0], add[1], add[2], fn, line, local_path, pcmk__btoa(transition_graph->complete)); free(local_path); } else { const char *kind = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *path = crm_element_value(change, XML_DIFF_PATH); if(change == 
reason) { if(strcmp(op, "create") == 0) { reason = reason->children; } else if(strcmp(op, "modify") == 0) { reason = first_named_child(reason, XML_DIFF_RESULT); if(reason) { reason = reason->children; } } } kind = TYPE(reason); if(strcmp(op, "delete") == 0) { const char *shortpath = strrchr(path, '/'); do_crm_log(level, "Transition %d aborted by deletion of %s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", transition_graph->id, (shortpath? (shortpath + 1) : path), abort_text, add[0], add[1], add[2], fn, line, path, pcmk__btoa(transition_graph->complete)); } else if (pcmk__str_eq(XML_CIB_TAG_NVPAIR, kind, pcmk__str_casei)) { do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", transition_graph->id, crm_element_value(reason, XML_ATTR_ID), op, crm_element_value(reason, XML_NVPAIR_ATTR_NAME), crm_element_value(reason, XML_NVPAIR_ATTR_VALUE), abort_text, add[0], add[1], add[2], fn, line, path, pcmk__btoa(transition_graph->complete)); } else if (pcmk__str_eq(XML_LRM_TAG_RSC_OP, kind, pcmk__str_casei)) { const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s " CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s", transition_graph->id, crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op, crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text, magic, add[0], add[1], add[2], fn, line, pcmk__btoa(transition_graph->complete)); } else if (pcmk__strcase_any_of(kind, XML_CIB_TAG_STATE, XML_CIB_TAG_NODE, NULL)) { const char *uname = crm_peer_uname(ID(reason)); do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s", transition_graph->id, kind, op, (uname? uname : ID(reason)), abort_text, add[0], add[1], add[2], fn, line, pcmk__btoa(transition_graph->complete)); } else { const char *id = ID(reason); do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", transition_graph->id, TYPE(reason), (id? id : ""), (op? op : "change"), abort_text, add[0], add[1], add[2], fn, line, path, pcmk__btoa(transition_graph->complete)); } } if (transition_graph->complete) { if (transition_timer->period_ms > 0) { controld_stop_timer(transition_timer); controld_start_timer(transition_timer); } else { register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); } return; } mainloop_set_trigger(transition_trigger); } diff --git a/daemons/controld/controld_transition.c b/daemons/controld/controld_transition.c index 14764ef899..05d2eae987 100644 --- a/daemons/controld/controld_transition.c +++ b/daemons/controld/controld_transition.c @@ -1,217 +1,217 @@ /* - * Copyright 2004-2020 the Pacemaker project contributors + * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include extern crm_graph_functions_t te_graph_fns; static void global_cib_callback(const xmlNode * msg, int callid, int rc, xmlNode * output) { } static crm_graph_t * create_blank_graph(void) { - crm_graph_t *a_graph = unpack_graph(NULL, NULL); + crm_graph_t *a_graph = pcmk__unpack_graph(NULL, NULL); a_graph->complete = TRUE; a_graph->abort_reason = "DC Takeover"; a_graph->completion_action = tg_restart; return a_graph; } /* A_TE_START, A_TE_STOP, O_TE_RESTART */ void do_te_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean init_ok = TRUE; if (action & A_TE_STOP) { if (transition_graph) { - destroy_graph(transition_graph); + pcmk__free_graph(transition_graph); transition_graph = NULL; } if (fsa_cib_conn) { fsa_cib_conn->cmds->del_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff); } controld_clear_fsa_input_flags(R_TE_CONNECTED); crm_info("Transitioner is now inactive"); } if ((action & A_TE_START) == 0) { return; } else if (pcmk_is_set(fsa_input_register, R_TE_CONNECTED)) { crm_debug("The transitioner is already active"); return; } else if ((action & A_TE_START) && cur_state == S_STOPPING) { crm_info("Ignoring request to start the transitioner while shutting down"); return; } if (te_uuid == NULL) { te_uuid = crm_generate_uuid(); crm_info("Registering TE UUID: %s", te_uuid); } if (fsa_cib_conn == NULL) { crm_err("Could not set CIB callbacks"); init_ok = FALSE; } else { if (fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff) != pcmk_ok) { crm_err("Could not set CIB notification callback"); init_ok = FALSE; } if (fsa_cib_conn->cmds->set_op_callback(fsa_cib_conn, global_cib_callback) != pcmk_ok) { crm_err("Could not set CIB global callback"); init_ok = FALSE; } } if (init_ok) { - set_graph_functions(&te_graph_fns); + pcmk__set_graph_functions(&te_graph_fns); if (transition_graph) { - destroy_graph(transition_graph); + pcmk__free_graph(transition_graph); } /* create a blank one */ crm_debug("Transitioner is now active"); transition_graph = create_blank_graph(); controld_set_fsa_input_flags(R_TE_CONNECTED); } } /* A_TE_INVOKE, A_TE_CANCEL */ void do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE || (fsa_state != S_TRANSITION_ENGINE && (action & A_TE_INVOKE))) { crm_notice("No need to invoke the TE (%s) in state %s", fsa_action2string(action), fsa_state2string(fsa_state)); return; } if (action & A_TE_CANCEL) { crm_debug("Cancelling the transition: %s", transition_graph->complete ? "inactive" : "active"); abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); if (transition_graph->complete == FALSE) { crmd_fsa_stall(FALSE); } } else if (action & A_TE_HALT) { crm_debug("Halting the transition: %s", transition_graph->complete ? 
"inactive" : "active"); abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); if (transition_graph->complete == FALSE) { crmd_fsa_stall(FALSE); } } else if (action & A_TE_INVOKE) { const char *value = NULL; xmlNode *graph_data = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *ref = crm_element_value(input->msg, XML_ATTR_REFERENCE); const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH); const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT); if (graph_file == NULL && input->xml == NULL) { crm_log_xml_err(input->msg, "Bad command"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } if (transition_graph->complete == FALSE) { crm_info("Another transition is already active"); abort_transition(INFINITY, tg_restart, "Transition Active", NULL); return; } if (fsa_pe_ref == NULL || !pcmk__str_eq(fsa_pe_ref, ref, pcmk__str_casei)) { crm_info("Transition is redundant: %s vs. %s", crm_str(fsa_pe_ref), crm_str(ref)); abort_transition(INFINITY, tg_restart, "Transition Redundant", NULL); } graph_data = input->xml; if (graph_data == NULL && graph_file != NULL) { graph_data = filename2xml(graph_file); } if (is_timer_started(transition_timer)) { crm_debug("The transitioner wait for a transition timer"); return; } CRM_CHECK(graph_data != NULL, crm_err("Input raised by %s is invalid", msg_data->origin); crm_log_xml_err(input->msg, "Bad command"); return); - destroy_graph(transition_graph); - transition_graph = unpack_graph(graph_data, graph_input); + pcmk__free_graph(transition_graph); + transition_graph = pcmk__unpack_graph(graph_data, graph_input); if (transition_graph == NULL) { CRM_CHECK(transition_graph != NULL,); transition_graph = create_blank_graph(); return; } crm_info("Processing graph %d (ref=%s) derived from %s", transition_graph->id, ref, graph_input); te_reset_job_counts(); value = crm_element_value(graph_data, "failed-stop-offset"); if (value) { free(failed_stop_offset); failed_stop_offset = strdup(value); } value = crm_element_value(graph_data, "failed-start-offset"); if (value) { free(failed_start_offset); failed_start_offset = strdup(value); } if ((crm_element_value_epoch(graph_data, "recheck-by", &recheck_by) != pcmk_ok) || (recheck_by < 0)) { recheck_by = 0; } trigger_graph(); - print_graph(LOG_TRACE, transition_graph); + pcmk__log_graph(LOG_TRACE, transition_graph); if (graph_data != input->xml) { free_xml(graph_data); } } } diff --git a/include/pcmki/pcmki_transition.h b/include/pcmki/pcmki_transition.h index 1b0682b83a..26d331145f 100644 --- a/include/pcmki/pcmki_transition.h +++ b/include/pcmki/pcmki_transition.h @@ -1,143 +1,140 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ #ifndef CRM_TRANSITION__H # define CRM_TRANSITION__H #ifdef __cplusplus extern "C" { #endif #include #include #include #include typedef enum { action_type_pseudo, action_type_rsc, action_type_crm } action_type_e; typedef struct te_timer_s crm_action_timer_t; typedef struct crm_graph_s crm_graph_t; typedef struct synapse_s { int id; int priority; gboolean ready; gboolean failed; gboolean executed; gboolean confirmed; GList *actions; /* crm_action_t* */ GList *inputs; /* crm_action_t* */ } synapse_t; typedef struct crm_action_s { int id; int timeout; guint interval_ms; GHashTable *params; action_type_e type; crm_action_timer_t *timer; synapse_t *synapse; gboolean sent_update; /* sent to the CIB */ gboolean executed; /* sent to the CRM */ gboolean confirmed; gboolean failed; gboolean can_fail; //! \deprecated Will be removed in a future release xmlNode *xml; } crm_action_t; struct te_timer_s { int source_id; int timeout; crm_action_t *action; }; /* order matters here */ enum transition_action { tg_done, tg_stop, tg_restart, tg_shutdown, }; struct crm_graph_s { int id; char *source; int abort_priority; gboolean complete; const char *abort_reason; enum transition_action completion_action; int num_actions; int num_synapses; int batch_limit; guint network_delay; guint stonith_timeout; int fired; int pending; int skipped; int completed; int incomplete; GList *synapses; /* synapse_t* */ int migration_limit; }; typedef struct crm_graph_functions_s { gboolean(*pseudo) (crm_graph_t * graph, crm_action_t * action); gboolean(*rsc) (crm_graph_t * graph, crm_action_t * action); gboolean(*crmd) (crm_graph_t * graph, crm_action_t * action); gboolean(*stonith) (crm_graph_t * graph, crm_action_t * action); gboolean(*allowed) (crm_graph_t * graph, crm_action_t * action); } crm_graph_functions_t; enum transition_status { transition_active, transition_pending, /* active but no actions performed this time */ transition_complete, transition_stopped, transition_terminated, transition_action_failed, transition_failed, }; -void set_default_graph_functions(void); -void set_graph_functions(crm_graph_functions_t * fns); -crm_graph_t *unpack_graph(xmlNode * xml_graph, const char *reference); -int run_graph(crm_graph_t * graph); -gboolean update_graph(crm_graph_t * graph, crm_action_t * action); -void destroy_graph(crm_graph_t * graph); -const char *transition_status(enum transition_status state); -void print_graph(unsigned int log_level, crm_graph_t * graph); -void print_action(int log_level, const char *prefix, crm_action_t * action); -bool update_abort_priority(crm_graph_t * graph, int priority, - enum transition_action action, const char *abort_reason); -const char *actiontype2text(action_type_e type); -lrmd_event_data_t *convert_graph_action(xmlNode * resource, crm_action_t * action, int status, - int rc); +void pcmk__set_graph_functions(crm_graph_functions_t *fns); +crm_graph_t *pcmk__unpack_graph(xmlNode *xml_graph, const char *reference); +enum transition_status pcmk__execute_graph(crm_graph_t *graph); +void pcmk__update_graph(crm_graph_t *graph, crm_action_t *action); +void pcmk__free_graph(crm_graph_t *graph); +const char *pcmk__graph_status2text(enum transition_status state); +void pcmk__log_graph(unsigned int log_level, crm_graph_t *graph); +void pcmk__log_graph_action(int log_level, crm_action_t *action); +lrmd_event_data_t *pcmk__event_from_graph_action(xmlNode *resource, + crm_action_t *action, + int status, int rc); #ifdef __cplusplus } #endif #endif diff --git a/lib/pacemaker/Makefile.am 
b/lib/pacemaker/Makefile.am index e9425dd5ba..242e1aec1f 100644 --- a/lib/pacemaker/Makefile.am +++ b/lib/pacemaker/Makefile.am @@ -1,51 +1,50 @@ # # Copyright 2004-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/common.mk AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir) ## libraries lib_LTLIBRARIES = libpacemaker.la ## SOURCES libpacemaker_la_LDFLAGS = -version-info 3:1:2 libpacemaker_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpacemaker_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libpacemaker_la_LIBADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/common/libcrmcommon.la # -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version # Use += rather than backlashed continuation lines for parsing by bumplibs libpacemaker_la_SOURCES = libpacemaker_la_SOURCES += pcmk_cluster_queries.c libpacemaker_la_SOURCES += pcmk_fence.c +libpacemaker_la_SOURCES += pcmk_graph_consumer.c +libpacemaker_la_SOURCES += pcmk_graph_logging.c +libpacemaker_la_SOURCES += pcmk_graph_producer.c libpacemaker_la_SOURCES += pcmk_output.c libpacemaker_la_SOURCES += pcmk_output_utils.c libpacemaker_la_SOURCES += pcmk_resource.c libpacemaker_la_SOURCES += pcmk_sched_allocate.c libpacemaker_la_SOURCES += pcmk_sched_bundle.c libpacemaker_la_SOURCES += pcmk_sched_clone.c libpacemaker_la_SOURCES += pcmk_sched_constraints.c -libpacemaker_la_SOURCES += pcmk_sched_graph.c libpacemaker_la_SOURCES += pcmk_sched_group.c libpacemaker_la_SOURCES += pcmk_sched_messages.c libpacemaker_la_SOURCES += pcmk_sched_native.c libpacemaker_la_SOURCES += pcmk_sched_notif.c libpacemaker_la_SOURCES += pcmk_sched_promotable.c libpacemaker_la_SOURCES += pcmk_sched_transition.c libpacemaker_la_SOURCES += pcmk_sched_utilization.c libpacemaker_la_SOURCES += pcmk_sched_utils.c -libpacemaker_la_SOURCES += pcmk_trans_graph.c -libpacemaker_la_SOURCES += pcmk_trans_unpack.c -libpacemaker_la_SOURCES += pcmk_trans_utils.c diff --git a/lib/pacemaker/pcmk_graph_consumer.c b/lib/pacemaker/pcmk_graph_consumer.c new file mode 100644 index 0000000000..08198a850f --- /dev/null +++ b/lib/pacemaker/pcmk_graph_consumer.c @@ -0,0 +1,845 @@ +/* + * Copyright 2004-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include + +#include +#include +#include +#include +#include + + +/* + * Functions for updating graph + */ + +/*! + * \internal + * \brief Update synapse after completed prerequisite + * + * A synapse is ready to be executed once all its prerequisite actions (inputs) + * complete. Given a completed action, check whether it is an input for a given + * synapse, and if so, mark the input as confirmed, and mark the synapse as + * ready if appropriate. + * + * \param[in] synapse Transition graph synapse to update + * \param[in] action_id ID of an action that completed + * + * \note The only substantial effect here is confirming synapse inputs. + * should_fire_synapse() will recalculate synapse->ready, so the only + * thing that uses the synapse->ready value from here is + * synapse_state_str(). 
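+ *
+ * Confirmation of the synapse's own actions (as opposed to its inputs)
+ * is handled separately by update_synapse_confirmed() below.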
+ */ +static void +update_synapse_ready(synapse_t *synapse, int action_id) +{ + if (synapse->ready) { + return; // All inputs have already been confirmed + } + synapse->ready = TRUE; // Presume ready until proven otherwise + for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { + crm_action_t *prereq = (crm_action_t *) lpc->data; + + if (prereq->id == action_id) { + crm_trace("Confirming input %d of synapse %d", + action_id, synapse->id); + prereq->confirmed = TRUE; + + } else if (!(prereq->confirmed)) { + synapse->ready = FALSE; + crm_trace("Synapse %d still not ready after action %d", + synapse->id, action_id); + } + } + if (synapse->ready) { + crm_trace("Synapse %d is now ready to execute", synapse->id); + } +} + +/*! + * \internal + * \brief Update action and synapse confirmation after action completion + * + * \param[in] synapse Transition graph synapse that action belongs to + * \param[in] action_id ID of action that completed + */ +static void +update_synapse_confirmed(synapse_t *synapse, int action_id) +{ + bool all_confirmed = true; + + for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { + crm_action_t *action = (crm_action_t *) lpc->data; + + if (action->id == action_id) { + crm_trace("Confirmed action %d of synapse %d", + action_id, synapse->id); + action->confirmed = TRUE; + + } else if (all_confirmed && !(action->confirmed)) { + all_confirmed = false; + crm_trace("Synapse %d still not confirmed after action %d", + synapse->id, action_id); + } + } + + if (all_confirmed && !(synapse->confirmed)) { + crm_trace("Confirmed synapse %d", synapse->id); + synapse->confirmed = TRUE; + } +} + +/*! + * \internal + * \brief Update the transition graph with a completed action result + * + * \param[in,out] graph Transition graph to update + * \param[in] action Action that completed + */ +void +pcmk__update_graph(crm_graph_t *graph, crm_action_t *action) +{ + for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { + synapse_t *synapse = (synapse_t *) lpc->data; + + if (synapse->confirmed || synapse->failed) { + continue; // This synapse already completed + + } else if (synapse->executed) { + update_synapse_confirmed(synapse, action->id); + + } else if (!(action->failed) || (synapse->priority == INFINITY)) { + update_synapse_ready(synapse, action->id); + } + } +} + + +/* + * Functions for executing graph + */ + +/* A transition graph consists of various types of actions. The library caller + * registers execution functions for each action type, which will be stored + * here. + */ +static crm_graph_functions_t *graph_fns = NULL; + +/*! + * \internal + * \brief Set transition graph execution functions + * + * \param[in] Execution functions to use + */ +void +pcmk__set_graph_functions(crm_graph_functions_t *fns) +{ + crm_debug("Setting custom functions for executing transition graphs"); + graph_fns = fns; + + CRM_ASSERT(graph_fns != NULL); + CRM_ASSERT(graph_fns->rsc != NULL); + CRM_ASSERT(graph_fns->crmd != NULL); + CRM_ASSERT(graph_fns->pseudo != NULL); + CRM_ASSERT(graph_fns->stonith != NULL); +} + +/*! 
+ * \internal + * \brief Check whether a graph synapse is ready to be executed + * + * \param[in] graph Transition graph that synapse is part of + * \param[in] synapse Synapse to check + * + * \return true if synapse is ready, false otherwise + */ +static bool +should_fire_synapse(crm_graph_t *graph, synapse_t *synapse) +{ + GList *lpc = NULL; + + synapse->ready = TRUE; + for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { + crm_action_t *prereq = (crm_action_t *) lpc->data; + + if (!(prereq->confirmed)) { + crm_trace("Input %d for synapse %d not yet confirmed", + prereq->id, synapse->id); + synapse->ready = FALSE; + break; + + } else if (prereq->failed && !(prereq->can_fail)) { + crm_trace("Input %d for synapse %d confirmed but failed", + prereq->id, synapse->id); + synapse->ready = FALSE; + break; + } + } + if (synapse->ready) { + crm_trace("Synapse %d is ready to execute", synapse->id); + } else { + return false; + } + + for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { + crm_action_t *a = (crm_action_t *) lpc->data; + + if (a->type == action_type_pseudo) { + /* None of the below applies to pseudo ops */ + + } else if (synapse->priority < graph->abort_priority) { + crm_trace("Skipping synapse %d: priority %d is less than " + "abort priority %d", + synapse->id, synapse->priority, graph->abort_priority); + graph->skipped++; + return false; + + } else if (graph_fns->allowed && !(graph_fns->allowed(graph, a))) { + crm_trace("Deferring synapse %d: not allowed", synapse->id); + return false; + } + } + + return true; +} + +/*! + * \internal + * \brief Initiate an action from a transition graph + * + * \param[in] graph Transition graph containing action + * \param[in] action Action to execute + * + * \return TRUE if action was initiated, FALSE otherwise + */ +static gboolean +initiate_action(crm_graph_t *graph, crm_action_t *action) +{ + const char *id = ID(action->xml); + + CRM_CHECK(!(action->executed), return FALSE); + CRM_CHECK(id != NULL, return FALSE); + + action->executed = TRUE; + switch (action->type) { + case action_type_pseudo: + crm_trace("Executing pseudo-action %d (%s)", action->id, id); + return graph_fns->pseudo(graph, action); + + case action_type_rsc: + crm_trace("Executing resource action %d (%s)", action->id, id); + return graph_fns->rsc(graph, action); + + case action_type_crm: + if (pcmk__str_eq(crm_element_value(action->xml, XML_LRM_ATTR_TASK), + CRM_OP_FENCE, pcmk__str_casei)) { + crm_trace("Executing fencing action %d (%s)", + action->id, id); + return graph_fns->stonith(graph, action); + } + crm_trace("Executing control action %d (%s)", action->id, id); + return graph_fns->crmd(graph, action); + + default: + crm_err("Unsupported graph action type <%s id='%s'> (bug?)", + crm_element_name(action->xml), id); + return FALSE; + } +} +
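/*
 * Illustrative sketch (not part of the original patch): a minimal,
 * hypothetical consumer of the renamed graph API. The example_* names are
 * assumptions for illustration only; the pcmk__* functions, types, and
 * enum values are those declared in include/pcmki/pcmki_transition.h above.
 */
static gboolean
example_handler(crm_graph_t *graph, crm_action_t *action)
{
    /* A real handler would initiate the action asynchronously and confirm
     * it once a result arrives; confirming immediately keeps this sketch
     * simple so that repeated execution passes can drive the graph to
     * completion. */
    action->confirmed = TRUE;
    pcmk__update_graph(graph, action);
    return TRUE;
}

static crm_graph_functions_t example_fns = {
    example_handler,    /* pseudo-actions */
    example_handler,    /* resource actions */
    example_handler,    /* controller actions */
    example_handler,    /* fencing actions */
};

static void
example_run(xmlNode *graph_xml)
{
    enum transition_status rc;
    crm_graph_t *graph = pcmk__unpack_graph(graph_xml, "example");

    if (graph == NULL) {
        return;
    }
    pcmk__set_graph_functions(&example_fns);

    /* transition_active means more synapses may become runnable, so keep
     * executing until the graph settles. */
    do {
        rc = pcmk__execute_graph(graph);
    } while (rc == transition_active);

    if (rc != transition_complete) {
        pcmk__log_graph(LOG_NOTICE, graph);
    }
    pcmk__free_graph(graph);
}

+/*!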
+ * \internal + * \brief Execute a graph synapse + * + * \param[in] graph Transition graph with synapse to execute + * \param[in] synapse Synapse to execute + * + * \return Standard Pacemaker return value + */ +static int +fire_synapse(crm_graph_t *graph, synapse_t *synapse) +{ + synapse->executed = TRUE; + for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { + crm_action_t *action = (crm_action_t *) lpc->data; + + if (!initiate_action(graph, action)) { + crm_err("Failed initiating <%s id=%d> in synapse %d", + crm_element_name(action->xml), action->id, synapse->id); + synapse->confirmed = TRUE; + action->confirmed = TRUE; + action->failed = TRUE; + return pcmk_rc_error; + } + } + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Dummy graph method that can be used with simulations + * + * \param[in] graph Transition graph containing action + * \param[in] action Action to be initiated + * + * \retval TRUE Action initiation was (simulated to be) successful + * \retval FALSE Action initiation was (simulated to be) failed (due to the + * PE_fail environment variable being set to the action ID) + */ +static gboolean +pseudo_action_dummy(crm_graph_t * graph, crm_action_t * action) +{ + static int fail = -1; + + if (fail < 0) { + long long fail_ll; + + if ((pcmk__scan_ll(getenv("PE_fail"), &fail_ll, 0LL) == pcmk_rc_ok) + && (fail_ll > 0LL) && (fail_ll <= INT_MAX)) { + fail = (int) fail_ll; + } else { + fail = 0; + } + } + + if (action->id == fail) { + crm_err("Dummy event handler: pretending action %d failed", action->id); + action->failed = TRUE; + graph->abort_priority = INFINITY; + } else { + crm_trace("Dummy event handler: action %d initiated", action->id); + } + action->confirmed = TRUE; + pcmk__update_graph(graph, action); + return TRUE; +} + +static crm_graph_functions_t default_fns = { + pseudo_action_dummy, + pseudo_action_dummy, + pseudo_action_dummy, + pseudo_action_dummy +}; + +/*! 
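+ * For example (illustrative): if a simulation is run with PE_fail=3 in the
+ * environment, the dummy handlers above pretend that action 3 failed, so the
+ * graph's abort priority is raised to INFINITY and any not-yet-executed
+ * synapse with a lower priority is skipped by should_fire_synapse().
+ */
+
+/*!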
+ * \internal + * \brief Execute all actions in a transition graph + * + * \param[in] graph Transition graph to execute + * + * \return Status of transition after execution + */ +enum transition_status +pcmk__execute_graph(crm_graph_t *graph) +{ + GList *lpc = NULL; + int log_level = LOG_DEBUG; + enum transition_status pass_result = transition_active; + const char *status = "In progress"; + + if (graph_fns == NULL) { + graph_fns = &default_fns; + } + if (graph == NULL) { + return transition_complete; + } + + graph->fired = 0; + graph->pending = 0; + graph->skipped = 0; + graph->completed = 0; + graph->incomplete = 0; + + // Count completed and in-flight synapses + for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { + synapse_t *synapse = (synapse_t *) lpc->data; + + if (synapse->confirmed) { + graph->completed++; + + } else if (!(synapse->failed) && synapse->executed) { + graph->pending++; + } + } + crm_trace("Executing graph %d (%d synapses already completed, %d pending)", + graph->id, graph->completed, graph->pending); + + // Execute any synapses that are ready + for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { + synapse_t *synapse = (synapse_t *) lpc->data; + + if ((graph->batch_limit > 0) + && (graph->pending >= graph->batch_limit)) { + + crm_debug("Throttling graph execution: batch limit (%d) reached", + graph->batch_limit); + break; + + } else if (synapse->failed) { + graph->skipped++; + continue; + + } else if (synapse->confirmed || synapse->executed) { + continue; // Already handled + + } else if (should_fire_synapse(graph, synapse)) { + graph->fired++; + if (fire_synapse(graph, synapse) != pcmk_rc_ok) { + crm_err("Synapse %d failed to fire", synapse->id); + log_level = LOG_ERR; + graph->abort_priority = INFINITY; + graph->incomplete++; + graph->fired--; + } + + if (!(synapse->confirmed)) { + graph->pending++; + } + + } else { + crm_trace("Synapse %d cannot fire", synapse->id); + graph->incomplete++; + } + } + + if ((graph->pending == 0) && (graph->fired == 0)) { + graph->complete = TRUE; + + if ((graph->incomplete != 0) && (graph->abort_priority <= 0)) { + log_level = LOG_WARNING; + pass_result = transition_terminated; + status = "Terminated"; + + } else if (graph->skipped != 0) { + log_level = LOG_NOTICE; + pass_result = transition_complete; + status = "Stopped"; + + } else { + log_level = LOG_NOTICE; + pass_result = transition_complete; + status = "Complete"; + } + + } else if (graph->fired == 0) { + pass_result = transition_pending; + } + + do_crm_log(log_level, + "Transition %d (Complete=%d, Pending=%d," + " Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s", + graph->id, graph->completed, graph->pending, graph->fired, + graph->skipped, graph->incomplete, graph->source, status); + + return pass_result; +} + + +/* + * Functions for unpacking transition graph XML into structs + */ + +/*! 
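+ * As a point of orientation (an illustrative sketch, not taken from this
+ * patch; the attribute values are made up), the top-level element that
+ * pcmk__unpack_graph() below parses might carry attributes like:
+ *
+ *     <transition_graph transition_id="0" cluster-delay="60s"
+ *                       stonith-timeout="60s" batch-limit="0"
+ *                       migration-limit="-1"> ... </transition_graph>
+ *
+ * where stonith-timeout defaults to cluster-delay when absent, batch-limit 0
+ * selects a dynamic limit, and migration-limit defaults to -1 (unlimited).
+ */
+
+/*!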
+ * \internal
+ * \brief Unpack a transition graph action from XML
+ *
+ * \param[in] parent      Synapse that action is part of
+ * \param[in] xml_action  Action XML to unpack
+ *
+ * \return Newly allocated action on success, or NULL otherwise
+ */
+static crm_action_t *
+unpack_action(synapse_t *parent, xmlNode *xml_action)
+{
+    action_type_e action_type;
+    crm_action_t *action = NULL;
+    const char *element = TYPE(xml_action);
+    const char *value = ID(xml_action);
+
+    if (value == NULL) {
+        crm_err("Ignoring transition graph action without id (bug?)");
+        crm_log_xml_trace(xml_action, "invalid");
+        return NULL;
+    }
+
+    if (pcmk__str_eq(element, XML_GRAPH_TAG_RSC_OP, pcmk__str_casei)) {
+        action_type = action_type_rsc;
+
+    } else if (pcmk__str_eq(element, XML_GRAPH_TAG_PSEUDO_EVENT,
+                            pcmk__str_casei)) {
+        action_type = action_type_pseudo;
+
+    } else if (pcmk__str_eq(element, XML_GRAPH_TAG_CRM_EVENT,
+                            pcmk__str_casei)) {
+        action_type = action_type_crm;
+
+    } else {
+        crm_err("Ignoring transition graph action of unknown type '%s' (bug?)",
+                element);
+        crm_log_xml_trace(xml_action, "invalid");
+        return NULL;
+    }
+
+    action = calloc(1, sizeof(crm_action_t));
+    if (action == NULL) {
+        crm_perror(LOG_CRIT, "Cannot unpack transition graph action");
+        crm_log_xml_trace(xml_action, "lost");
+        return NULL;
+    }
+
+    pcmk__scan_min_int(value, &(action->id), -1);
+    action->type = action_type_rsc;
+    action->xml = copy_xml(xml_action);
+    action->synapse = parent;
+    action->type = action_type;
+    action->params = xml2list(action->xml);
+
+    value = g_hash_table_lookup(action->params, "CRM_meta_timeout");
+    pcmk__scan_min_int(value, &(action->timeout), 0);
+
+    /* Take start-delay into account for the timeout of the action timer */
+    value = g_hash_table_lookup(action->params, "CRM_meta_start_delay");
+    {
+        int start_delay;
+
+        pcmk__scan_min_int(value, &start_delay, 0);
+        action->timeout += start_delay;
+    }
+
+    if (pcmk__guint_from_hash(action->params,
+                              CRM_META "_" XML_LRM_ATTR_INTERVAL, 0,
+                              &(action->interval_ms)) != pcmk_rc_ok) {
+        action->interval_ms = 0;
+    }
+
+    value = g_hash_table_lookup(action->params, "CRM_meta_can_fail");
+    if (value != NULL) {
+        crm_str_to_boolean(value, &(action->can_fail));
+#ifndef PCMK__COMPAT_2_0
+        if (action->can_fail) {
+            crm_warn("Support for the can_fail meta-attribute is deprecated"
+                     " and will be removed in a future release");
+        }
+#endif
+    }
+
+    crm_trace("Action %d has timer set to %dms", action->id, action->timeout);
+
+    return action;
+}
+
+/*!
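+ * For orientation (an illustrative sketch, not taken from this patch; the
+ * resource and values are made up), a resource action in a transition graph
+ * looks roughly like this, with the CRM_meta_* values that unpack_action()
+ * reads supplied as XML attributes of the nested "attributes" element:
+ *
+ *     <rsc_op id="2" operation="start" operation_key="myrsc_start_0"
+ *             on_node="node1">
+ *       <primitive id="myrsc" class="ocf" provider="heartbeat" type="Dummy"/>
+ *       <attributes CRM_meta_timeout="20000" CRM_meta_start_delay="5000"
+ *                   CRM_meta_interval="0"/>
+ *     </rsc_op>
+ *
+ * With those values, the action timer would be set to 25000ms (timeout plus
+ * start delay).
+ */
+
+/*!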
+ * \internal
+ * \brief Unpack transition graph synapse from XML
+ *
+ * \param[in] new_graph    Transition graph that synapse is part of
+ * \param[in] xml_synapse  Synapse XML
+ *
+ * \return Newly allocated synapse on success, or NULL otherwise
+ */
+static synapse_t *
+unpack_synapse(crm_graph_t *new_graph, xmlNode *xml_synapse)
+{
+    const char *value = NULL;
+    xmlNode *action_set = NULL;
+    synapse_t *new_synapse = NULL;
+
+    crm_trace("Unpacking synapse %s", ID(xml_synapse));
+
+    new_synapse = calloc(1, sizeof(synapse_t));
+    if (new_synapse == NULL) {
+        return NULL;
+    }
+
+    pcmk__scan_min_int(ID(xml_synapse), &(new_synapse->id), 0);
+
+    value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY);
+    pcmk__scan_min_int(value, &(new_synapse->priority), 0);
+
+    CRM_CHECK(new_synapse->id >= 0, free(new_synapse);
+              return NULL);
+
+    new_graph->num_synapses++;
+
+    crm_trace("Unpacking synapse %s action sets",
+              crm_element_value(xml_synapse, XML_ATTR_ID));
+
+    for (action_set = first_named_child(xml_synapse, "action_set");
+         action_set != NULL; action_set = crm_next_same_xml(action_set)) {
+
+        for (xmlNode *action = pcmk__xml_first_child(action_set);
+             action != NULL; action = pcmk__xml_next(action)) {
+
+            crm_action_t *new_action = unpack_action(new_synapse, action);
+
+            if (new_action == NULL) {
+                continue;
+            }
+
+            crm_trace("Adding action %d to synapse %d",
+                      new_action->id, new_synapse->id);
+            new_graph->num_actions++;
+            new_synapse->actions = g_list_append(new_synapse->actions,
+                                                 new_action);
+        }
+    }
+
+    crm_trace("Unpacking synapse %s inputs", ID(xml_synapse));
+
+    for (xmlNode *inputs = first_named_child(xml_synapse, "inputs");
+         inputs != NULL; inputs = crm_next_same_xml(inputs)) {
+
+        for (xmlNode *trigger = first_named_child(inputs, "trigger");
+             trigger != NULL; trigger = crm_next_same_xml(trigger)) {
+
+            for (xmlNode *input = pcmk__xml_first_child(trigger);
+                 input != NULL; input = pcmk__xml_next(input)) {
+
+                crm_action_t *new_input = unpack_action(new_synapse, input);
+
+                if (new_input == NULL) {
+                    continue;
+                }
+
+                crm_trace("Adding input %d to synapse %d",
+                          new_input->id, new_synapse->id);
+
+                new_synapse->inputs = g_list_append(new_synapse->inputs,
+                                                    new_input);
+            }
+        }
+    }
+
+    return new_synapse;
+}
+
+/*!
+ * \internal
+ * \brief Unpack transition graph XML
+ *
+ * \param[in] xml_graph  Transition graph XML to unpack
+ * \param[in] reference  Where the XML came from (for logging)
+ *
+ * \return Newly allocated transition graph on success, NULL otherwise
+ * \note The caller is responsible for freeing the return value using
+ *       pcmk__free_graph().
+ * \note The XML is expected to be structured like:
+         <transition_graph ...>
+           <synapse id="0">
+             <action_set>
+               <rsc_op id="2" ...> ... </rsc_op>
+               ...
+             </action_set>
+             <inputs>
+               <trigger>
+                 <rsc_op id="1" ...> ... </rsc_op>
+               </trigger>
+               ...
+             </inputs>
+           </synapse>
+           ...
+         </transition_graph>
+ */
+crm_graph_t *
+pcmk__unpack_graph(xmlNode *xml_graph, const char *reference)
+{
+    crm_graph_t *new_graph = NULL;
+    const char *t_id = NULL;
+    const char *time = NULL;
+
+    new_graph = calloc(1, sizeof(crm_graph_t));
+    if (new_graph == NULL) {
+        return NULL;
+    }
+
+    new_graph->source = strdup((reference == NULL)?
"unknown" : reference); + if (new_graph->source == NULL) { + free(new_graph); + return NULL; + } + + new_graph->id = -1; + new_graph->abort_priority = 0; + new_graph->network_delay = 0; + new_graph->stonith_timeout = 0; + new_graph->completion_action = tg_done; + + // Parse top-level attributes from + if (xml_graph != NULL) { + t_id = crm_element_value(xml_graph, "transition_id"); + CRM_CHECK(t_id != NULL, free(new_graph); + return NULL); + pcmk__scan_min_int(t_id, &(new_graph->id), -1); + + time = crm_element_value(xml_graph, "cluster-delay"); + CRM_CHECK(time != NULL, free(new_graph); + return NULL); + new_graph->network_delay = crm_parse_interval_spec(time); + + time = crm_element_value(xml_graph, "stonith-timeout"); + if (time == NULL) { + new_graph->stonith_timeout = new_graph->network_delay; + } else { + new_graph->stonith_timeout = crm_parse_interval_spec(time); + } + + // Use 0 (dynamic limit) as default/invalid, -1 (no limit) as minimum + t_id = crm_element_value(xml_graph, "batch-limit"); + if ((t_id == NULL) + || (pcmk__scan_min_int(t_id, &(new_graph->batch_limit), + -1) != pcmk_rc_ok)) { + new_graph->batch_limit = 0; + } + + t_id = crm_element_value(xml_graph, "migration-limit"); + pcmk__scan_min_int(t_id, &(new_graph->migration_limit), -1); + } + + // Unpack each child element + for (xmlNode *synapse_xml = first_named_child(xml_graph, "synapse"); + synapse_xml != NULL; synapse_xml = crm_next_same_xml(synapse_xml)) { + + synapse_t *new_synapse = unpack_synapse(new_graph, synapse_xml); + + if (new_synapse != NULL) { + new_graph->synapses = g_list_append(new_graph->synapses, + new_synapse); + } + } + + crm_debug("Unpacked transition %d from %s: %d actions in %d synapses", + new_graph->id, new_graph->source, new_graph->num_actions, + new_graph->num_synapses); + + return new_graph; +} + + +/* + * Functions for freeing transition graph objects + */ + +/*! + * \internal + * \brief Free a transition graph action object + * + * \param[in] user_data Action to free + */ +static void +free_graph_action(gpointer user_data) +{ + crm_action_t *action = user_data; + + if ((action->timer != NULL) && (action->timer->source_id != 0)) { + crm_warn("Cancelling timer for graph action %d", action->id); + g_source_remove(action->timer->source_id); + } + if (action->params != NULL) { + g_hash_table_destroy(action->params); + } + free_xml(action->xml); + free(action->timer); + free(action); +} + +/*! + * \internal + * \brief Free a transition graph synapse object + * + * \param[in] user_data Synapse to free + */ +static void +free_graph_synapse(gpointer user_data) +{ + synapse_t *synapse = user_data; + + g_list_free_full(synapse->actions, free_graph_action); + g_list_free_full(synapse->inputs, free_graph_action); + free(synapse); +} + +/*! + * \internal + * \brief Free a transition graph object + * + * \param[in] graph Transition graph to free + */ +void +pcmk__free_graph(crm_graph_t *graph) +{ + if (graph != NULL) { + g_list_free_full(graph->synapses, free_graph_synapse); + free(graph->source); + free(graph); + } +} + + +/* + * Other transition graph utilities + */ + +/*! 
+ * \internal + * \brief Synthesize an executor event from a graph action + * + * \param[in] resource If not NULL, use greater call ID than in this XML + * \param[in] action Graph action + * \param[in] status What to use as event execution status + * \param[in] rc What to use as event exit status + * + * \return Newly allocated executor event on success, or NULL otherwise + */ +lrmd_event_data_t * +pcmk__event_from_graph_action(xmlNode *resource, crm_action_t *action, + int status, int rc) +{ + lrmd_event_data_t *op = NULL; + GHashTableIter iter; + const char *name = NULL; + const char *value = NULL; + xmlNode *action_resource = NULL; + + CRM_CHECK(action != NULL, return NULL); + CRM_CHECK(action->type == action_type_rsc, return NULL); + + action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); + CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "invalid"); + return NULL); + + op = lrmd_new_event(ID(action_resource), + crm_element_value(action->xml, XML_LRM_ATTR_TASK), + action->interval_ms); + op->rc = rc; + op->op_status = status; + op->t_run = time(NULL); + op->t_rcchange = op->t_run; + op->params = pcmk__strkey_table(free, free); + + g_hash_table_iter_init(&iter, action->params); + while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) { + g_hash_table_insert(op->params, strdup(name), strdup(value)); + } + + for (xmlNode *xop = pcmk__xml_first_child(resource); xop != NULL; + xop = pcmk__xml_next(xop)) { + int tmp = 0; + + crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); + crm_debug("Got call_id=%d for %s", tmp, ID(resource)); + if (tmp > op->call_id) { + op->call_id = tmp; + } + } + + op->call_id++; + return op; +} diff --git a/lib/pacemaker/pcmk_trans_utils.c b/lib/pacemaker/pcmk_graph_logging.c similarity index 58% rename from lib/pacemaker/pcmk_trans_utils.c rename to lib/pacemaker/pcmk_graph_logging.c index 2a143f47ac..7af06b92b3 100644 --- a/lib/pacemaker/pcmk_trans_utils.c +++ b/lib/pacemaker/pcmk_graph_logging.c @@ -1,305 +1,216 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include -extern crm_graph_functions_t *graph_fns; - -static gboolean -pseudo_action_dummy(crm_graph_t * graph, crm_action_t * action) -{ - static int fail = -1; - - if (fail < 0) { - long long fail_ll; - - if ((pcmk__scan_ll(getenv("PE_fail"), &fail_ll, 0LL) == pcmk_rc_ok) - && (fail_ll > 0LL) && (fail_ll <= INT_MAX)) { - fail = (int) fail_ll; - } else { - fail = 0; - } - } - - crm_trace("Dummy event handler: action %d executed", action->id); - if (action->id == fail) { - crm_err("Dummy event handler: pretending action %d failed", action->id); - action->failed = TRUE; - graph->abort_priority = INFINITY; - } - action->confirmed = TRUE; - update_graph(graph, action); - return TRUE; -} - -crm_graph_functions_t default_fns = { - pseudo_action_dummy, - pseudo_action_dummy, - pseudo_action_dummy, - pseudo_action_dummy -}; - -void -set_default_graph_functions(void) -{ - graph_fns = &default_fns; -} - -void -set_graph_functions(crm_graph_functions_t * fns) -{ - crm_info("Setting custom graph functions"); - graph_fns = fns; - - CRM_ASSERT(graph_fns != NULL); - CRM_ASSERT(graph_fns->rsc != NULL); - CRM_ASSERT(graph_fns->crmd != NULL); - CRM_ASSERT(graph_fns->pseudo != NULL); - CRM_ASSERT(graph_fns->stonith != NULL); -} - +/*! + * \internal + * \brief Return text equivalent of an enum transition_status for logging + * + * \param[in] state Transition status + * + * \return Human-readable text equivalent of \p state + */ const char * -transition_status(enum transition_status state) +pcmk__graph_status2text(enum transition_status state) { switch (state) { case transition_active: return "active"; case transition_pending: return "pending"; case transition_complete: return "complete"; case transition_stopped: return "stopped"; case transition_terminated: return "terminated"; case transition_action_failed: return "failed (action)"; case transition_failed: return "failed"; } return "unknown"; } -const char * +static const char * actiontype2text(action_type_e type) { switch (type) { case action_type_pseudo: return "pseudo"; case action_type_rsc: return "resource"; case action_type_crm: return "cluster"; } return "invalid"; } +/*! 
+ * \internal + * \brief Find a transition graph action by ID + * + * \param[in] graph Transition graph to search + * \param[in] id Action ID to search for + * + * \return Transition graph action corresponding to \p id, or NULL if none + */ static crm_action_t * -find_action(crm_graph_t * graph, int id) +find_graph_action_by_id(crm_graph_t *graph, int id) { - GList *sIter = NULL; - if (graph == NULL) { return NULL; } - for (sIter = graph->synapses; sIter != NULL; sIter = sIter->next) { - GList *aIter = NULL; + for (GList *sIter = graph->synapses; sIter != NULL; sIter = sIter->next) { synapse_t *synapse = (synapse_t *) sIter->data; - for (aIter = synapse->actions; aIter != NULL; aIter = aIter->next) { + for (GList *aIter = synapse->actions; aIter != NULL; + aIter = aIter->next) { + crm_action_t *action = (crm_action_t *) aIter->data; if (action->id == id) { return action; } } } return NULL; } static const char * synapse_state_str(synapse_t *synapse) { if (synapse->failed) { return "Failed"; } else if (synapse->confirmed) { return "Completed"; } else if (synapse->executed) { return "In-flight"; } else if (synapse->ready) { return "Ready"; } return "Pending"; } // List action IDs of inputs in graph that haven't completed successfully static char * synapse_pending_inputs(crm_graph_t *graph, synapse_t *synapse) { char *pending = NULL; size_t pending_len = 0; for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { crm_action_t *input = (crm_action_t *) lpc->data; if (input->failed) { pcmk__add_word(&pending, &pending_len, ID(input->xml)); } else if (input->confirmed) { // Confirmed successful inputs are not pending - } else if (find_action(graph, input->id) != NULL) { + } else if (find_graph_action_by_id(graph, input->id) != NULL) { // In-flight or pending pcmk__add_word(&pending, &pending_len, ID(input->xml)); } } if (pending == NULL) { pending = strdup("none"); } return pending; } // Log synapse inputs that aren't in graph static void log_unresolved_inputs(unsigned int log_level, crm_graph_t *graph, synapse_t *synapse) { for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { crm_action_t *input = (crm_action_t *) lpc->data; const char *key = crm_element_value(input->xml, XML_LRM_ATTR_TASK_KEY); const char *host = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); - if (find_action(graph, input->id) == NULL) { + if (find_graph_action_by_id(graph, input->id) == NULL) { do_crm_log(log_level, " * [Input %2d]: Unresolved dependency %s op %s%s%s", input->id, actiontype2text(input->type), key, (host? " on " : ""), (host? host : "")); } } } static void log_synapse_action(unsigned int log_level, synapse_t *synapse, crm_action_t *action, const char *pending_inputs) { const char *key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *host = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); char *desc = crm_strdup_printf("%s %s op %s", synapse_state_str(synapse), actiontype2text(action->type), key); do_crm_log(log_level, "[Action %4d]: %-50s%s%s (priority: %d, waiting: %s)", action->id, desc, (host? " on " : ""), (host? 
host : ""), synapse->priority, pending_inputs); free(desc); } static void -print_synapse(unsigned int log_level, crm_graph_t * graph, synapse_t * synapse) +log_synapse(unsigned int log_level, crm_graph_t *graph, synapse_t *synapse) { char *pending = NULL; if (!synapse->executed) { pending = synapse_pending_inputs(graph, synapse); } for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { log_synapse_action(log_level, synapse, (crm_action_t *) lpc->data, pending); } free(pending); if (!synapse->executed) { log_unresolved_inputs(log_level, graph, synapse); } } void -print_action(int log_level, const char *prefix, crm_action_t * action) +pcmk__log_graph_action(int log_level, crm_action_t *action) { - print_synapse(log_level, NULL, action->synapse); + log_synapse(log_level, NULL, action->synapse); } void -print_graph(unsigned int log_level, crm_graph_t * graph) +pcmk__log_graph(unsigned int log_level, crm_graph_t *graph) { - GList *lpc = NULL; - - if (graph == NULL || graph->num_actions == 0) { + if ((graph == NULL) || (graph->num_actions == 0)) { if (log_level == LOG_TRACE) { crm_debug("Empty transition graph"); } return; } do_crm_log(log_level, "Graph %d with %d actions:" " batch-limit=%d jobs, network-delay=%ums", graph->id, graph->num_actions, graph->batch_limit, graph->network_delay); - for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { - synapse_t *synapse = (synapse_t *) lpc->data; - - print_synapse(log_level, graph, synapse); + for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { + log_synapse(log_level, graph, (synapse_t *) lpc->data); } } - -static const char * -abort2text(enum transition_action abort_action) -{ - switch (abort_action) { - case tg_done: - return "done"; - case tg_stop: - return "stop"; - case tg_restart: - return "restart"; - case tg_shutdown: - return "shutdown"; - } - return "unknown"; -} - -bool -update_abort_priority(crm_graph_t * graph, int priority, - enum transition_action action, const char *abort_reason) -{ - bool change = FALSE; - - if (graph == NULL) { - return change; - } - - if (graph->abort_priority < priority) { - crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority); - graph->abort_priority = priority; - if (graph->abort_reason != NULL) { - crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason); - } - graph->abort_reason = abort_reason; - change = TRUE; - } - - if (graph->completion_action < action) { - crm_debug("Abort action %s superseded by %s: %s", - abort2text(graph->completion_action), abort2text(action), abort_reason); - graph->completion_action = action; - change = TRUE; - } - - return change; -} diff --git a/lib/pacemaker/pcmk_sched_graph.c b/lib/pacemaker/pcmk_graph_producer.c similarity index 100% rename from lib/pacemaker/pcmk_sched_graph.c rename to lib/pacemaker/pcmk_graph_producer.c diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c index ad395a19e8..0dbfd1893b 100644 --- a/lib/pacemaker/pcmk_sched_allocate.c +++ b/lib/pacemaker/pcmk_sched_allocate.c @@ -1,3043 +1,3035 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pacemaker); extern bool pcmk__is_daemon; void set_alloc_actions(pe_working_set_t * data_set); extern void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set); extern gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set); static void apply_remote_node_ordering(pe_working_set_t *data_set); static enum remote_connection_state get_remote_node_state(pe_node_t *node); enum remote_connection_state { remote_state_unknown = 0, remote_state_alive = 1, remote_state_resting = 2, remote_state_failed = 3, remote_state_stopped = 4 }; static const char * state2text(enum remote_connection_state state) { switch (state) { case remote_state_unknown: return "unknown"; case remote_state_alive: return "alive"; case remote_state_resting: return "resting"; case remote_state_failed: return "failed"; case remote_state_stopped: return "stopped"; } return "impossible"; } resource_alloc_functions_t resource_class_alloc_functions[] = { { pcmk__native_merge_weights, pcmk__native_allocate, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, native_rsc_location, native_action_flags, native_update_actions, native_expand, native_append_meta, }, { pcmk__group_merge_weights, pcmk__group_allocate, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, group_rsc_location, group_action_flags, group_update_actions, group_expand, group_append_meta, }, { pcmk__native_merge_weights, pcmk__clone_allocate, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, clone_rsc_location, clone_action_flags, pcmk__multi_update_actions, clone_expand, clone_append_meta, }, { pcmk__native_merge_weights, pcmk__bundle_allocate, pcmk__bundle_create_actions, pcmk__bundle_create_probe, pcmk__bundle_internal_constraints, pcmk__bundle_rsc_colocation_lh, pcmk__bundle_rsc_colocation_rh, pcmk__bundle_rsc_location, pcmk__bundle_action_flags, pcmk__multi_update_actions, pcmk__bundle_expand, pcmk__bundle_append_meta, } }; static gboolean check_rsc_parameters(pe_resource_t * rsc, pe_node_t * node, xmlNode * rsc_entry, gboolean active_here, pe_working_set_t * data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; gboolean changed = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for (; attr_lpc < PCMK__NELEM(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if (value == old_value /* i.e. 
NULL */ || pcmk__str_eq(value, old_value, pcmk__str_none)) { continue; } changed = TRUE; trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set); if (active_here) { force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } } if (force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); pe__set_resource_flags(rsc, pe_rsc_start_pending); delete_resource = TRUE; } else if (changed) { delete_resource = TRUE; } return delete_resource; } static void CancelXmlOp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * active_node, const char *reason, pe_working_set_t * data_set) { guint interval_ms = 0; pe_action_t *cancel = NULL; const char *task = NULL; const char *call_id = NULL; CRM_CHECK(xml_op != NULL, return); CRM_CHECK(active_node != NULL, return); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); crm_info("Action " PCMK__OP_FMT " on %s will be stopped: %s", rsc->id, task, interval_ms, active_node->details->uname, (reason? reason : "unknown")); cancel = pe_cancel_op(rsc, task, interval_ms, active_node, data_set); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); } static gboolean check_action_definition(pe_resource_t * rsc, pe_node_t * active_node, xmlNode * xml_op, pe_working_set_t * data_set) { char *key = NULL; guint interval_ms = 0; const op_digest_cache_t *digest_data = NULL; gboolean did_change = FALSE; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_secure = NULL; CRM_CHECK(active_node != NULL, return FALSE); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if (interval_ms > 0) { xmlNode *op_match = NULL; /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = pcmk__op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if ((op_match == NULL) && pcmk_is_set(data_set->flags, pe_flag_stop_action_orphans)) { CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set); free(key); return TRUE; } else if (op_match == NULL) { pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname); free(key); return TRUE; } free(key); key = NULL; } crm_trace("Testing " PCMK__OP_FMT " on %s", rsc->id, task, interval_ms, active_node->details->uname); if ((interval_ms == 0) && pcmk__str_eq(task, RSC_STATUS, pcmk__str_casei)) { /* Reload based on the start action not a probe */ task = RSC_START; } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_MIGRATED, pcmk__str_casei)) { /* Reload based on the start action not a migrate */ task = RSC_START; } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_PROMOTE, pcmk__str_casei)) { /* Reload based on the start action not a promote */ task = RSC_START; } digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set); if (pcmk_is_set(data_set->flags, pe_flag_sanitized)) { digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST); } if(digest_data->rc != RSC_DIGEST_MATCH && digest_secure && digest_data->digest_secure_calc && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) { if (!pcmk__is_daemon && data_set->priv != NULL) { pcmk__output_t *out = 
data_set->priv; out->info(out, "Only 'private' parameters to " PCMK__OP_FMT " on %s changed: %s", rsc->id, task, interval_ms, active_node->details->uname, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); } } else if (digest_data->rc == RSC_DIGEST_RESTART) { /* Changes that force a restart */ pe_action_t *required = NULL; did_change = TRUE; key = pcmk__op_key(rsc->id, task, interval_ms); crm_log_xml_info(digest_data->params_restart, "params:restart"); required = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); pe_action_set_reason(required, "resource definition change", true); trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set); } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) { // Changes that can potentially be handled by an agent reload const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); did_change = TRUE; trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set); crm_log_xml_info(digest_data->params_all, "params:reload"); key = pcmk__op_key(rsc->id, task, interval_ms); if (interval_ms > 0) { pe_action_t *op = NULL; #if 0 /* Always reload/restart the entire resource */ ReloadRsc(rsc, active_node, data_set); #else /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */ op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set); pe__set_action_flags(op, pe_action_reschedule); #endif } else if (digest_restart) { pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id); /* Reload this resource */ ReloadRsc(rsc, active_node, data_set); free(key); } else { pe_action_t *required = NULL; pe_rsc_trace(rsc, "Resource %s doesn't support agent reloads", rsc->id); /* Re-send the start/demote/promote op * Recurring ops will be detected independently */ required = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); pe_action_set_reason(required, "resource definition change", true); } } return did_change; } /*! 
* \internal * \brief Do deferred action checks after allocation * * \param[in] data_set Working set for cluster */ static void check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op, enum pe_check_parameters check, pe_working_set_t *data_set) { const char *reason = NULL; op_digest_cache_t *digest_data = NULL; switch (check) { case pe_check_active: if (check_action_definition(rsc, node, rsc_op, data_set) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { reason = "action definition changed"; } break; case pe_check_last_failure: digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s has no digest to compare", rsc->id, ID(rsc_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: reason = "resource parameters have changed"; break; } break; } if (reason) { pe__clear_failcount(rsc, node, reason, data_set); } } static void check_actions_for(xmlNode * rsc_entry, pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set) { GList *gIter = NULL; int offset = -1; int stop_index = 0; int start_index = 0; const char *task = NULL; xmlNode *rsc_op = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; CRM_CHECK(node != NULL, return); if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { pe_resource_t *parent = uber_parent(rsc); if(parent == NULL || pe_rsc_is_clone(parent) == FALSE || pcmk_is_set(parent->flags, pe_rsc_unique)) { pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); DeleteRsc(rsc, node, FALSE, data_set); } else { pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id); } return; } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname); if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_prepend(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; offset++; if (start_index < stop_index) { /* stopped */ continue; } else if (offset < start_index) { /* action occurred prior to a start */ continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms > 0) && (pcmk_is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) { // Maintenance mode cancels recurring operations CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); } else if ((interval_ms > 0) || pcmk__strcase_any_of(task, RSC_STATUS, RSC_START, RSC_PROMOTE, RSC_MIGRATED, NULL)) { /* If a resource operation failed, and the operation's definition * has changed, clear any fail count so they can be retried fresh. 
*/ if (pe__bundle_needs_remote_name(rsc, data_set)) { /* We haven't allocated resources to nodes yet, so if the * REMOTE_CONTAINER_HACK is used, we may calculate the digest * based on the literal "#uname" value rather than the properly * substituted value. That would mistakenly make the action * definition appear to have been changed. Defer the check until * later in this case. */ pe__add_param_check(rsc_op, rsc, node, pe_check_active, data_set); } else if (check_action_definition(rsc, node, rsc_op, data_set) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { pe__clear_failcount(rsc, node, "action definition changed", data_set); } } } g_list_free(sorted_op_list); } static GList * find_rsc_list(GList *result, pe_resource_t * rsc, const char *id, gboolean renamed_clones, gboolean partial, pe_working_set_t * data_set) { GList *gIter = NULL; gboolean match = FALSE; if (id == NULL) { return NULL; } if (rsc == NULL) { if (data_set == NULL) { return NULL; } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } return result; } if (partial) { if (strstr(rsc->id, id)) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) { match = TRUE; } } else { if (strcmp(rsc->id, id) == 0) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } } if (match) { result = g_list_prepend(result, rsc); } if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } } return result; } static void check_actions(pe_working_set_t * data_set) { const char *id = NULL; pe_node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xmlNode *node_state = NULL; for (node_state = pcmk__xe_first_child(status); node_state != NULL; node_state = pcmk__xe_next(node_state)) { if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if (node == NULL) { continue; /* Still need to check actions for a maintenance node to cancel existing monitor operations */ } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) { crm_trace("Skipping param check for %s: can't run resources", node->details->uname); continue; } crm_trace("Processing node %s", node->details->uname); if (node->details->online || pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { xmlNode *rsc_entry = NULL; for (rsc_entry = pcmk__xe_first_child(lrm_rscs); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) { if (pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { if (xml_has_children(rsc_entry)) { GList *gIter = NULL; GList *result = NULL; const char *rsc_id = ID(rsc_entry); CRM_CHECK(rsc_id != NULL, return); result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set); for (gIter = result; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->variant != pe_native) { continue; } check_actions_for(rsc_entry, rsc, node, data_set); } 
g_list_free(result); } } } } } } } static void apply_placement_constraints(pe_working_set_t * data_set) { for (GList *gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { pe__location_t *cons = gIter->data; cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); } } static gboolean failcount_clear_action_exists(pe_node_t * node, pe_resource_t * rsc) { gboolean rc = FALSE; GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE); if (list) { rc = TRUE; } g_list_free(list); return rc; } /*! * \internal * \brief Force resource away if failures hit migration threshold * * \param[in,out] rsc Resource to check for failures * \param[in,out] node Node to check for failures * \param[in,out] data_set Cluster working set to update */ static void check_migration_threshold(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { int fail_count, countdown; pe_resource_t *failed; /* Migration threshold of 0 means never force away */ if (rsc->migration_threshold == 0) { return; } // If we're ignoring failures, also ignore the migration threshold if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) { return; } /* If there are no failures, there's no need to force away */ fail_count = pe_get_failcount(node, rsc, NULL, pe_fc_effective|pe_fc_fillers, NULL, data_set); if (fail_count <= 0) { return; } /* How many more times recovery will be tried on this node */ countdown = QB_MAX(rsc->migration_threshold - fail_count, 0); /* If failed resource has a parent, we'll force the parent away */ failed = rsc; if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } if (countdown == 0) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_info("%s can fail %d more times on %s before being forced off", failed->id, countdown, node->details->uname); } } static void common_apply_stickiness(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set) { if (rsc->children) { GList *gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; common_apply_stickiness(child_rsc, node, data_set); } return; } if (pcmk_is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0 && pcmk__list_of_1(rsc->running_on)) { pe_node_t *current = pe_find_node_id(rsc->running_on, node->details->id); pe_node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (current == NULL) { } else if ((match != NULL) || pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)) { pe_resource_t *sticky_rsc = rsc; resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set); pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location" " (node=%s, weight=%d)", sticky_rsc->id, node->details->uname, rsc->stickiness); } else { GHashTableIter iter; pe_node_t *nIter = NULL; pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric" " and node %s is not explicitly allowed", rsc->id, node->details->uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) { crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight); } } } /* Check the migration threshold only if a failcount clear action * has not already been placed for this resource on the node. 
* There is no sense in potentially forcing the resource from this * node if the failcount is being reset anyway. * * @TODO A clear_failcount operation can be scheduled in stage4() via * check_actions_for(), or in stage5() via check_params(). This runs in * stage2(), so it cannot detect those, meaning we might check the migration * threshold when we shouldn't -- worst case, we stop or move the resource, * then move it back next transition. */ if (failcount_clear_action_exists(node, rsc) == FALSE) { check_migration_threshold(rsc, node, data_set); } } void complex_set_cmds(pe_resource_t * rsc) { GList *gIter = rsc->children; rsc->cmds = &resource_class_alloc_functions[rsc->variant]; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; complex_set_cmds(child_rsc); } } void set_alloc_actions(pe_working_set_t * data_set) { GList *gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; complex_set_cmds(rsc); } } static void calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data) { const char *key = (const char *)gKey; const char *value = (const char *)gValue; int *system_health = (int *)user_data; if (!gKey || !gValue || !user_data) { return; } if (pcmk__starts_with(key, "#health")) { int score; /* Convert the value into an integer */ score = char2score(value); /* Add it to the running total */ *system_health = pe__add_scores(score, *system_health); } } static gboolean apply_system_health(pe_working_set_t * data_set) { GList *gIter = NULL; const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy"); int base_health = 0; if (pcmk__str_eq(health_strategy, "none", pcmk__str_null_matches | pcmk__str_casei)) { /* Prevent any accidental health -> score translation */ pcmk__score_red = 0; pcmk__score_yellow = 0; pcmk__score_green = 0; return TRUE; } else if (pcmk__str_eq(health_strategy, "migrate-on-red", pcmk__str_casei)) { /* Resources on nodes which have health values of red are * weighted away from that node. */ pcmk__score_red = -INFINITY; pcmk__score_yellow = 0; pcmk__score_green = 0; } else if (pcmk__str_eq(health_strategy, "only-green", pcmk__str_casei)) { /* Resources on nodes which have health values of red or yellow * are forced away from that node. */ pcmk__score_red = -INFINITY; pcmk__score_yellow = -INFINITY; pcmk__score_green = 0; } else if (pcmk__str_eq(health_strategy, "progressive", pcmk__str_casei)) { /* Same as the above, but use the r/y/g scores provided by the user * Defaults are provided by the pe_prefs table * Also, custom health "base score" can be used */ base_health = char2score(pe_pref(data_set->config_hash, "node-health-base")); } else if (pcmk__str_eq(health_strategy, "custom", pcmk__str_casei)) { /* Requires the admin to configure the rsc_location constaints for * processing the stored health scores */ /* TODO: Check for the existence of appropriate node health constraints */ return TRUE; } else { crm_err("Unknown node health strategy: %s", health_strategy); return FALSE; } crm_info("Applying automated node health strategy: %s", health_strategy); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { int system_health = base_health; pe_node_t *node = (pe_node_t *) gIter->data; /* Search through the node hash table for system health entries. 
*/ g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health); crm_info(" Node %s has an combined system health of %d", node->details->uname, system_health); /* If the health is non-zero, then create a new rsc2node so that the * weight will be added later on. */ if (system_health != 0) { GList *gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set); } } } return TRUE; } gboolean stage0(pe_working_set_t * data_set) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); if (data_set->input == NULL) { return FALSE; } if (!pcmk_is_set(data_set->flags, pe_flag_have_status)) { crm_trace("Calculating status"); cluster_status(data_set); } set_alloc_actions(data_set); apply_system_health(data_set); unpack_constraints(cib_constraints, data_set); return TRUE; } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t * data_set) { pe_action_t *probe_node_complete = NULL; for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED); if (node->details->online == FALSE) { if (pe__is_remote_node(node) && node->details->remote_rsc && (get_remote_node_state(node) == remote_state_failed)) { pe_fence_node(data_set, node, "the connection is unrecoverable", FALSE); } continue; } else if (node->details->unclean) { continue; } else if (node->details->rsc_discovery_enabled == FALSE) { /* resource discovery is disabled for this node */ continue; } if (probed != NULL && crm_is_true(probed) == FALSE) { pe_action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname), CRM_OP_REPROBE, node, FALSE, TRUE, data_set); add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); continue; } for (GList *gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set); } } return TRUE; } static void rsc_discover_filter(pe_resource_t *rsc, pe_node_t *node) { pe_resource_t *top = uber_parent(rsc); pe_node_t *match; if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) { return; } g_list_foreach(rsc->children, (GFunc) rsc_discover_filter, node); match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match && match->rsc_discover_mode != pe_discover_exclusive) { match->weight = -INFINITY; } } static time_t shutdown_time(pe_node_t *node, pe_working_set_t *data_set) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); time_t result = 0; if (shutdown) { long long result_ll; if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) { result = (time_t) result_ll; } } return result? 
result : get_effective_time(data_set); } static void apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set) { const char *class; // Only primitives and (uncloned) groups may be locked if (rsc->variant == pe_group) { g_list_foreach(rsc->children, (GFunc) apply_shutdown_lock, data_set); } else if (rsc->variant != pe_native) { return; } // Fence devices and remote connections can't be locked class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches) || pe__resource_is_remote_conn(rsc, data_set)) { return; } if (rsc->lock_node != NULL) { // The lock was obtained from resource history if (rsc->running_on != NULL) { /* The resource was started elsewhere even though it is now * considered locked. This shouldn't be possible, but as a * failsafe, we don't want to disturb the resource now. */ pe_rsc_info(rsc, "Cancelling shutdown lock because %s is already active", rsc->id); pe__clear_resource_history(rsc, rsc->lock_node, data_set); rsc->lock_node = NULL; rsc->lock_time = 0; } // Only a resource active on exactly one node can be locked } else if (pcmk__list_of_1(rsc->running_on)) { pe_node_t *node = rsc->running_on->data; if (node->details->shutdown) { if (node->details->unclean) { pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown", rsc->id, node->details->uname); } else { rsc->lock_node = node; rsc->lock_time = shutdown_time(node, data_set); } } } if (rsc->lock_node == NULL) { // No lock needed return; } if (data_set->shutdown_lock > 0) { time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock; pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)", rsc->id, rsc->lock_node->details->uname, (long long) lock_expiration); pe__update_recheck_time(++lock_expiration, data_set); } else { pe_rsc_info(rsc, "Locking %s to %s due to shutdown", rsc->id, rsc->lock_node->details->uname); } // If resource is locked to one node, ban it from all other nodes for (GList *item = data_set->nodes; item != NULL; item = item->next) { pe_node_t *node = item->data; if (strcmp(node->details->uname, rsc->lock_node->details->uname)) { resource_location(rsc, node, -CRM_SCORE_INFINITY, XML_CONFIG_ATTR_SHUTDOWN_LOCK, data_set); } } } /* * \internal * \brief Stage 2 of cluster status: apply node-specific criteria * * Count known nodes, and apply location constraints, stickiness, and exclusive * resource discovery. 
*/ gboolean stage2(pe_working_set_t * data_set) { GList *gIter = NULL; if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { g_list_foreach(data_set->resources, (GFunc) apply_shutdown_lock, data_set); } if (!pcmk_is_set(data_set->flags, pe_flag_no_compat)) { // @COMPAT API backward compatibility for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node && (node->weight >= 0) && node->details->online && (node->details->type != node_ping)) { data_set->max_valid_nodes++; } } } apply_placement_constraints(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { GList *gIter2 = NULL; pe_node_t *node = (pe_node_t *) gIter->data; gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; common_apply_stickiness(rsc, node, data_set); rsc_discover_filter(rsc, node); } } return TRUE; } /* * Create internal resource constraints before allocation */ gboolean stage3(pe_working_set_t * data_set) { GList *gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; rsc->cmds->internal_constraints(rsc, data_set); } return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t * data_set) { check_actions(data_set); return TRUE; } static void * convert_const_pointer(const void *ptr) { /* Worst function ever */ return (void *)ptr; } static gint sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) { int rc = 0; int r1_weight = -INFINITY; int r2_weight = -INFINITY; const char *reason = "existence"; GList *nodes = (GList *) data; const pe_resource_t *resource1 = a; const pe_resource_t *resource2 = b; pe_node_t *r1_node = NULL; pe_node_t *r2_node = NULL; GList *gIter = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; reason = "priority"; r1_weight = resource1->priority; r2_weight = resource2->priority; if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "no node list"; if (nodes == NULL) { goto done; } r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1), resource1->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); pe__show_node_weights(true, NULL, resource1->id, r1_nodes, resource1->cluster); r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2), resource2->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); pe__show_node_weights(true, NULL, resource2->id, r2_nodes, resource2->cluster); /* Current location score */ reason = "current location"; r1_weight = -INFINITY; r2_weight = -INFINITY; if (resource1->running_on) { r1_node = pe__current_node(resource1); r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id); if (r1_node != NULL) { r1_weight = r1_node->weight; } } if (resource2->running_on) { r2_node = pe__current_node(resource2); r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id); if (r2_node != NULL) { r2_weight = r2_node->weight; } } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "score"; for (gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; r1_node = NULL; r2_node = NULL; r1_weight = -INFINITY; if (r1_nodes) { r1_node = g_hash_table_lookup(r1_nodes, node->details->id); } if (r1_node) { r1_weight = r1_node->weight; } r2_weight = -INFINITY; if (r2_nodes) { r2_node = 
g_hash_table_lookup(r2_nodes, node->details->id); } if (r2_node) { r2_weight = r2_node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } } done: crm_trace("%s (%d) on %s %c %s (%d) on %s: %s", resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a", rc < 0 ? '>' : rc > 0 ? '<' : '=', resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason); if (r1_nodes) { g_hash_table_destroy(r1_nodes); } if (r2_nodes) { g_hash_table_destroy(r2_nodes); } return rc; } static void allocate_resources(pe_working_set_t * data_set) { GList *gIter = NULL; if (pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) { /* Allocate remote connection resources first (which will also allocate * any colocation dependencies). If the connection is migrating, always * prefer the partial migration target. */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->is_remote_node == FALSE) { continue; } pe_rsc_trace(rsc, "Allocating remote connection resource '%s'", rsc->id); rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set); } } /* now do the rest of the resources */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->is_remote_node == TRUE) { continue; } pe_rsc_trace(rsc, "Allocating %s resource '%s'", crm_element_name(rsc->xml), rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } } /* We always use pe_order_preserve with these convenience functions to exempt * internally generated constraints from the prohibition of user constraints * involving remote connection resources. * * The start ordering additionally uses pe_order_runnable_left so that the * specified action is not runnable if the start is not runnable. 
*/ static inline void order_start_then_action(pe_resource_t *lh_rsc, pe_action_t *rh_action, enum pe_ordering extra, pe_working_set_t *data_set) { if (lh_rsc && rh_action && data_set) { custom_action_order(lh_rsc, start_key(lh_rsc), NULL, rh_action->rsc, NULL, rh_action, pe_order_preserve | pe_order_runnable_left | extra, data_set); } } static inline void order_action_then_stop(pe_action_t *lh_action, pe_resource_t *rh_rsc, enum pe_ordering extra, pe_working_set_t *data_set) { if (lh_action && rh_rsc && data_set) { custom_action_order(lh_action->rsc, NULL, lh_action, rh_rsc, stop_key(rh_rsc), NULL, pe_order_preserve | extra, data_set); } } // Clear fail counts for orphaned rsc on all online nodes static void cleanup_orphans(pe_resource_t * rsc, pe_working_set_t * data_set) { GList *gIter = NULL; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node->details->online && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { pe_action_t *clear_op = NULL; clear_op = pe__clear_failcount(rsc, node, "it is orphaned", data_set); /* We can't use order_action_then_stop() here because its * pe_order_preserve breaks things */ custom_action_order(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), NULL, pe_order_optional, data_set); } } } gboolean stage5(pe_working_set_t * data_set) { pcmk__output_t *out = data_set->priv; GList *gIter = NULL; if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) { GList *nodes = g_list_copy(data_set->nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); data_set->resources = g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes); g_list_free(nodes); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) { out->message(out, "node-capacity", node, "Original"); } } crm_trace("Allocating services"); /* Take (next) highest resource, assign it and create its actions */ allocate_resources(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) { out->message(out, "node-capacity", node, "Remaining"); } } // Process deferred action checks pe__foreach_param_check(data_set, check_params); pe__free_param_checks(data_set); if (pcmk_is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Calculating needed probes"); /* This code probably needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=100: With probes: ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done 
ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints 36s ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph Without probes: ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph */ probe_resources(data_set); } crm_trace("Handle orphans"); if (pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* There's no need to recurse into rsc->children because those * should just be unallocated clone instances. */ if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { cleanup_orphans(rsc, data_set); } } } crm_trace("Creating actions"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; rsc->cmds->create_actions(rsc, data_set); } crm_trace("Creating done"); return TRUE; } static gboolean is_managed(const pe_resource_t * rsc) { GList *gIter = rsc->children; if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (is_managed(child_rsc)) { return TRUE; } } return FALSE; } static gboolean any_managed_resources(pe_working_set_t * data_set) { GList *gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (is_managed(rsc)) { return TRUE; } } return FALSE; } /*! * \internal * \brief Create pseudo-op for guest node fence, and order relative to it * * \param[in] node Guest node to fence * \param[in] data_set Working set of CIB state */ static void fence_guest(pe_node_t *node, pe_working_set_t *data_set) { pe_resource_t *container = node->details->remote_rsc->container; pe_action_t *stop = NULL; pe_action_t *stonith_op = NULL; /* The fence action is just a label; we don't do anything differently for * off vs. reboot. We specify it explicitly, rather than let it default to * cluster's default action, because we are not _initiating_ fencing -- we * are creating a pseudo-event to describe fencing that is already occurring * by other means (container recovery). 
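 *
 * As a concrete (hypothetical) example: for guest node "guest1" whose
 * container resource is "guest1-vm", recovering guest1-vm produces a real
 * stop (and usually a start) for the container, and the pseudo-fence created
 * below is what lets stops/demotes of resources hosted on guest1 be ordered
 * after that recovery, without involving any fencing device.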
*/ const char *fence_action = "off"; /* Check whether guest's container resource has any explicit stop or * start (the stop may be implied by fencing of the guest's host). */ if (container) { stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL); if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) { fence_action = "reboot"; } } /* Create a fence pseudo-event, so we have an event to order actions * against, and the controller can always detect it. */ stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, data_set); pe__set_action_flags(stonith_op, pe_action_pseudo|pe_action_runnable); /* We want to imply stops/demotes after the guest is stopped, not wait until * it is restarted, so we always order pseudo-fencing after stop, not start * (even though start might be closer to what is done for a real reboot). */ if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) { pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, data_set); crm_info("Implying guest node %s is down (action %d) after %s fencing", node->details->uname, stonith_op->id, stop->node->details->uname); order_actions(parent_stonith_op, stonith_op, pe_order_runnable_left|pe_order_implies_then); } else if (stop) { order_actions(stop, stonith_op, pe_order_runnable_left|pe_order_implies_then); crm_info("Implying guest node %s is down (action %d) " "after container %s is stopped (action %d)", node->details->uname, stonith_op->id, container->id, stop->id); } else { /* If we're fencing the guest node but there's no stop for the guest * resource, we must think the guest is already stopped. However, we may * think so because its resource history was just cleaned. To avoid * unnecessarily considering the guest node down if it's really up, * order the pseudo-fencing after any stop of the connection resource, * which will be ordered after any container (re-)probe. */ stop = find_first_action(node->details->remote_rsc->actions, NULL, RSC_STOP, NULL); if (stop) { order_actions(stop, stonith_op, pe_order_optional); crm_info("Implying guest node %s is down (action %d) " "after connection is stopped (action %d)", node->details->uname, stonith_op->id, stop->id); } else { /* Not sure why we're fencing, but everything must already be * cleanly stopped. */ crm_info("Implying guest node %s is down (action %d) ", node->details->uname, stonith_op->id); } } /* Order/imply other actions relative to pseudo-fence as with real fence */ pcmk__order_vs_fence(stonith_op, data_set); } /* * Create dependencies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t * data_set) { pe_action_t *dc_down = NULL; pe_action_t *stonith_op = NULL; gboolean integrity_lost = FALSE; gboolean need_stonith = TRUE; GList *gIter; GList *stonith_ops = NULL; GList *shutdown_ops = NULL; /* Remote ordering constraints need to happen prior to calculating fencing * because it is one more place we will mark the node as dirty. * * A nice side effect of doing them early is that apply_*_ordering() can be * simpler because pe_fence_node() has already done some of the work. 
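 *
 * The rest of this function then walks every node and schedules either a
 * fencing pseudo-op (unclean node, or guest needing reset) or a shutdown op,
 * remembering any action that takes the DC down so it can be ordered
 * relative to the other down actions at the end.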
*/ crm_trace("Creating remote ordering constraints"); apply_remote_node_ordering(data_set); crm_trace("Processing fencing and shutdown cases"); if (any_managed_resources(data_set) == FALSE) { crm_notice("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } /* Check each node for stonith/shutdown */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; /* Guest nodes are "fenced" by recovering their container resource, * so handle them separately. */ if (pe__is_guest_node(node)) { if (node->details->remote_requires_reset && need_stonith && pe_can_fence(data_set, node)) { fence_guest(node, data_set); } continue; } stonith_op = NULL; if (node->details->unclean && need_stonith && pe_can_fence(data_set, node)) { stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, data_set); pe_warn("Scheduling Node %s for STONITH", node->details->uname); pcmk__order_vs_fence(stonith_op, data_set); if (node->details->is_dc) { // Remember if the DC is being fenced dc_down = stonith_op; } else { if (!pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing) && (stonith_ops != NULL)) { /* Concurrent fencing is disabled, so order each non-DC * fencing in a chain. If there is any DC fencing or * shutdown, it will be ordered after the last action in the * chain later. */ order_actions((pe_action_t *) stonith_ops->data, stonith_op, pe_order_optional); } // Remember all non-DC fencing actions in a separate list stonith_ops = g_list_prepend(stonith_ops, stonith_op); } } else if (node->details->online && node->details->shutdown && /* TODO define what a shutdown op means for a remote node. * For now we do not send shutdown operations for remote nodes, but * if we can come up with a good use for this in the future, we will. */ pe__is_guest_or_remote_node(node) == FALSE) { pe_action_t *down_op = sched_shutdown_op(node, data_set); if (node->details->is_dc) { // Remember if the DC is being shut down dc_down = down_op; } else { // Remember non-DC shutdowns for later ordering shutdown_ops = g_list_prepend(shutdown_ops, down_op); } } if (node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } } if (integrity_lost) { if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } else if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no-quorum-policy is set to ignore)"); } } if (dc_down != NULL) { /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated * DC elections. However, we don't want to order non-DC shutdowns before * a DC *fencing*, because even though we don't want a node that's * shutting down to become DC, the DC fencing could be ordered before a * clone stop that's also ordered before the shutdowns, thus leading to * a graph loop. 
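 *
 * For example (hypothetical three-node cluster where the DC is also shutting
 * down): node2-shutdown and node3-shutdown each get ordered before the DC's
 * shutdown, so the DC is the last to leave and repeated elections are
 * avoided; if the DC were being fenced instead, no such ordering is added.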
*/ if (pcmk__str_eq(dc_down->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) { pe_action_t *node_stop = (pe_action_t *) gIter->data; crm_debug("Ordering shutdown on %s before %s on DC %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_optional); } } // Order any non-DC fencing before any DC fencing or shutdown if (pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)) { /* With concurrent fencing, order each non-DC fencing action * separately before any DC fencing or shutdown. */ for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) { order_actions((pe_action_t *) gIter->data, dc_down, pe_order_optional); } } else if (stonith_ops) { /* Without concurrent fencing, the non-DC fencing actions are * already ordered relative to each other, so we just need to order * the DC fencing after the last action in the chain (which is the * first item in the list). */ order_actions((pe_action_t *) stonith_ops->data, dc_down, pe_order_optional); } } g_list_free(stonith_ops); g_list_free(shutdown_ops); return TRUE; } /* * Determine the sets of independent actions and the correct order for the * actions in each set. * * Mark dependencies of un-runnable actions un-runnable * */ static GList * find_actions_by_task(GList *actions, pe_resource_t * rsc, const char *original_key) { GList *list = NULL; list = find_actions(actions, original_key, NULL); if (list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *task = NULL; guint interval_ms = 0; if (parse_op_key(original_key, NULL, &task, &interval_ms)) { key = pcmk__op_key(rsc->id, task, interval_ms); list = find_actions(actions, key, NULL); } else { crm_err("search key: %s", original_key); } free(key); free(task); } return list; } static void rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc, pe__ordering_t *order) { GList *gIter = NULL; GList *rh_actions = NULL; pe_action_t *rh_action = NULL; enum pe_ordering type; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); type = order->type; rh_action = order->rh_action; crm_trace("Applying ordering constraint %d (then: %s)", order->id, rsc->id); if (rh_action != NULL) { rh_actions = g_list_prepend(NULL, rh_action); } else if (rsc != NULL) { rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task); } if (rh_actions == NULL) { pe_rsc_trace(rsc, "Ignoring constraint %d: then (%s for %s) not found", order->id, order->rh_action_task, rsc->id); return; } if ((lh_action != NULL) && (lh_action->rsc == rsc) && pcmk_is_set(lh_action->flags, pe_action_dangle)) { pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid, order->rh_action_task); pe__clear_order_flags(type, pe_order_implies_then); } gIter = rh_actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *rh_action_iter = (pe_action_t *) gIter->data; if (lh_action) { order_actions(lh_action, rh_action_iter, type); } else if (type & pe_order_implies_then) { pe__clear_action_flags(rh_action_iter, pe_action_runnable); crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type); } } g_list_free(rh_actions); } static void rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order, pe_working_set_t *data_set) { GList *lh_actions = NULL; pe_action_t *lh_action = order->lh_action; pe_resource_t *rh_rsc = order->rh_rsc; CRM_ASSERT(lh_rsc != NULL); 
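    /* What follows finds the "first" actions for this constraint (creating
     * the named action on the fly when appropriate, or skipping a stop/demote
     * for a resource that is already stopped/unpromoted), then orders each of
     * them against the "then" side via rsc_order_then() or order_actions().
     */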
pe_rsc_trace(lh_rsc, "Applying ordering constraint %d (first: %s)", order->id, lh_rsc->id); if (lh_action != NULL) { lh_actions = g_list_prepend(NULL, lh_action); } else { lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task); } if ((lh_actions == NULL) && (lh_rsc == rh_rsc)) { pe_rsc_trace(lh_rsc, "Ignoring constraint %d: first (%s for %s) not found", order->id, order->lh_action_task, lh_rsc->id); } else if (lh_actions == NULL) { char *key = NULL; char *op_type = NULL; guint interval_ms = 0; parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms); key = pcmk__op_key(lh_rsc->id, op_type, interval_ms); if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && pcmk__str_eq(op_type, RSC_STOP, pcmk__str_casei)) { free(key); pe_rsc_trace(lh_rsc, "Ignoring constraint %d: first (%s for %s) not found", order->id, order->lh_action_task, lh_rsc->id); } else if ((lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_UNPROMOTED) && pcmk__str_eq(op_type, RSC_DEMOTE, pcmk__str_casei)) { free(key); pe_rsc_trace(lh_rsc, "Ignoring constraint %d: first (%s for %s) not found", order->id, order->lh_action_task, lh_rsc->id); } else { pe_rsc_trace(lh_rsc, "Creating first (%s for %s) for constraint %d ", order->lh_action_task, lh_rsc->id, order->id); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); lh_actions = g_list_prepend(NULL, lh_action); } free(op_type); } if (rh_rsc == NULL) { if (order->rh_action == NULL) { pe_rsc_trace(lh_rsc, "Ignoring constraint %d: then not found", order->id); return; } rh_rsc = order->rh_action->rsc; } for (GList *gIter = lh_actions; gIter != NULL; gIter = gIter->next) { lh_action = (pe_action_t *) gIter->data; if (rh_rsc == NULL) { order_actions(lh_action, order->rh_action, order->type); } else { rsc_order_then(lh_action, rh_rsc, order); } } g_list_free(lh_actions); } extern void update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set); static int is_recurring_action(pe_action_t *action) { guint interval_ms; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { return 0; } return (interval_ms > 0); } static void apply_container_ordering(pe_action_t *action, pe_working_set_t *data_set) { /* VMs are also classified as containers for these purposes... in * that they both involve a 'thing' running on a real or remote * cluster node. * * This allows us to be smarter about the type and extent of * recovery actions required in various scenarios */ pe_resource_t *remote_rsc = NULL; pe_resource_t *container = NULL; enum action_tasks task = text2task(action->task); CRM_ASSERT(action->rsc); CRM_ASSERT(action->node); CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); container = remote_rsc->container; CRM_ASSERT(container); if (pcmk_is_set(container->flags, pe_rsc_failed)) { pe_fence_node(data_set, action->node, "container failed", FALSE); } crm_trace("Order %s action %s relative to %s%s for %s%s", action->task, action->uuid, pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, pcmk_is_set(container->flags, pe_rsc_failed)? "failed " : "", container->id); if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). 
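 * That is, a migrate_to/migrate_from for a resource hosted on the guest is
 * ordered below as if it were a stop of that resource.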
*/ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: /* Force resource recovery if the container is recovered */ order_start_then_action(container, action, pe_order_implies_then, data_set); /* Wait for the connection resource to be up too */ order_start_then_action(remote_rsc, action, pe_order_none, data_set); break; case stop_rsc: case action_demote: if (pcmk_is_set(container->flags, pe_rsc_failed)) { /* When the container representing a guest node fails, any stop * or demote actions for resources running on the guest node * are implied by the container stopping. This is similar to * how fencing operations work for cluster nodes and remote * nodes. */ } else { /* Ensure the operation happens before the connection is brought * down. * * If we really wanted to, we could order these after the * connection start, IFF the container's current role was * stopped (otherwise we re-introduce an ordering loop when the * connection is restarting). */ order_action_then_stop(action, remote_rsc, pe_order_none, data_set); } break; default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ if(task != no_action) { order_start_then_action(remote_rsc, action, pe_order_implies_then, data_set); } } else { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } break; } } static enum remote_connection_state get_remote_node_state(pe_node_t *node) { pe_resource_t *remote_rsc = NULL; pe_node_t *cluster_node = NULL; CRM_ASSERT(node); remote_rsc = node->details->remote_rsc; CRM_ASSERT(remote_rsc); cluster_node = pe__current_node(remote_rsc); /* If the cluster node the remote connection resource resides on * is unclean or went offline, we can't process any operations * on that remote node until after it starts elsewhere. */ if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) { /* The connection resource is not going to run anywhere */ if (cluster_node && cluster_node->details->unclean) { /* The remote connection is failed because its resource is on a * failed node and can't be recovered elsewhere, so we must fence. */ return remote_state_failed; } if (!pcmk_is_set(remote_rsc->flags, pe_rsc_failed)) { /* Connection resource is cleanly stopped */ return remote_state_stopped; } /* Connection resource is failed */ if ((remote_rsc->next_role == RSC_ROLE_STOPPED) && remote_rsc->remote_reconnect_ms && node->details->remote_was_fenced && !pe__shutdown_requested(node)) { /* We won't know whether the connection is recoverable until the * reconnect interval expires and we reattempt connection. */ return remote_state_unknown; } /* The remote connection is in a failed state. If there are any * resources known to be active on it (stop) or in an unknown state * (probe), we must assume the worst and fence it. 
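 *
 * To summarize the mapping built up in this function: a cleanly stopped
 * connection yields remote_state_stopped; an unrecoverable failure yields
 * remote_state_failed; a connection that may still be recovered (not placed
 * anywhere yet, or waiting out its reconnect interval) yields
 * remote_state_unknown; one moving off a dead node or mid-migration yields
 * remote_state_resting; anything else is remote_state_alive.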
*/ return remote_state_failed; } else if (cluster_node == NULL) { /* Connection is recoverable but not currently running anywhere, see if we can recover it first */ return remote_state_unknown; } else if(cluster_node->details->unclean == TRUE || cluster_node->details->online == FALSE) { /* Connection is running on a dead node, see if we can recover it first */ return remote_state_resting; } else if (pcmk__list_of_multiple(remote_rsc->running_on) && remote_rsc->partial_migration_source && remote_rsc->partial_migration_target) { /* We're in the middle of migrating a connection resource, * wait until after the resource migrates before performing * any actions. */ return remote_state_resting; } return remote_state_alive; } /*! * \internal * \brief Order actions on remote node relative to actions for the connection */ static void apply_remote_ordering(pe_action_t *action, pe_working_set_t *data_set) { pe_resource_t *remote_rsc = NULL; enum action_tasks task = text2task(action->task); enum remote_connection_state state = get_remote_node_state(action->node); enum pe_ordering order_opts = pe_order_none; if (action->rsc == NULL) { return; } CRM_ASSERT(action->node); CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); crm_trace("Order %s action %s relative to %s%s (state: %s)", action->task, action->uuid, pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, state2text(state)); if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: order_opts = pe_order_none; if (state == remote_state_failed) { /* Force recovery, by making this action required */ pe__set_order_flags(order_opts, pe_order_implies_then); } /* Ensure connection is up before running this action */ order_start_then_action(remote_rsc, action, order_opts, data_set); break; case stop_rsc: if(state == remote_state_alive) { order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else if(state == remote_state_failed) { /* The resource is active on the node, but since we don't have a * valid connection, the only way to stop the resource is by * fencing the node. There is no need to order the stop relative * to the remote connection, since the stop will become implied * by the fencing. */ pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable", FALSE); } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) { /* State must be remote_state_unknown or remote_state_stopped. * Since the connection is not coming back up in this * transition, stop this resource first. */ order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else { /* The connection is going to be started somewhere else, so * stop this resource after that completes. */ order_start_then_action(remote_rsc, action, pe_order_none, data_set); } break; case action_demote: /* Only order this demote relative to the connection start if the * connection isn't being torn down. Otherwise, the demote would be * blocked because the connection start would not be allowed. 
*/ if(state == remote_state_resting || state == remote_state_unknown) { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } /* Otherwise we can rely on the stop ordering */ break; default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ order_start_then_action(remote_rsc, action, pe_order_implies_then, data_set); } else { pe_node_t *cluster_node = pe__current_node(remote_rsc); if(task == monitor_rsc && state == remote_state_failed) { /* We would only be here if we do not know the * state of the resource on the remote node. * Since we have no way to find out, it is * necessary to fence the node. */ pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable", FALSE); } if(cluster_node && state == remote_state_stopped) { /* The connection is currently up, but is going * down permanently. * * Make sure we check services are actually * stopped _before_ we let the connection get * closed */ order_action_then_stop(action, remote_rsc, pe_order_runnable_left, data_set); } else { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } } break; } } static void apply_remote_node_ordering(pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) { return; } for (GList *gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; pe_resource_t *remote = NULL; // We are only interested in resource actions if (action->rsc == NULL) { continue; } /* Special case: If we are clearing the failcount of an actual * remote connection resource, then make sure this happens before * any start of the resource in this transition. */ if (action->rsc->is_remote_node && pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, pcmk__str_casei)) { custom_action_order(action->rsc, NULL, action, action->rsc, pcmk__op_key(action->rsc->id, RSC_START, 0), NULL, pe_order_optional, data_set); continue; } // We are only interested in actions allocated to a node if (action->node == NULL) { continue; } if (!pe__is_guest_or_remote_node(action->node)) { continue; } /* We are only interested in real actions. * * @TODO This is probably wrong; pseudo-actions might be converted to * real actions and vice versa later in update_actions() at the end of * stage7(). */ if (pcmk_is_set(action->flags, pe_action_pseudo)) { continue; } remote = action->node->details->remote_rsc; if (remote == NULL) { // Orphaned continue; } /* Another special case: if a resource is moving to a Pacemaker Remote * node, order the stop on the original node after any start of the * remote connection. This ensures that if the connection fails to * start, we leave the resource running on the original node. */ if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) { for (GList *item = action->rsc->actions; item != NULL; item = item->next) { pe_action_t *rsc_action = item->data; if ((rsc_action->node->details != action->node->details) && pcmk__str_eq(rsc_action->task, RSC_STOP, pcmk__str_casei)) { custom_action_order(remote, start_key(remote), NULL, action->rsc, NULL, rsc_action, pe_order_optional, data_set); } } } /* The action occurs across a remote connection, so create * ordering constraints that guarantee the action occurs while the node * is active (after start, before stop ... things like that). 
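 *
 * For a hypothetical resource "dummy" placed on remote node "remote1", this
 * amounts to orderings like "remote1 start, then dummy start" and "dummy
 * stop, then remote1 stop", with the details depending on whether remote1
 * is a plain remote node or a guest node and on the connection's state.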
* * This is somewhat brittle in that we need to make sure the results of * this ordering are compatible with the result of get_router_node(). * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part * of this logic rather than action2xml(). */ if (remote->container) { crm_trace("Container ordering for %s", action->uuid); apply_container_ordering(action, data_set); } else { crm_trace("Remote ordering for %s", action->uuid); apply_remote_ordering(action, data_set); } } } static gboolean order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action) { /* No need to probe the resource on the node that is being * unfenced. Otherwise it might introduce transition loop * since probe will be performed after the node is * unfenced. */ if (pcmk__str_eq(rh_action->task, CRM_OP_FENCE, pcmk__str_casei) && probe->node && rh_action->node && probe->node->details == rh_action->node->details) { const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action"); if (pcmk__str_eq(op, "on", pcmk__str_casei)) { return TRUE; } } // Shutdown waits for probe to complete only if it's on the same node if ((pcmk__str_eq(rh_action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) && probe->node && rh_action->node && probe->node->details != rh_action->node->details) { return TRUE; } return FALSE; } static void order_first_probes_imply_stops(pe_working_set_t * data_set) { GList *gIter = NULL; for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) { pe__ordering_t *order = gIter->data; enum pe_ordering order_type = pe_order_optional; pe_resource_t *lh_rsc = order->lh_rsc; pe_resource_t *rh_rsc = order->rh_rsc; pe_action_t *lh_action = order->lh_action; pe_action_t *rh_action = order->rh_action; const char *lh_action_task = order->lh_action_task; const char *rh_action_task = order->rh_action_task; GList *probes = NULL; GList *rh_actions = NULL; GList *pIter = NULL; if (lh_rsc == NULL) { continue; } else if (rh_rsc && lh_rsc == rh_rsc) { continue; } if (lh_action == NULL && lh_action_task == NULL) { continue; } if (rh_action == NULL && rh_action_task == NULL) { continue; } /* Technically probe is expected to return "not running", which could be * the alternative of stop action if the status of the resource is * unknown yet. */ if (lh_action && !pcmk__str_eq(lh_action->task, RSC_STOP, pcmk__str_casei)) { continue; } else if (lh_action == NULL && lh_action_task && !pcmk__ends_with(lh_action_task, "_" RSC_STOP "_0")) { continue; } /* Do not probe the resource inside of a stopping container. Otherwise * it might introduce transition loop since probe will be performed * after the container starts again. 
*/ if (rh_rsc && lh_rsc->container == rh_rsc) { if (rh_action && pcmk__str_eq(rh_action->task, RSC_STOP, pcmk__str_casei)) { continue; } else if (rh_action == NULL && rh_action_task && pcmk__ends_with(rh_action_task,"_" RSC_STOP "_0")) { continue; } } if (order->type == pe_order_none) { continue; } // Preserve the order options for future filtering if (pcmk_is_set(order->type, pe_order_apply_first_non_migratable)) { pe__set_order_flags(order_type, pe_order_apply_first_non_migratable); } if (pcmk_is_set(order->type, pe_order_same_node)) { pe__set_order_flags(order_type, pe_order_same_node); } // Keep the order types for future filtering if (order->type == pe_order_anti_colocation || order->type == pe_order_load) { order_type = order->type; } probes = pe__resource_actions(lh_rsc, NULL, RSC_STATUS, FALSE); if (probes == NULL) { continue; } if (rh_action) { rh_actions = g_list_prepend(rh_actions, rh_action); } else if (rh_rsc && rh_action_task) { rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL); } if (rh_actions == NULL) { g_list_free(probes); continue; } crm_trace("Processing for LH probe based on ordering constraint %s -> %s" " (id=%d, type=%.6x)", lh_action ? lh_action->uuid : lh_action_task, rh_action ? rh_action->uuid : rh_action_task, order->id, order->type); for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; GList *rIter = NULL; for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) { pe_action_t *rh_action_iter = (pe_action_t *) rIter->data; if (order_first_probe_unneeded(probe, rh_action_iter)) { continue; } order_actions(probe, rh_action_iter, order_type); } } g_list_free(rh_actions); g_list_free(probes); } } static void order_first_probe_then_restart_repromote(pe_action_t * probe, pe_action_t * after, pe_working_set_t * data_set) { GList *gIter = NULL; bool interleave = FALSE; pe_resource_t *compatible_rsc = NULL; if (probe == NULL || probe->rsc == NULL || probe->rsc->variant != pe_native) { return; } if (after == NULL // Avoid running into any possible loop || pcmk_is_set(after->flags, pe_action_tracking)) { return; } if (!pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) { return; } pe__set_action_flags(after, pe_action_tracking); crm_trace("Processing based on %s %s -> %s %s", probe->uuid, probe->node ? probe->node->details->uname: "", after->uuid, after->node ? after->node->details->uname : ""); if (after->rsc /* Better not build a dependency directly with a clone/group. * We are going to proceed through the ordering chain and build * dependencies with its children. 
*/ && after->rsc->variant == pe_native && probe->rsc != after->rsc) { GList *then_actions = NULL; enum pe_ordering probe_order_type = pe_order_optional; if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) { then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP, FALSE); } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) { then_actions = pe__resource_actions(after->rsc, NULL, RSC_DEMOTE, FALSE); } for (gIter = then_actions; gIter != NULL; gIter = gIter->next) { pe_action_t *then = (pe_action_t *) gIter->data; // Skip any pseudo action which for example is implied by fencing if (pcmk_is_set(then->flags, pe_action_pseudo)) { continue; } order_actions(probe, then, probe_order_type); } g_list_free(then_actions); } if (after->rsc && after->rsc->variant > pe_group) { const char *interleave_s = g_hash_table_lookup(after->rsc->meta, XML_RSC_ATTR_INTERLEAVE); interleave = crm_is_true(interleave_s); if (interleave) { /* For an interleaved clone, we should build a dependency only * with the relevant clone child. */ compatible_rsc = find_compatible_child(probe->rsc, after->rsc, RSC_ROLE_UNKNOWN, FALSE, data_set); } } for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) gIter->data; /* pe_order_implies_then is the reason why a required A.start * implies/enforces B.start to be required too, which is the cause of * B.restart/re-promote. * * Not sure about pe_order_implies_then_on_node though. It's now only * used for unfencing case, which tends to introduce transition * loops... */ if (!pcmk_is_set(after_wrapper->type, pe_order_implies_then)) { /* The order type between a group/clone and its child such as * B.start-> B_child.start is: * pe_order_implies_first_printed | pe_order_runnable_left * * Proceed through the ordering chain and build dependencies with * its children. */ if (after->rsc == NULL || after->rsc->variant < pe_group || probe->rsc->parent == after->rsc || after_wrapper->action->rsc == NULL || after_wrapper->action->rsc->variant > pe_group || after->rsc != after_wrapper->action->rsc->parent) { continue; } /* Proceed to the children of a group or a non-interleaved clone. * For an interleaved clone, proceed only to the relevant child. */ if (after->rsc->variant > pe_group && interleave == TRUE && (compatible_rsc == NULL || compatible_rsc != after_wrapper->action->rsc)) { continue; } } crm_trace("Proceeding through %s %s -> %s %s (type=0x%.6x)", after->uuid, after->node ? after->node->details->uname: "", after_wrapper->action->uuid, after_wrapper->action->node ? 
after_wrapper->action->node->details->uname : "", after_wrapper->type); order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set); } } static void clear_actions_tracking_flag(pe_working_set_t * data_set) { GList *gIter = NULL; for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (pcmk_is_set(action->flags, pe_action_tracking)) { pe__clear_action_flags(action, pe_action_tracking); } } } static void order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set) { GList *gIter = NULL; GList *probes = NULL; g_list_foreach(rsc->children, (GFunc) order_first_rsc_probes, data_set); if (rsc->variant != pe_native) { return; } probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE); for (gIter = probes; gIter != NULL; gIter= gIter->next) { pe_action_t *probe = (pe_action_t *) gIter->data; GList *aIter = NULL; for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) { pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) aIter->data; order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set); clear_actions_tracking_flag(data_set); } } g_list_free(probes); } static void order_first_probes(pe_working_set_t * data_set) { GList *gIter = NULL; for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; order_first_rsc_probes(rsc, data_set); } order_first_probes_imply_stops(data_set); } static void order_then_probes(pe_working_set_t * data_set) { #if 0 GList *gIter = NULL; for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* Given "A then B", we would prefer to wait for A to be * started before probing B. * * If A was a filesystem on which the binaries and data for B * lived, it would have been useful if the author of B's agent * could assume that A is running before B.monitor will be * called. * * However we can't _only_ probe once A is running, otherwise * we'd not detect the state of B if A could not be started * for some reason. * * In practice however, we cannot even do an opportunistic * version of this because B may be moving: * * B.probe -> B.start * B.probe -> B.stop * B.stop -> B.start * A.stop -> A.start * A.start -> B.probe * * So far so good, but if we add the result of this code: * * B.stop -> A.stop * * Then we get a loop: * * B.probe -> B.stop -> A.stop -> A.start -> B.probe * * We could kill the 'B.probe -> B.stop' dependency, but that * could mean stopping B "too" soon, because B.start must wait * for the probes to complete. * * Another option is to allow it only if A is a non-unique * clone with clone-max == node-max (since we'll never be * moving it). However, we could still be stopping one * instance at the same time as starting another. * The complexity of checking for allowed conditions combined * with the ever narrowing usecase suggests that this code * should remain disabled until someone gets smarter. 
*/ pe_action_t *start = NULL; GList *actions = NULL; GList *probes = NULL; actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE); if (actions) { start = actions->data; g_list_free(actions); } if(start == NULL) { crm_err("No start action for %s", rsc->id); continue; } probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE); for (actions = start->actions_before; actions != NULL; actions = actions->next) { pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data; GList *pIter = NULL; pe_action_t *first = before->action; pe_resource_t *first_rsc = first->rsc; if(first->required_runnable_before) { GList *clone_actions = NULL; for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) { before = (pe_action_wrapper_t *) clone_actions->data; crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid); CRM_ASSERT(before->action->rsc); first_rsc = before->action->rsc; break; } } else if(!pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) { crm_trace("Not a start op %s for %s", first->uuid, start->uuid); } if(first_rsc == NULL) { continue; } else if(uber_parent(first_rsc) == uber_parent(start->rsc)) { crm_trace("Same parent %s for %s", first_rsc->id, start->uuid); continue; } else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) { crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid); continue; } crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant); for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; crm_err("Ordering %s before %s", first->uuid, probe->uuid); order_actions(first, probe, pe_order_optional); } } } #endif } static void order_probes(pe_working_set_t * data_set) { order_first_probes(data_set); order_then_probes(data_set); } gboolean stage7(pe_working_set_t * data_set) { pcmk__output_t *prev_out = data_set->priv; pcmk__output_t *out = NULL; GList *gIter = NULL; crm_trace("Applying ordering constraints"); /* Don't ask me why, but apparently they need to be processed in * the order they were created in... 
go figure * * Also g_list_append() has horrendous performance characteristics * So we need to use g_list_prepend() and then reverse the list here */ data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints); for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) { pe__ordering_t *order = gIter->data; pe_resource_t *rsc = order->lh_rsc; if (rsc != NULL) { rsc_order_first(rsc, order, data_set); continue; } rsc = order->rh_rsc; if (rsc != NULL) { rsc_order_then(order->lh_action, rsc, order); } else { crm_trace("Applying ordering constraint %d (non-resource actions)", order->id); order_actions(order->lh_action, order->rh_action, order->type); } } g_list_foreach(data_set->actions, (GFunc) update_colo_start_chain, data_set); crm_trace("Ordering probes"); order_probes(data_set); crm_trace("Updating %d actions", g_list_length(data_set->actions)); g_list_foreach(data_set->actions, (GFunc) update_action, data_set); // Check for invalid orderings for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; pe_action_wrapper_t *input = NULL; for (GList *input_iter = action->actions_before; input_iter != NULL; input_iter = input_iter->next) { input = (pe_action_wrapper_t *) input_iter->data; if (pcmk__ordering_is_invalid(action, input)) { input->type = pe_order_none; } } } /* stage7 only ever outputs to the log, so ignore whatever output object was * previously set and just log instead. */ out = pcmk__new_logger(); if (out == NULL) { return FALSE; } pcmk__output_set_log_level(out, LOG_NOTICE); data_set->priv = out; out->begin_list(out, NULL, NULL, "Actions"); LogNodeActions(data_set); g_list_foreach(data_set->resources, (GFunc) LogActions, data_set); out->end_list(out); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); data_set->priv = prev_out; return TRUE; } static int transition_id = -1; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const char *filename) { if (was_processing_error) { crm_err("Calculated transition %d (with errors)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else if (was_processing_warning) { crm_warn("Calculated transition %d (with warnings)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else { crm_notice("Calculated transition %d%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? 
"" : filename); } if (crm_config_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } /* * Create a dependency graph to send to the transitioner (via the controller) */ gboolean stage8(pe_working_set_t * data_set) { GList *gIter = NULL; const char *value = NULL; long long limit = 0LL; transition_id++; crm_trace("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { crm_xml_add(data_set->graph, "migration-limit", value); } if (data_set->recheck_by > 0) { char *recheck_epoch = NULL; recheck_epoch = crm_strdup_printf("%llu", (long long) data_set->recheck_by); crm_xml_add(data_set->graph, "recheck-by", recheck_epoch); free(recheck_epoch); } -/* errors... - slist_iter(action, pe_action_t, action_list, lpc, - if(action->optional == FALSE && action->runnable == FALSE) { - print_action("Ignoring", action, TRUE); - } - ); -*/ - /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. */ gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id); rsc->cmds->expand(rsc, data_set); } crm_log_xml_trace(data_set->graph, "created resource-driven action list"); /* pseudo action to distribute list of nodes with maintenance state update */ add_maintenance_update(data_set); /* catch any non-resource specific actions */ crm_trace("processing non-resource actions"); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (action->rsc && action->node && action->node->details->shutdown && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance) && !pcmk_any_flags_set(action->flags, pe_action_optional|pe_action_runnable) && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none) ) { /* Eventually we should just ignore the 'fence' case * But for now it's the best way to detect (in CTS) when * CIB resource updates are being lost */ if (pcmk_is_set(data_set->flags, pe_flag_have_quorum) || data_set->no_quorum_policy == no_quorum_ignore) { crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)", action->node->details->unclean ? "fence" : "shut down", action->node->details->uname, action->rsc->id, pcmk_is_set(action->rsc->flags, pe_rsc_managed)? " blocked" : " unmanaged", pcmk_is_set(action->rsc->flags, pe_rsc_failed)? 
" failed" : "", action->uuid); } } graph_element_from_action(action, data_set); } crm_log_xml_trace(data_set->graph, "created generic action list"); crm_trace("Created transition graph %d.", transition_id); return TRUE; } void LogNodeActions(pe_working_set_t * data_set) { pcmk__output_t *out = data_set->priv; GList *gIter = NULL; for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { char *node_name = NULL; char *task = NULL; pe_action_t *action = (pe_action_t *) gIter->data; if (action->rsc != NULL) { continue; } else if (pcmk_is_set(action->flags, pe_action_optional)) { continue; } if (pe__is_guest_node(action->node)) { node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id); } else if(action->node) { node_name = crm_strdup_printf("%s", action->node->details->uname); } if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { task = strdup("Shutdown"); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { const char *op = g_hash_table_lookup(action->meta, "stonith_action"); task = crm_strdup_printf("Fence (%s)", op); } out->message(out, "node-action", task, node_name, action->reason); free(node_name); free(task); } } diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c index f207a17cfe..dcbea8382f 100644 --- a/lib/pacemaker/pcmk_sched_transition.c +++ b/lib/pacemaker/pcmk_sched_transition.c @@ -1,854 +1,855 @@ /* * Copyright 2009-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include // lrmd_event_data_t, lrmd_free_event() #include #include #include #include #include #include static pcmk__output_t *out = NULL; static cib_t *fake_cib = NULL; static GList *fake_resource_list = NULL; static GList *fake_op_fail_list = NULL; gboolean bringing_nodes_online = FALSE; #define STATUS_PATH_MAX 512 #define NEW_NODE_TEMPLATE "//"XML_CIB_TAG_NODE"[@uname='%s']" #define NODE_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']" #define RSC_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static void inject_transient_attr(xmlNode * cib_node, const char *name, const char *value) { xmlNode *attrs = NULL; xmlNode *instance_attrs = NULL; const char *node_uuid = ID(cib_node); out->message(out, "inject-attr", name, value, cib_node); attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS); if (attrs == NULL) { attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS); crm_xml_add(attrs, XML_ATTR_ID, node_uuid); } instance_attrs = first_named_child(attrs, XML_TAG_ATTR_SETS); if (instance_attrs == NULL) { instance_attrs = create_xml_node(attrs, XML_TAG_ATTR_SETS); crm_xml_add(instance_attrs, XML_ATTR_ID, node_uuid); } crm_create_nvpair_xml(instance_attrs, NULL, name, value); } static void update_failcounts(xmlNode * cib_node, const char *resource, const char *task, guint interval_ms, int rc) { if (rc == 0) { return; } else if ((rc == 7) && (interval_ms == 0)) { return; } else { char *name = NULL; char *now = pcmk__ttoa(time(NULL)); name = pcmk__failcount_name(resource, task, interval_ms); inject_transient_attr(cib_node, name, "value++"); free(name); name = pcmk__lastfailure_name(resource, task, interval_ms); inject_transient_attr(cib_node, name, now); 
free(name); free(now); } } static void create_node_entry(cib_t * cib_conn, const char *node) { int rc = pcmk_ok; char *xpath = crm_strdup_printf(NEW_NODE_TEMPLATE, node); rc = cib_conn->cmds->query(cib_conn, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); if (rc == -ENXIO) { xmlNode *cib_object = create_xml_node(NULL, XML_CIB_TAG_NODE); crm_xml_add(cib_object, XML_ATTR_ID, node); // Use node name as ID crm_xml_add(cib_object, XML_ATTR_UNAME, node); cib_conn->cmds->create(cib_conn, XML_CIB_TAG_NODES, cib_object, cib_sync_call | cib_scope_local); /* Not bothering with subsequent query to see if it exists, we'll bomb out later in the call to query_node_uuid()... */ free_xml(cib_object); } free(xpath); } static lrmd_event_data_t * create_op(xmlNode *cib_resource, const char *task, guint interval_ms, int outcome) { lrmd_event_data_t *op = NULL; xmlNode *xop = NULL; op = lrmd_new_event(ID(cib_resource), task, interval_ms); op->rc = outcome; op->op_status = 0; op->params = NULL; /* TODO: Fill me in */ op->t_run = (unsigned int) time(NULL); op->t_rcchange = op->t_run; op->call_id = 0; for (xop = pcmk__xe_first_child(cib_resource); xop != NULL; xop = pcmk__xe_next(xop)) { int tmp = 0; crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); if (tmp > op->call_id) { op->call_id = tmp; } } op->call_id++; return op; } static xmlNode * inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc) { return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET, target_rc, NULL, crm_system_name, LOG_TRACE); } static xmlNode * inject_node_state(cib_t * cib_conn, const char *node, const char *uuid) { int rc = pcmk_ok; xmlNode *cib_object = NULL; char *xpath = crm_strdup_printf(NODE_TEMPLATE, node); if (bringing_nodes_online) { create_node_entry(cib_conn, node); } rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath | cib_sync_call | cib_scope_local); if (cib_object && ID(cib_object) == NULL) { crm_err("Detected multiple node_state entries for xpath=%s, bailing", xpath); crm_log_xml_warn(cib_object, "Duplicates"); free(xpath); crm_exit(CRM_EX_SOFTWARE); return NULL; // not reached, but makes static analysis happy } if (rc == -ENXIO) { char *found_uuid = NULL; if (uuid == NULL) { query_node_uuid(cib_conn, node, &found_uuid, NULL); } else { found_uuid = strdup(uuid); } cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(cib_object, XML_ATTR_UUID, found_uuid); crm_xml_add(cib_object, XML_ATTR_UNAME, node); cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object, cib_sync_call | cib_scope_local); free_xml(cib_object); free(found_uuid); rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath | cib_sync_call | cib_scope_local); crm_trace("injecting node state for %s. 
rc is %d", node, rc); } free(xpath); CRM_ASSERT(rc == pcmk_ok); return cib_object; } static xmlNode * modify_node(cib_t * cib_conn, char *node, gboolean up) { xmlNode *cib_node = inject_node_state(cib_conn, node, NULL); if (up) { crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_YES); crm_xml_add(cib_node, XML_NODE_IS_PEER, ONLINESTATUS); crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); } else { crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO); crm_xml_add(cib_node, XML_NODE_IS_PEER, OFFLINESTATUS); crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_DOWN); crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_DOWN); } crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name); return cib_node; } static xmlNode * find_resource_xml(xmlNode * cib_node, const char *resource) { xmlNode *match = NULL; const char *node = crm_element_value(cib_node, XML_ATTR_UNAME); char *xpath = crm_strdup_printf(RSC_TEMPLATE, node, resource); match = get_xpath_object(xpath, cib_node, LOG_TRACE); free(xpath); return match; } static xmlNode * inject_resource(xmlNode * cib_node, const char *resource, const char *lrm_name, const char *rclass, const char *rtype, const char *rprovider) { xmlNode *lrm = NULL; xmlNode *container = NULL; xmlNode *cib_resource = NULL; char *xpath = NULL; cib_resource = find_resource_xml(cib_node, resource); if (cib_resource != NULL) { /* If an existing LRM history entry uses the resource name, * continue using it, even if lrm_name is different. */ return cib_resource; } // Check for history entry under preferred name if (strcmp(resource, lrm_name)) { cib_resource = find_resource_xml(cib_node, lrm_name); if (cib_resource != NULL) { return cib_resource; } } /* One day, add query for class, provider, type */ if (rclass == NULL || rtype == NULL) { out->err(out, "Resource %s not found in the status section of %s." 
" Please supply the class and type to continue", resource, ID(cib_node)); return NULL; } else if (!pcmk__strcase_any_of(rclass, PCMK_RESOURCE_CLASS_OCF, PCMK_RESOURCE_CLASS_STONITH, PCMK_RESOURCE_CLASS_SERVICE, PCMK_RESOURCE_CLASS_UPSTART, PCMK_RESOURCE_CLASS_SYSTEMD, PCMK_RESOURCE_CLASS_LSB, NULL)) { out->err(out, "Invalid class for %s: %s", resource, rclass); return NULL; } else if (pcmk_is_set(pcmk_get_ra_caps(rclass), pcmk_ra_cap_provider) && (rprovider == NULL)) { out->err(out, "Please specify the provider for resource %s", resource); return NULL; } xpath = (char *)xmlGetNodePath(cib_node); crm_info("Injecting new resource %s into %s '%s'", lrm_name, xpath, ID(cib_node)); free(xpath); lrm = first_named_child(cib_node, XML_CIB_TAG_LRM); if (lrm == NULL) { const char *node_uuid = ID(cib_node); lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM); crm_xml_add(lrm, XML_ATTR_ID, node_uuid); } container = first_named_child(lrm, XML_LRM_TAG_RESOURCES); if (container == NULL) { container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES); } cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE); // If we're creating a new entry, use the preferred name crm_xml_add(cib_resource, XML_ATTR_ID, lrm_name); crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass); crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider); crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype); return cib_resource; } #define XPATH_MAX 1024 static int find_ticket_state(cib_t * the_cib, const char *ticket_id, xmlNode ** ticket_state_xml) { int offset = 0; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; CRM_ASSERT(ticket_state_xml != NULL); *ticket_state_xml = NULL; xpath_string = calloc(1, XPATH_MAX); offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s", "/cib/status/tickets"); if (ticket_id) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s[@id=\"%s\"]", XML_CIB_TAG_TICKET_STATE, ticket_id); } CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { if (ticket_id) { out->err(out, "Multiple ticket_states match ticket_id=%s", ticket_id); } *ticket_state_xml = xml_search; } else { *ticket_state_xml = xml_search; } bail: free(xpath_string); return rc; } static int set_ticket_state_attr(const char *ticket_id, const char *attr_name, const char *attr_value, cib_t * cib, int cib_options) { int rc = pcmk_ok; xmlNode *xml_top = NULL; xmlNode *ticket_state_xml = NULL; rc = find_ticket_state(cib, ticket_id, &ticket_state_xml); if (rc == pcmk_ok) { crm_debug("Found a match state for ticket: id=%s", ticket_id); xml_top = ticket_state_xml; } else if (rc != -ENXIO) { return rc; } else { xmlNode *xml_obj = NULL; xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS); ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE); crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id); } crm_xml_add(ticket_state_xml, attr_name, attr_value); crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top, cib_options); free_xml(xml_top); return rc; } void modify_configuration(pe_working_set_t * data_set, cib_t *cib, const char *quorum, const char *watchdog, GList *node_up, GList *node_down, GList *node_fail, GList *op_inject, GList *ticket_grant, GList *ticket_revoke, GList *ticket_standby, GList 
*ticket_activate) { int rc = pcmk_ok; GList *gIter = NULL; xmlNode *cib_op = NULL; xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; lrmd_event_data_t *op = NULL; out = data_set->priv; out->message(out, "inject-modify-config", quorum, watchdog); if (quorum) { xmlNode *top = create_xml_node(NULL, XML_TAG_CIB); /* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */ crm_xml_add(top, XML_ATTR_HAVE_QUORUM, quorum); rc = cib->cmds->modify(cib, NULL, top, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } if (watchdog) { rc = update_attr_delegate(cib, cib_sync_call | cib_scope_local, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, XML_ATTR_HAVE_WATCHDOG, watchdog, FALSE, NULL, NULL); CRM_ASSERT(rc == pcmk_ok); } for (gIter = node_up; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; out->message(out, "inject-modify-node", "Online", node); cib_node = modify_node(cib, node, TRUE); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); } for (gIter = node_down; gIter != NULL; gIter = gIter->next) { char xpath[STATUS_PATH_MAX]; char *node = (char *)gIter->data; out->message(out, "inject-modify-node", "Offline", node); cib_node = modify_node(cib, node, FALSE); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_CIB_TAG_LRM); cib->cmds->remove(cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_TAG_TRANSIENT_NODEATTRS); cib->cmds->remove(cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); } for (gIter = node_fail; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; out->message(out, "inject-modify-node", "Failing", node); cib_node = modify_node(cib, node, TRUE); crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); } for (gIter = ticket_grant; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; out->message(out, "inject-modify-ticket", "Granting", ticket_id); rc = set_ticket_state_attr(ticket_id, "granted", "true", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_revoke; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; out->message(out, "inject-modify-ticket", "Revoking", ticket_id); rc = set_ticket_state_attr(ticket_id, "granted", "false", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_standby; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; out->message(out, "inject-modify-ticket", "Standby", ticket_id); rc = set_ticket_state_attr(ticket_id, "standby", "true", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_activate; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; out->message(out, "inject-modify-ticket", "Activating", ticket_id); rc = set_ticket_state_attr(ticket_id, "standby", "false", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = op_inject; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; 
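/* Illustrative note: the spec format below is inferred from the sscanf()
 * pattern and the parse_op_key() call that follow; the example values are
 * hypothetical.  Each op_inject spec is expected to look like
 * "<resource>_<task>_<interval_ms>@<node>=<rc>".  sscanf() splits it at the
 * '@' and '=' separators, and parse_op_key() then breaks the first field into
 * resource name, task, and interval.  For a hypothetical spec
 * "dummy_monitor_10000@node1=7":
 *
 *   char key[32] = "", node[32] = "";
 *   int outcome = 0;
 *
 *   sscanf("dummy_monitor_10000@node1=7", "%[^@]@%[^=]=%d", key, node, &outcome);
 *   // key = "dummy_monitor_10000", node = "node1", outcome = 7
 *   // parse_op_key(key, ...) would then yield resource "dummy",
 *   // task "monitor", and interval_ms 10000
 */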
int rc = 0; int outcome = 0; guint interval_ms = 0; char *key = NULL; char *node = NULL; char *task = NULL; char *resource = NULL; const char *rtype = NULL; const char *rclass = NULL; const char *rprovider = NULL; pe_resource_t *rsc = NULL; out->message(out, "inject-spec", spec); key = calloc(1, strlen(spec) + 1); node = calloc(1, strlen(spec) + 1); rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome); if (rc != 3) { out->err(out, "Invalid operation spec: %s. Only found %d fields", spec, rc); free(key); free(node); continue; } parse_op_key(key, &resource, &task, &interval_ms); rsc = pe_find_resource(data_set->resources, resource); if (rsc == NULL) { out->err(out, "Invalid resource name: %s", resource); } else { rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); cib_node = inject_node_state(cib, node, NULL); CRM_ASSERT(cib_node != NULL); update_failcounts(cib_node, resource, task, interval_ms, outcome); cib_resource = inject_resource(cib_node, resource, resource, rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = create_op(cib_resource, task, interval_ms, outcome); CRM_ASSERT(op != NULL); cib_op = inject_op(cib_resource, op, 0); CRM_ASSERT(cib_op != NULL); lrmd_free_event(op); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } free(task); free(node); free(key); } if (!out->is_quiet(out)) { out->end_list(out); } } static gboolean exec_pseudo_action(crm_graph_t * graph, crm_action_t * action) { const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); action->confirmed = TRUE; out->message(out, "inject-pseudo-action", node, task); - update_graph(graph, action); + pcmk__update_graph(graph, action); return TRUE; } static gboolean exec_rsc_action(crm_graph_t * graph, crm_action_t * action) { int rc = 0; GList *gIter = NULL; lrmd_event_data_t *op = NULL; int target_outcome = 0; const char *rtype = NULL; const char *rclass = NULL; const char *resource = NULL; const char *rprovider = NULL; const char *lrm_name = NULL; const char *operation = crm_element_value(action->xml, "operation"); const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); char *uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID); const char *router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (pcmk__strcase_any_of(operation, CRM_OP_PROBED, CRM_OP_REPROBE, NULL)) { crm_info("Skipping %s op for %s", operation, node); goto done; } if (action_rsc == NULL) { crm_log_xml_err(action->xml, "Bad"); free(node); free(uuid); return FALSE; } /* Look for the preferred name * If not found, try the expected 'local' name * If not found use the preferred name anyway */ resource = crm_element_value(action_rsc, XML_ATTR_ID); CRM_ASSERT(resource != NULL); // makes static analysis happy lrm_name = resource; // Preferred name when writing history if (pe_find_resource(fake_resource_list, resource) == NULL) { const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG); if (longname && pe_find_resource(fake_resource_list, longname)) { resource = longname; } } if 
(pcmk__strcase_any_of(operation, "delete", RSC_METADATA, NULL)) { out->message(out, "inject-rsc-action", resource, operation, node, (guint) 0); goto done; } rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS); rtype = crm_element_value(action_rsc, XML_ATTR_TYPE); rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER); pcmk__scan_min_int(target_rc_s, &target_outcome, 0); CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL, cib_sync_call | cib_scope_local) == pcmk_ok); cib_node = inject_node_state(fake_cib, node, (router_node? node : uuid)); CRM_ASSERT(cib_node != NULL); cib_resource = inject_resource(cib_node, resource, lrm_name, rclass, rtype, rprovider); if (cib_resource == NULL) { crm_err("invalid resource in transition"); free(node); free(uuid); free_xml(cib_node); return FALSE; } - op = convert_graph_action(cib_resource, action, 0, target_outcome); + op = pcmk__event_from_graph_action(cib_resource, action, 0, target_outcome); out->message(out, "inject-rsc-action", resource, op->op_type, node, op->interval_ms); for (gIter = fake_op_fail_list; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; char *key = NULL; const char *match_name = NULL; // Allow user to specify anonymous clone with or without instance number key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource, op->op_type, op->interval_ms, node); if (strncasecmp(key, spec, strlen(key)) == 0) { match_name = resource; } free(key); if ((match_name == NULL) && strcmp(resource, lrm_name)) { key = crm_strdup_printf(PCMK__OP_FMT "@%s=", lrm_name, op->op_type, op->interval_ms, node); if (strncasecmp(key, spec, strlen(key)) == 0) { match_name = lrm_name; } free(key); } if (match_name != NULL) { rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc); // ${match_name}_${task}_${interval_in_ms}@${node}=${rc} if (rc != 1) { out->err(out, "Invalid failed operation spec: %s. 
Result code must be integer", spec); continue; } action->failed = TRUE; graph->abort_priority = INFINITY; out->info(out, "Pretending action %d failed with rc=%d", action->id, op->rc); update_failcounts(cib_node, match_name, op->op_type, op->interval_ms, op->rc); break; } } inject_op(cib_resource, op, target_outcome); lrmd_free_event(op); rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); done: free(node); free(uuid); free_xml(cib_node); action->confirmed = TRUE; - update_graph(graph, action); + pcmk__update_graph(graph, action); return TRUE; } static gboolean exec_crmd_action(crm_graph_t * graph, crm_action_t * action) { const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); action->confirmed = TRUE; out->message(out, "inject-cluster-action", node, task, rsc); - update_graph(graph, action); + pcmk__update_graph(graph, action); return TRUE; } static gboolean exec_stonith_action(crm_graph_t * graph, crm_action_t * action) { const char *op = crm_meta_value(action->params, "stonith_action"); char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); out->message(out, "inject-fencing-action", target, op); if(!pcmk__str_eq(op, "on", pcmk__str_casei)) { int rc = 0; char xpath[STATUS_PATH_MAX]; xmlNode *cib_node = modify_node(fake_cib, target, FALSE); crm_xml_add(cib_node, XML_ATTR_ORIGIN, __func__); CRM_ASSERT(cib_node != NULL); rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_CIB_TAG_LRM); fake_cib->cmds->remove(fake_cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_TAG_TRANSIENT_NODEATTRS); fake_cib->cmds->remove(fake_cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); free_xml(cib_node); } action->confirmed = TRUE; - update_graph(graph, action); + pcmk__update_graph(graph, action); free(target); return TRUE; } int run_simulation(pe_working_set_t * data_set, cib_t *cib, GList *op_fail_list) { crm_graph_t *transition = NULL; - enum transition_status graph_rc = -1; + enum transition_status graph_rc; crm_graph_functions_t exec_fns = { exec_pseudo_action, exec_rsc_action, exec_crmd_action, exec_stonith_action, }; out = data_set->priv; fake_cib = cib; fake_op_fail_list = op_fail_list; if (!out->is_quiet(out)) { out->begin_list(out, NULL, NULL, "Executing Cluster Transition"); } - set_graph_functions(&exec_fns); - transition = unpack_graph(data_set->graph, crm_system_name); - print_graph(LOG_DEBUG, transition); + pcmk__set_graph_functions(&exec_fns); + transition = pcmk__unpack_graph(data_set->graph, crm_system_name); + pcmk__log_graph(LOG_DEBUG, transition); fake_resource_list = data_set->resources; do { - graph_rc = run_graph(transition); + graph_rc = pcmk__execute_graph(transition); } while (graph_rc == transition_active); fake_resource_list = NULL; if (graph_rc != transition_complete) { - out->err(out, "Transition failed: %s", transition_status(graph_rc)); - print_graph(LOG_ERR, transition); + out->err(out, "Transition failed: %s", + pcmk__graph_status2text(graph_rc)); + pcmk__log_graph(LOG_ERR, transition); } - destroy_graph(transition); + pcmk__free_graph(transition); if (graph_rc != 
transition_complete) { out->err(out, "An invalid transition was produced"); } if (!out->is_quiet(out)) { xmlNode *cib_object = NULL; int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); pe_reset_working_set(data_set); data_set->input = cib_object; out->end_list(out); } if (graph_rc != transition_complete) { return graph_rc; } return 0; } diff --git a/lib/pacemaker/pcmk_trans_graph.c b/lib/pacemaker/pcmk_trans_graph.c deleted file mode 100644 index 77ed9d6c2f..0000000000 --- a/lib/pacemaker/pcmk_trans_graph.c +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Copyright 2004-2019 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * - * This source code is licensed under the GNU Lesser General Public License - * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. - */ - -#include - -#include -#include -#include -#include - -crm_graph_functions_t *graph_fns = NULL; - -static gboolean -update_synapse_ready(synapse_t * synapse, int action_id) -{ - GList *lpc = NULL; - gboolean updates = FALSE; - - CRM_CHECK(synapse->executed == FALSE, return FALSE); - CRM_CHECK(synapse->confirmed == FALSE, return FALSE); - - synapse->ready = TRUE; - for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { - crm_action_t *prereq = (crm_action_t *) lpc->data; - - crm_trace("Processing input %d", prereq->id); - - if (prereq->id == action_id) { - crm_trace("Marking input %d of synapse %d confirmed", action_id, synapse->id); - prereq->confirmed = TRUE; - updates = TRUE; - - } else if (prereq->confirmed == FALSE) { - synapse->ready = FALSE; - } - - } - - if (updates) { - crm_trace("Updated synapse %d", synapse->id); - } - return updates; -} - -static gboolean -update_synapse_confirmed(synapse_t * synapse, int action_id) -{ - GList *lpc = NULL; - gboolean updates = FALSE; - gboolean is_confirmed = TRUE; - - CRM_CHECK(synapse->executed, return FALSE); - CRM_CHECK(synapse->confirmed == FALSE, return TRUE); - - is_confirmed = TRUE; - for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { - crm_action_t *action = (crm_action_t *) lpc->data; - - crm_trace("Processing action %d", action->id); - - if (action->id == action_id) { - crm_trace("Confirmed: Action %d of Synapse %d", action_id, synapse->id); - action->confirmed = TRUE; - updates = TRUE; - - } else if (action->confirmed == FALSE) { - is_confirmed = FALSE; - crm_trace("Synapse %d still not confirmed after action %d", synapse->id, action_id); - } - } - - if (is_confirmed && synapse->confirmed == FALSE) { - crm_trace("Confirmed: Synapse %d", synapse->id); - synapse->confirmed = TRUE; - updates = TRUE; - } - - if (updates) { - crm_trace("Updated synapse %d", synapse->id); - } - return updates; -} - -gboolean -update_graph(crm_graph_t * graph, crm_action_t * action) -{ - gboolean rc = FALSE; - gboolean updates = FALSE; - GList *lpc = NULL; - - for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { - synapse_t *synapse = (synapse_t *) lpc->data; - - if (synapse->confirmed || synapse->failed) { - crm_trace("Synapse complete"); - - } else if (synapse->executed) { - crm_trace("Synapse executed"); - rc = update_synapse_confirmed(synapse, action->id); - - } else if (action->failed == FALSE || synapse->priority == INFINITY) { - rc = update_synapse_ready(synapse, action->id); - } - updates = updates || rc; - } - - if (updates) { - crm_trace("Updated graph with completed action %d", action->id); - } - return updates; -} - -static gboolean 
-should_fire_synapse(crm_graph_t * graph, synapse_t * synapse) -{ - GList *lpc = NULL; - - CRM_CHECK(synapse->executed == FALSE, return FALSE); - CRM_CHECK(synapse->confirmed == FALSE, return FALSE); - - crm_trace("Checking pre-reqs for synapse %d", synapse->id); - /* lookup prereqs */ - synapse->ready = TRUE; - for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { - crm_action_t *prereq = (crm_action_t *) lpc->data; - - crm_trace("Processing input %d", prereq->id); - if (prereq->confirmed == FALSE) { - crm_trace("Input %d for synapse %d not satisfied: not confirmed", prereq->id, synapse->id); - synapse->ready = FALSE; - break; - } else if(prereq->failed && prereq->can_fail == FALSE) { - crm_trace("Input %d for synapse %d not satisfied: failed", prereq->id, synapse->id); - synapse->ready = FALSE; - break; - } - } - - for (lpc = synapse->actions; synapse->ready && lpc != NULL; lpc = lpc->next) { - crm_action_t *a = (crm_action_t *) lpc->data; - - if (a->type == action_type_pseudo) { - /* None of the below applies to pseudo ops */ - - } else if (synapse->priority < graph->abort_priority) { - crm_trace("Skipping synapse %d: abort level %d", synapse->id, graph->abort_priority); - graph->skipped++; - return FALSE; - - } else if(graph_fns->allowed && graph_fns->allowed(graph, a) == FALSE) { - crm_trace("Deferring synapse %d: allowed", synapse->id); - return FALSE; - } - } - - return synapse->ready; -} - -static gboolean -initiate_action(crm_graph_t * graph, crm_action_t * action) -{ - const char *id = NULL; - - CRM_CHECK(action->executed == FALSE, return FALSE); - - id = ID(action->xml); - CRM_CHECK(id != NULL, return FALSE); - - action->executed = TRUE; - if (action->type == action_type_pseudo) { - crm_trace("Executing pseudo-event: %s (%d)", id, action->id); - return graph_fns->pseudo(graph, action); - - } else if (action->type == action_type_rsc) { - crm_trace("Executing rsc-event: %s (%d)", id, action->id); - return graph_fns->rsc(graph, action); - - } else if (action->type == action_type_crm) { - const char *task = NULL; - - task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); - CRM_CHECK(task != NULL, return FALSE); - - if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) { - crm_trace("Executing STONITH-event: %s (%d)", id, action->id); - return graph_fns->stonith(graph, action); - } - - crm_trace("Executing crm-event: %s (%d)", id, action->id); - return graph_fns->crmd(graph, action); - } - - crm_err("Failed on unsupported command type: %s (id=%s)", crm_element_name(action->xml), id); - return FALSE; -} - -static gboolean -fire_synapse(crm_graph_t * graph, synapse_t * synapse) -{ - GList *lpc = NULL; - - CRM_CHECK(synapse != NULL, return FALSE); - CRM_CHECK(synapse->ready, return FALSE); - CRM_CHECK(synapse->confirmed == FALSE, return TRUE); - - crm_trace("Synapse %d fired", synapse->id); - synapse->executed = TRUE; - for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { - crm_action_t *action = (crm_action_t *) lpc->data; - - /* allow some leeway */ - gboolean passed = FALSE; - - /* Invoke the action and start the timer */ - passed = initiate_action(graph, action); - if (passed == FALSE) { - crm_err("Failed initiating <%s id=%d> in synapse %d", - crm_element_name(action->xml), action->id, synapse->id); - synapse->confirmed = TRUE; - action->confirmed = TRUE; - action->failed = TRUE; - return FALSE; - } - } - - return TRUE; -} - -int -run_graph(crm_graph_t * graph) -{ - GList *lpc = NULL; - int stat_log_level = LOG_DEBUG; - int pass_result = transition_active; - - 
const char *status = "In-progress"; - - if (graph_fns == NULL) { - set_default_graph_functions(); - } - if (graph == NULL) { - return transition_complete; - } - - graph->fired = 0; - graph->pending = 0; - graph->skipped = 0; - graph->completed = 0; - graph->incomplete = 0; - crm_trace("Entering graph %d callback", graph->id); - - /* Pre-calculate the number of completed and in-flight operations */ - for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { - synapse_t *synapse = (synapse_t *) lpc->data; - - if (synapse->confirmed) { - crm_trace("Synapse %d complete", synapse->id); - graph->completed++; - - } else if (synapse->failed == FALSE && synapse->executed) { - crm_trace("Synapse %d: confirmation pending", synapse->id); - graph->pending++; - } - } - - /* Now check if there is work to do */ - for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { - synapse_t *synapse = (synapse_t *) lpc->data; - - if (graph->batch_limit > 0 && graph->pending >= graph->batch_limit) { - crm_debug("Throttling output: batch limit (%d) reached", graph->batch_limit); - break; - } else if (synapse->failed) { - graph->skipped++; - continue; - - } else if (synapse->confirmed || synapse->executed) { - /* Already handled */ - continue; - } - - if (should_fire_synapse(graph, synapse)) { - crm_trace("Synapse %d fired", synapse->id); - graph->fired++; - if(fire_synapse(graph, synapse) == FALSE) { - crm_err("Synapse %d failed to fire", synapse->id); - stat_log_level = LOG_ERR; - graph->abort_priority = INFINITY; - graph->incomplete++; - graph->fired--; - } - - if (synapse->confirmed == FALSE) { - graph->pending++; - } - - } else { - crm_trace("Synapse %d cannot fire", synapse->id); - graph->incomplete++; - } - } - - if (graph->pending == 0 && graph->fired == 0) { - graph->complete = TRUE; - stat_log_level = LOG_NOTICE; - pass_result = transition_complete; - status = "Complete"; - - if (graph->incomplete != 0 && graph->abort_priority <= 0) { - stat_log_level = LOG_WARNING; - pass_result = transition_terminated; - status = "Terminated"; - - } else if (graph->skipped != 0) { - status = "Stopped"; - } - - } else if (graph->fired == 0) { - pass_result = transition_pending; - } - - do_crm_log(stat_log_level, - "Transition %d (Complete=%d, Pending=%d," - " Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s", - graph->id, graph->completed, graph->pending, graph->fired, - graph->skipped, graph->incomplete, graph->source, status); - - return pass_result; -} diff --git a/lib/pacemaker/pcmk_trans_unpack.c b/lib/pacemaker/pcmk_trans_unpack.c deleted file mode 100644 index 203cbf2d98..0000000000 --- a/lib/pacemaker/pcmk_trans_unpack.c +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright 2004-2021 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * - * This source code is licensed under the GNU Lesser General Public License - * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
- */ - -#include - -#include -#include - -#include -#include -#include -#include -#include - -static crm_action_t * -unpack_action(synapse_t * parent, xmlNode * xml_action) -{ - crm_action_t *action = NULL; - const char *value = crm_element_value(xml_action, XML_ATTR_ID); - - if (value == NULL) { - crm_err("Actions must have an id!"); - crm_log_xml_trace(xml_action, "Action with missing id"); - return NULL; - } - - action = calloc(1, sizeof(crm_action_t)); - if (action == NULL) { - crm_perror(LOG_CRIT, "Cannot unpack action"); - crm_log_xml_trace(xml_action, "Lost action"); - return NULL; - } - - pcmk__scan_min_int(value, &(action->id), -1); - action->type = action_type_rsc; - action->xml = copy_xml(xml_action); - action->synapse = parent; - - if (pcmk__str_eq(crm_element_name(action->xml), XML_GRAPH_TAG_RSC_OP, pcmk__str_casei)) { - action->type = action_type_rsc; - - } else if (pcmk__str_eq(crm_element_name(action->xml), XML_GRAPH_TAG_PSEUDO_EVENT, pcmk__str_casei)) { - action->type = action_type_pseudo; - - } else if (pcmk__str_eq(crm_element_name(action->xml), XML_GRAPH_TAG_CRM_EVENT, pcmk__str_casei)) { - action->type = action_type_crm; - } - - action->params = xml2list(action->xml); - - value = g_hash_table_lookup(action->params, "CRM_meta_timeout"); - pcmk__scan_min_int(value, &(action->timeout), 0); - - /* Take start-delay into account for the timeout of the action timer */ - value = g_hash_table_lookup(action->params, "CRM_meta_start_delay"); - { - int start_delay; - - pcmk__scan_min_int(value, &start_delay, 0); - action->timeout += start_delay; - } - - if (pcmk__guint_from_hash(action->params, - CRM_META "_" XML_LRM_ATTR_INTERVAL, 0, - &(action->interval_ms)) != pcmk_rc_ok) { - action->interval_ms = 0; - } - - value = g_hash_table_lookup(action->params, "CRM_meta_can_fail"); - if (value != NULL) { - crm_str_to_boolean(value, &(action->can_fail)); -#ifndef PCMK__COMPAT_2_0 - if (action->can_fail) { - crm_warn("Support for the can_fail meta-attribute is deprecated" - " and will be removed in a future release"); - } -#endif - } - - crm_trace("Action %d has timer set to %dms", action->id, action->timeout); - - return action; -} - -static synapse_t * -unpack_synapse(crm_graph_t * new_graph, xmlNode * xml_synapse) -{ - const char *value = NULL; - xmlNode *inputs = NULL; - xmlNode *action_set = NULL; - synapse_t *new_synapse = NULL; - - CRM_CHECK(xml_synapse != NULL, return NULL); - crm_trace("looking in synapse %s", ID(xml_synapse)); - - new_synapse = calloc(1, sizeof(synapse_t)); - pcmk__scan_min_int(ID(xml_synapse), &(new_synapse->id), 0); - - value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY); - pcmk__scan_min_int(value, &(new_synapse->priority), 0); - - CRM_CHECK(new_synapse->id >= 0, free(new_synapse); - return NULL); - - new_graph->num_synapses++; - - crm_trace("look for actions in synapse %s", crm_element_value(xml_synapse, XML_ATTR_ID)); - - for (action_set = pcmk__xml_first_child(xml_synapse); action_set != NULL; - action_set = pcmk__xml_next(action_set)) { - - if (pcmk__str_eq((const char *)action_set->name, "action_set", - pcmk__str_none)) { - xmlNode *action = NULL; - - for (action = pcmk__xml_first_child(action_set); action != NULL; - action = pcmk__xml_next(action)) { - crm_action_t *new_action = unpack_action(new_synapse, action); - - if (new_action == NULL) { - continue; - } - - new_graph->num_actions++; - - crm_trace("Adding action %d to synapse %d", new_action->id, new_synapse->id); - - new_synapse->actions = g_list_append(new_synapse->actions, new_action); 
- } - } - } - - crm_trace("look for inputs in synapse %s", ID(xml_synapse)); - - for (inputs = pcmk__xml_first_child(xml_synapse); inputs != NULL; - inputs = pcmk__xml_next(inputs)) { - - if (pcmk__str_eq((const char *)inputs->name, "inputs", pcmk__str_none)) { - xmlNode *trigger = NULL; - - for (trigger = pcmk__xml_first_child(inputs); trigger != NULL; - trigger = pcmk__xml_next(trigger)) { - xmlNode *input = NULL; - - for (input = pcmk__xml_first_child(trigger); input != NULL; - input = pcmk__xml_next(input)) { - crm_action_t *new_input = unpack_action(new_synapse, input); - - if (new_input == NULL) { - continue; - } - - crm_trace("Adding input %d to synapse %d", new_input->id, new_synapse->id); - - new_synapse->inputs = g_list_append(new_synapse->inputs, new_input); - } - } - } - } - - return new_synapse; -} - -static void destroy_action(crm_action_t * action); - -crm_graph_t * -unpack_graph(xmlNode * xml_graph, const char *reference) -{ -/* - - - - - id = -1; - new_graph->abort_priority = 0; - new_graph->network_delay = 0; - new_graph->stonith_timeout = 0; - new_graph->completion_action = tg_done; - - if (reference) { - new_graph->source = strdup(reference); - } else { - new_graph->source = strdup("unknown"); - } - - if (xml_graph != NULL) { - t_id = crm_element_value(xml_graph, "transition_id"); - CRM_CHECK(t_id != NULL, free(new_graph); - return NULL); - pcmk__scan_min_int(t_id, &(new_graph->id), -1); - - time = crm_element_value(xml_graph, "cluster-delay"); - CRM_CHECK(time != NULL, free(new_graph); - return NULL); - new_graph->network_delay = crm_parse_interval_spec(time); - - time = crm_element_value(xml_graph, "stonith-timeout"); - if (time == NULL) { - new_graph->stonith_timeout = new_graph->network_delay; - } else { - new_graph->stonith_timeout = crm_parse_interval_spec(time); - } - - // Use 0 (dynamic limit) as default/invalid, -1 (no limit) as minimum - t_id = crm_element_value(xml_graph, "batch-limit"); - if ((t_id == NULL) - || (pcmk__scan_min_int(t_id, &(new_graph->batch_limit), - -1) != pcmk_rc_ok)) { - new_graph->batch_limit = 0; - } - - t_id = crm_element_value(xml_graph, "migration-limit"); - pcmk__scan_min_int(t_id, &(new_graph->migration_limit), -1); - } - - for (synapse = pcmk__xml_first_child(xml_graph); synapse != NULL; - synapse = pcmk__xml_next(synapse)) { - - if (pcmk__str_eq((const char *)synapse->name, "synapse", pcmk__str_none)) { - synapse_t *new_synapse = unpack_synapse(new_graph, synapse); - - if (new_synapse != NULL) { - new_graph->synapses = g_list_append(new_graph->synapses, new_synapse); - } - } - } - - crm_debug("Unpacked transition %d: %d actions in %d synapses", - new_graph->id, new_graph->num_actions, new_graph->num_synapses); - - return new_graph; -} - -static void -destroy_action(crm_action_t * action) -{ - if (action->timer && action->timer->source_id != 0) { - crm_warn("Cancelling timer for action %d (src=%d)", action->id, action->timer->source_id); - g_source_remove(action->timer->source_id); - } - if (action->params) { - g_hash_table_destroy(action->params); - } - free_xml(action->xml); - free(action->timer); - free(action); -} - -static void -destroy_synapse(synapse_t * synapse) -{ - while (synapse->actions != NULL) { - crm_action_t *action = g_list_nth_data(synapse->actions, 0); - - synapse->actions = g_list_remove(synapse->actions, action); - destroy_action(action); - } - - while (synapse->inputs != NULL) { - crm_action_t *action = g_list_nth_data(synapse->inputs, 0); - - synapse->inputs = g_list_remove(synapse->inputs, action); - 
destroy_action(action); - } - free(synapse); -} - -void -destroy_graph(crm_graph_t * graph) -{ - if (graph == NULL) { - return; - } - while (graph->synapses != NULL) { - synapse_t *synapse = g_list_nth_data(graph->synapses, 0); - - graph->synapses = g_list_remove(graph->synapses, synapse); - destroy_synapse(synapse); - } - - free(graph->source); - free(graph); -} - -lrmd_event_data_t * -convert_graph_action(xmlNode * resource, crm_action_t * action, int status, int rc) -{ - xmlNode *xop = NULL; - lrmd_event_data_t *op = NULL; - GHashTableIter iter; - const char *name = NULL; - const char *value = NULL; - xmlNode *action_resource = NULL; - - CRM_CHECK(action != NULL, return NULL); - CRM_CHECK(action->type == action_type_rsc, return NULL); - - action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); - CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "Bad"); - return NULL); - - op = lrmd_new_event(ID(action_resource), - crm_element_value(action->xml, XML_LRM_ATTR_TASK), - action->interval_ms); - op->rc = rc; - op->op_status = status; - op->t_run = time(NULL); - op->t_rcchange = op->t_run; - op->params = pcmk__strkey_table(free, free); - - g_hash_table_iter_init(&iter, action->params); - while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) { - g_hash_table_insert(op->params, strdup(name), strdup(value)); - } - - for (xop = pcmk__xml_first_child(resource); xop != NULL; - xop = pcmk__xml_next(xop)) { - int tmp = 0; - - crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); - crm_debug("Got call_id=%d for %s", tmp, ID(resource)); - if (tmp > op->call_id) { - op->call_id = tmp; - } - } - - op->call_id++; - return op; -}
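With pcmk_trans_graph.c and pcmk_trans_unpack.c removed, callers switch from the old transitioner entry points to the pcmk__-prefixed equivalents used above (update_graph → pcmk__update_graph, set_graph_functions → pcmk__set_graph_functions, unpack_graph → pcmk__unpack_graph, print_graph → pcmk__log_graph, run_graph → pcmk__execute_graph, transition_status → pcmk__graph_status2text, destroy_graph → pcmk__free_graph, convert_graph_action → pcmk__event_from_graph_action). A minimal sketch of the updated execution loop, mirroring run_simulation() above and assuming the same exec_* callbacks and a populated data_set (crm_err() stands in here for the formatted output object used in that function):

    crm_graph_functions_t exec_fns = {
        exec_pseudo_action, exec_rsc_action, exec_crmd_action, exec_stonith_action,
    };
    crm_graph_t *transition = NULL;
    enum transition_status graph_rc;

    pcmk__set_graph_functions(&exec_fns);               /* was set_graph_functions() */
    transition = pcmk__unpack_graph(data_set->graph,    /* was unpack_graph()        */
                                    crm_system_name);
    pcmk__log_graph(LOG_DEBUG, transition);             /* was print_graph()         */

    do {
        graph_rc = pcmk__execute_graph(transition);     /* was run_graph()           */
    } while (graph_rc == transition_active);

    if (graph_rc != transition_complete) {
        crm_err("Transition failed: %s",
                pcmk__graph_status2text(graph_rc));     /* was transition_status()   */
    }
    pcmk__free_graph(transition);                       /* was destroy_graph()       */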