diff --git a/lib/pacemaker/pcmk_sched_fencing.c b/lib/pacemaker/pcmk_sched_fencing.c index 318d592687..ee0c49b446 100644 --- a/lib/pacemaker/pcmk_sched_fencing.c +++ b/lib/pacemaker/pcmk_sched_fencing.c @@ -1,509 +1,509 @@ /* - * Copyright 2004-2024 the Pacemaker project contributors + * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Check whether a resource is known on a particular node * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return TRUE if resource (or parent if an anonymous clone) is known */ static bool rsc_is_known_on(const pcmk_resource_t *rsc, const pcmk_node_t *node) { const pcmk_resource_t *parent = rsc->priv->parent; if (g_hash_table_lookup(rsc->priv->probed_nodes, node->priv->id) != NULL) { return TRUE; } else if (pcmk__is_primitive(rsc) && pcmk__is_anonymous_clone(parent) && (g_hash_table_lookup(parent->priv->probed_nodes, node->priv->id) != NULL)) { /* We check only the parent, not the uber-parent, because we cannot * assume that the resource is known if it is in an anonymously cloned * group (which may be only partially known). */ return TRUE; } return FALSE; } /*! * \internal * \brief Order a resource's start and promote actions relative to fencing * * \param[in,out] rsc Resource to be ordered * \param[in,out] stonith_op Fence action */ static void order_start_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op) { pcmk_node_t *target; CRM_CHECK(stonith_op && stonith_op->node, return); target = stonith_op->node; for (GList *iter = rsc->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = iter->data; switch (action->needs) { case pcmk__requires_nothing: // Anything other than start or promote requires nothing break; case pcmk__requires_fencing: order_actions(stonith_op, action, pcmk__ar_ordered); break; case pcmk__requires_quorum: if (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_none) && (g_hash_table_lookup(rsc->priv->allowed_nodes, target->priv->id) != NULL) && !rsc_is_known_on(rsc, target)) { /* If we don't know the status of the resource on the node * we're about to shoot, we have to assume it may be active * there. Order the resource start after the fencing. This * is analogous to waiting for all the probes for a resource * to complete before starting it. * * The most likely explanation is that the DC died and took * its status with it. */ pcmk__rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, pcmk__node_name(target)); order_actions(stonith_op, action, pcmk__ar_ordered |pcmk__ar_unrunnable_first_blocks); } break; } } } /*! * \internal * \brief Order a resource's stop and demote actions relative to fencing * * \param[in,out] rsc Resource to be ordered * \param[in,out] stonith_op Fence action */ static void order_stop_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op) { GList *iter = NULL; GList *action_list = NULL; bool order_implicit = false; pcmk_resource_t *top = uber_parent(rsc); pcmk_action_t *parent_stop = NULL; pcmk_node_t *target; CRM_CHECK(stonith_op && stonith_op->node, return); target = stonith_op->node; /* Get a list of stop actions potentially implied by the fencing */ action_list = pe__resource_actions(rsc, target, PCMK_ACTION_STOP, FALSE); /* If resource requires fencing, implicit actions must occur after fencing. * * Implied stops and demotes of resources running on guest nodes are always * ordered after fencing, even if the resource does not require fencing, * because guest node "fencing" is actually just a resource stop. */ if (pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing) || pcmk__is_guest_or_bundle_node(target)) { order_implicit = true; } if (action_list && order_implicit) { parent_stop = find_first_action(top->priv->actions, NULL, PCMK_ACTION_STOP, NULL); } for (iter = action_list; iter != NULL; iter = iter->next) { pcmk_action_t *action = iter->data; // The stop would never complete, so convert it into a pseudo-action. pcmk__set_action_flags(action, pcmk__action_pseudo|pcmk__action_runnable); if (order_implicit) { /* Order the stonith before the parent stop (if any). * * Also order the stonith before the resource stop, unless the * resource is inside a bundle -- that would cause a graph loop. * We can rely on the parent stop's ordering instead. * * User constraints must not order a resource in a guest node * relative to the guest node container resource. The * pcmk__ar_guest_allowed flag marks constraints as generated by the * cluster and thus immune to that check (and is irrelevant if * target is not a guest). */ if (!pcmk__is_bundled(rsc)) { order_actions(stonith_op, action, pcmk__ar_guest_allowed); } order_actions(stonith_op, parent_stop, pcmk__ar_guest_allowed); } if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { crm_notice("Stop of failed resource %s is implicit %s %s is fenced", rsc->id, (order_implicit? "after" : "because"), pcmk__node_name(target)); } else { crm_info("%s is implicit %s %s is fenced", action->uuid, (order_implicit? "after" : "because"), pcmk__node_name(target)); } if (pcmk_is_set(rsc->flags, pcmk__rsc_notify)) { pe__order_notifs_after_fencing(action, rsc, stonith_op); } #if 0 /* It might be a good idea to stop healthy resources on a node about to * be fenced, when possible. * * However, fencing must be done before a failed resource's * (pseudo-)stop action, so that could create a loop. For example, given * a group of A and B running on node N with a failed stop of B: * * fence N -> stop B (pseudo-op) -> stop A -> fence N * * The block below creates the stop A -> fence N ordering and therefore * must (at least for now) be disabled. Instead, run the block above and * treat all resources on N as B would be (i.e., as a pseudo-op after * the fencing). * * @TODO Maybe break the "A requires B" dependency in * pcmk__update_action_for_orderings() and use this block for healthy * resources instead of the above. */ crm_info("Moving healthy resource %s off %s before fencing", rsc->id, pcmk__node_name(node)); pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL, strdup(PCMK_ACTION_STONITH), stonith_op, pcmk__ar_ordered, rsc->private->scheduler); #endif } g_list_free(action_list); /* Get a list of demote actions potentially implied by the fencing */ action_list = pe__resource_actions(rsc, target, PCMK_ACTION_DEMOTE, FALSE); for (iter = action_list; iter != NULL; iter = iter->next) { pcmk_action_t *action = iter->data; - if (!(action->node->details->online) || action->node->details->unclean + if (!pcmk__node_available(action->node, pcmk__node_alive) || pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { pcmk__rsc_info(rsc, "Demote of failed resource %s is implicit " "after %s is fenced", rsc->id, pcmk__node_name(target)); } else { pcmk__rsc_info(rsc, "%s is implicit after %s is fenced", action->uuid, pcmk__node_name(target)); } /* The demote would never complete and is now implied by the * fencing, so convert it into a pseudo-action. */ pcmk__set_action_flags(action, pcmk__action_pseudo|pcmk__action_runnable); if (pcmk__is_bundled(rsc)) { // Recovery will be ordered as usual after parent's implied stop } else if (order_implicit) { order_actions(stonith_op, action, pcmk__ar_guest_allowed|pcmk__ar_ordered); } } } g_list_free(action_list); } /*! * \internal * \brief Order resource actions properly relative to fencing * * \param[in,out] rsc Resource whose actions should be ordered * \param[in,out] stonith_op Fencing operation to be ordered against */ static void rsc_stonith_ordering(pcmk_resource_t *rsc, pcmk_action_t *stonith_op) { if (rsc->priv->children != NULL) { for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child_rsc = iter->data; rsc_stonith_ordering(child_rsc, stonith_op); } } else if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { pcmk__rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: " "%s", rsc->id); } else { order_start_vs_fencing(rsc, stonith_op); order_stop_vs_fencing(rsc, stonith_op); } } /*! * \internal * \brief Order all actions appropriately relative to a fencing operation * * Ensure start operations of affected resources are ordered after fencing, * imply stop and demote operations of affected resources by marking them as * pseudo-actions, etc. * * \param[in,out] stonith_op Fencing operation * \param[in,out] scheduler Scheduler data */ void pcmk__order_vs_fence(pcmk_action_t *stonith_op, pcmk_scheduler_t *scheduler) { CRM_CHECK(stonith_op && scheduler, return); for (GList *r = scheduler->priv->resources; r != NULL; r = r->next) { rsc_stonith_ordering((pcmk_resource_t *) r->data, stonith_op); } } /*! * \internal * \brief Order an action after unfencing * * \param[in] rsc Resource that action is for * \param[in,out] node Node that action is on * \param[in,out] action Action to be ordered after unfencing * \param[in] order Ordering flags */ void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_action_t *action, enum pcmk__action_relation_flags order) { /* When unfencing is in use, we order unfence actions before any probe or * start of resources that require unfencing, and also of fence devices. * * This might seem to violate the principle that fence devices require * only quorum. However, fence agents that unfence often don't have enough * information to even probe or start unless the node is first unfenced. */ if ((pcmk_is_set(rsc->flags, pcmk__rsc_fence_device) && pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_enable_unfencing)) || pcmk_is_set(rsc->flags, pcmk__rsc_needs_unfencing)) { /* Start with an optional ordering. Requiring unfencing would result in * the node being unfenced, and all its resources being stopped, * whenever a new resource is added -- which would be highly suboptimal. */ pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, TRUE, NULL, FALSE, node->priv->scheduler); order_actions(unfence, action, order); if (!pcmk__node_unfenced(node)) { // But unfencing is required if it has never been done char *reason = crm_strdup_printf("required by %s %s", rsc->id, action->task); trigger_unfencing(NULL, node, reason, NULL, node->priv->scheduler); free(reason); } } } /*! * \internal * \brief Create pseudo-op for guest node fence, and order relative to it * * \param[in,out] node Guest node to fence */ void pcmk__fence_guest(pcmk_node_t *node) { pcmk_resource_t *launcher = NULL; pcmk_action_t *stop = NULL; pcmk_action_t *stonith_op = NULL; /* The fence action is just a label; we don't do anything differently for * off vs. reboot. We specify it explicitly, rather than let it default to * cluster's default action, because we are not _initiating_ fencing -- we * are creating a pseudo-event to describe fencing that is already occurring * by other means (launcher recovery). */ const char *fence_action = PCMK_ACTION_OFF; pcmk__assert(node != NULL); /* Check whether guest's launcher has any explicit stop or start (the stop * may be implied by fencing of the guest's host). */ launcher = node->priv->remote->priv->launcher; if (launcher != NULL) { stop = find_first_action(launcher->priv->actions, NULL, PCMK_ACTION_STOP, NULL); if (find_first_action(launcher->priv->actions, NULL, PCMK_ACTION_START, NULL)) { fence_action = PCMK_ACTION_REBOOT; } } /* Create a fence pseudo-event, so we have an event to order actions * against, and the controller can always detect it. */ stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, node->priv->scheduler); pcmk__set_action_flags(stonith_op, pcmk__action_pseudo|pcmk__action_runnable); /* We want to imply stops/demotes after the guest is stopped, not wait until * it is restarted, so we always order pseudo-fencing after stop, not start * (even though start might be closer to what is done for a real reboot). */ if ((stop != NULL) && pcmk_is_set(stop->flags, pcmk__action_pseudo)) { pcmk_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, node->priv->scheduler); crm_info("Implying guest %s is down (action %d) after %s fencing", pcmk__node_name(node), stonith_op->id, pcmk__node_name(stop->node)); order_actions(parent_stonith_op, stonith_op, pcmk__ar_unrunnable_first_blocks |pcmk__ar_first_implies_then); } else if (stop) { order_actions(stop, stonith_op, pcmk__ar_unrunnable_first_blocks |pcmk__ar_first_implies_then); crm_info("Implying guest %s is down (action %d) " "after launcher %s is stopped (action %d)", pcmk__node_name(node), stonith_op->id, launcher->id, stop->id); } else { /* If we're fencing the guest node but there's no stop for the guest * resource, we must think the guest is already stopped. However, we may * think so because its resource history was just cleaned. To avoid * unnecessarily considering the guest node down if it's really up, * order the pseudo-fencing after any stop of the connection resource, * which will be ordered after any launcher (re-)probe. */ stop = find_first_action(node->priv->remote->priv->actions, NULL, PCMK_ACTION_STOP, NULL); if (stop) { order_actions(stop, stonith_op, pcmk__ar_ordered); crm_info("Implying guest %s is down (action %d) " "after connection is stopped (action %d)", pcmk__node_name(node), stonith_op->id, stop->id); } else { /* Not sure why we're fencing, but everything must already be * cleanly stopped. */ crm_info("Implying guest %s is down (action %d) ", pcmk__node_name(node), stonith_op->id); } } // Order/imply other actions relative to pseudo-fence as with real fence pcmk__order_vs_fence(stonith_op, node->priv->scheduler); } /*! * \internal * \brief Check whether node has already been unfenced * * \param[in] node Node to check * * \return true if node has a nonzero #node-unfenced attribute (or none), * otherwise false */ bool pcmk__node_unfenced(const pcmk_node_t *node) { const char *unfenced = pcmk__node_attr(node, CRM_ATTR_UNFENCED, NULL, pcmk__rsc_node_current); return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches); } /*! * \internal * \brief Order a resource's start and stop relative to unfencing of a node * * \param[in,out] data Node that could be unfenced * \param[in,out] user_data Resource to order */ void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data) { pcmk_node_t *node = (pcmk_node_t *) data; pcmk_resource_t *rsc = (pcmk_resource_t *) user_data; pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, true, NULL, false, rsc->priv->scheduler); crm_debug("Ordering any stops of %s before %s, and any starts after", rsc->id, unfence->uuid); /* * It would be more efficient to order clone resources once, * rather than order each instance, but ordering the instance * allows us to avoid unnecessary dependencies that might conflict * with user constraints. * * @TODO: This constraint can still produce a transition loop if the * resource has a stop scheduled on the node being unfenced, and * there is a user ordering constraint to start some other resource * (which will be ordered after the unfence) before stopping this * resource. An example is "start some slow-starting cloned service * before stopping an associated virtual IP that may be moving to * it": * stop this -> unfencing -> start that -> stop this */ pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL, strdup(unfence->uuid), unfence, pcmk__ar_ordered|pcmk__ar_if_on_same_node, rsc->priv->scheduler); pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence, rsc, start_key(rsc), NULL, pcmk__ar_first_implies_same_node_then |pcmk__ar_if_on_same_node, rsc->priv->scheduler); } diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c index 8bff84b8a3..4b7d11746e 100644 --- a/lib/pacemaker/pcmk_sched_primitive.c +++ b/lib/pacemaker/pcmk_sched_primitive.c @@ -1,1725 +1,1725 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // uint8_t, uint32_t #include #include #include "libpacemaker_private.h" static void stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional); static void start_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional); static void demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional); static void promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional); static void assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional); #define RSC_ROLE_MAX (pcmk_role_promoted + 1) static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* This array lists the immediate next role when transitioning from one role * to a target role. For example, when going from Stopped to Promoted, the * next role is Unpromoted, because the resource must be started before it * can be promoted. The current state then becomes Started, which is fed * into this array again, giving a next role of Promoted. * * Current role Immediate next role Final target role * ------------ ------------------- ----------------- */ /* Unknown */ { pcmk_role_unknown, /* Unknown */ pcmk_role_stopped, /* Stopped */ pcmk_role_stopped, /* Started */ pcmk_role_stopped, /* Unpromoted */ pcmk_role_stopped, /* Promoted */ }, /* Stopped */ { pcmk_role_stopped, /* Unknown */ pcmk_role_stopped, /* Stopped */ pcmk_role_started, /* Started */ pcmk_role_unpromoted, /* Unpromoted */ pcmk_role_unpromoted, /* Promoted */ }, /* Started */ { pcmk_role_stopped, /* Unknown */ pcmk_role_stopped, /* Stopped */ pcmk_role_started, /* Started */ pcmk_role_unpromoted, /* Unpromoted */ pcmk_role_promoted, /* Promoted */ }, /* Unpromoted */ { pcmk_role_stopped, /* Unknown */ pcmk_role_stopped, /* Stopped */ pcmk_role_stopped, /* Started */ pcmk_role_unpromoted, /* Unpromoted */ pcmk_role_promoted, /* Promoted */ }, /* Promoted */ { pcmk_role_stopped, /* Unknown */ pcmk_role_unpromoted, /* Stopped */ pcmk_role_unpromoted, /* Started */ pcmk_role_unpromoted, /* Unpromoted */ pcmk_role_promoted, /* Promoted */ }, }; /*! * \internal * \brief Function to schedule actions needed for a role change * * \param[in,out] rsc Resource whose role is changing * \param[in,out] node Node where resource will be in its next role * \param[in] optional Whether scheduled actions should be optional */ typedef void (*rsc_transition_fn)(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional); static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* This array lists the function needed to transition directly from one role * to another. NULL indicates that nothing is needed. * * Current role Transition function Next role * ------------ ------------------- ---------- */ /* Unknown */ { assert_role_error, /* Unknown */ stop_resource, /* Stopped */ assert_role_error, /* Started */ assert_role_error, /* Unpromoted */ assert_role_error, /* Promoted */ }, /* Stopped */ { assert_role_error, /* Unknown */ NULL, /* Stopped */ start_resource, /* Started */ start_resource, /* Unpromoted */ assert_role_error, /* Promoted */ }, /* Started */ { assert_role_error, /* Unknown */ stop_resource, /* Stopped */ NULL, /* Started */ NULL, /* Unpromoted */ promote_resource, /* Promoted */ }, /* Unpromoted */ { assert_role_error, /* Unknown */ stop_resource, /* Stopped */ stop_resource, /* Started */ NULL, /* Unpromoted */ promote_resource, /* Promoted */ }, /* Promoted */ { assert_role_error, /* Unknown */ demote_resource, /* Stopped */ demote_resource, /* Started */ demote_resource, /* Unpromoted */ NULL, /* Promoted */ }, }; /*! * \internal * \brief Get a list of a resource's allowed nodes sorted by node score * * \param[in] rsc Resource to check * * \return List of allowed nodes sorted by node score */ static GList * sorted_allowed_nodes(const pcmk_resource_t *rsc) { if (rsc->priv->allowed_nodes != NULL) { GList *nodes = g_hash_table_get_values(rsc->priv->allowed_nodes); if (nodes != NULL) { return pcmk__sort_nodes(nodes, pcmk__current_node(rsc)); } } return NULL; } /*! * \internal * \brief Assign a resource to its best allowed node, if possible * * \param[in,out] rsc Resource to choose a node for * \param[in] prefer If not \c NULL, prefer this node when all else * equal * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a * node, set next role to stopped and update * existing actions * * \return true if \p rsc could be assigned to a node, otherwise false * * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can * completely undo the assignment. A successful assignment can be either * undone or left alone as final. A failed assignment has the same effect * as calling pcmk__unassign_resource(); there are no side effects on * roles or actions. */ static bool assign_best_node(pcmk_resource_t *rsc, const pcmk_node_t *prefer, bool stop_if_fail) { GList *nodes = NULL; pcmk_node_t *chosen = NULL; pcmk_node_t *best = NULL; const pcmk_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc); if (prefer == NULL) { prefer = most_free_node; } if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) { // We've already finished assignment of resources to nodes return rsc->priv->assigned_node != NULL; } // Sort allowed nodes by score nodes = sorted_allowed_nodes(rsc); if (nodes != NULL) { best = (pcmk_node_t *) nodes->data; // First node has best score } if ((prefer != NULL) && (nodes != NULL)) { // Get the allowed node version of prefer chosen = g_hash_table_lookup(rsc->priv->allowed_nodes, prefer->priv->id); if (chosen == NULL) { pcmk__rsc_trace(rsc, "Preferred node %s for %s was unknown", pcmk__node_name(prefer), rsc->id); /* Favor the preferred node as long as its score is at least as good as * the best allowed node's. * * An alternative would be to favor the preferred node even if the best * node is better, when the best node's score is less than INFINITY. */ } else if (chosen->assign->score < best->assign->score) { pcmk__rsc_trace(rsc, "Preferred node %s for %s was unsuitable", pcmk__node_name(chosen), rsc->id); chosen = NULL; } else if (!pcmk__node_available(chosen, pcmk__node_alive |pcmk__node_usable |pcmk__node_no_negative)) { pcmk__rsc_trace(rsc, "Preferred node %s for %s was unavailable", pcmk__node_name(chosen), rsc->id); chosen = NULL; } else { pcmk__rsc_trace(rsc, "Chose preferred node %s for %s " "(ignoring %d candidates)", pcmk__node_name(chosen), rsc->id, g_list_length(nodes)); } } if ((chosen == NULL) && (best != NULL)) { /* Either there is no preferred node, or the preferred node is not * suitable, but another node is allowed to run the resource. */ chosen = best; if (!pcmk__is_unique_clone(rsc->priv->parent) && (chosen->assign->score > 0) // Zero not acceptable && pcmk__node_available(chosen, pcmk__node_alive|pcmk__node_usable)) { /* If the resource is already running on a node, prefer that node if * it is just as good as the chosen node. * * We don't do this for unique clone instances, because * pcmk__assign_instances() has already assigned instances to their * running nodes when appropriate, and if we get here, we don't want * remaining unassigned instances to prefer a node that's already * running another instance. */ pcmk_node_t *running = pcmk__current_node(rsc); if (running == NULL) { // Nothing to do } else if (!pcmk__node_available(running, pcmk__node_alive |pcmk__node_usable |pcmk__node_no_negative)) { pcmk__rsc_trace(rsc, "Current node for %s (%s) can't run resources", rsc->id, pcmk__node_name(running)); } else { int nodes_with_best_score = 1; for (GList *iter = nodes->next; iter; iter = iter->next) { pcmk_node_t *allowed = (pcmk_node_t *) iter->data; if (allowed->assign->score != chosen->assign->score) { // The nodes are sorted by score, so no more are equal break; } if (pcmk__same_node(allowed, running)) { // Scores are equal, so prefer the current node chosen = allowed; } nodes_with_best_score++; } if (nodes_with_best_score > 1) { uint8_t log_level = LOG_INFO; if (chosen->assign->score >= PCMK_SCORE_INFINITY) { log_level = LOG_WARNING; } do_crm_log(log_level, "Chose %s for %s from %d nodes with score %s", pcmk__node_name(chosen), rsc->id, nodes_with_best_score, pcmk_readable_score(chosen->assign->score)); } } } pcmk__rsc_trace(rsc, "Chose %s for %s from %d candidates", pcmk__node_name(chosen), rsc->id, g_list_length(nodes)); } pcmk__assign_resource(rsc, chosen, false, stop_if_fail); g_list_free(nodes); return rsc->priv->assigned_node != NULL; } /*! * \internal * \brief Apply a "this with" colocation to a node's allowed node scores * * \param[in,out] colocation Colocation to apply * \param[in,out] rsc Resource being assigned */ static void apply_this_with(pcmk__colocation_t *colocation, pcmk_resource_t *rsc) { GHashTable *archive = NULL; pcmk_resource_t *other = colocation->primary; // In certain cases, we will need to revert the node scores if ((colocation->dependent_role >= pcmk_role_promoted) || ((colocation->score < 0) && (colocation->score > -PCMK_SCORE_INFINITY))) { archive = pcmk__copy_node_table(rsc->priv->allowed_nodes); } if (pcmk_is_set(other->flags, pcmk__rsc_unassigned)) { pcmk__rsc_trace(rsc, "%s: Assigning colocation %s primary %s first" "(score=%d role=%s)", rsc->id, colocation->id, other->id, colocation->score, pcmk_role_text(colocation->dependent_role)); other->priv->cmds->assign(other, NULL, true); } // Apply the colocation score to this resource's allowed node scores rsc->priv->cmds->apply_coloc_score(rsc, other, colocation, true); if ((archive != NULL) && !pcmk__any_node_available(rsc->priv->allowed_nodes)) { pcmk__rsc_info(rsc, "%s: Reverting scores from colocation with %s " "because no nodes allowed", rsc->id, other->id); g_hash_table_destroy(rsc->priv->allowed_nodes); rsc->priv->allowed_nodes = archive; archive = NULL; } if (archive != NULL) { g_hash_table_destroy(archive); } } /*! * \internal * \brief Update a Pacemaker Remote node once its connection has been assigned * * \param[in] connection Connection resource that has been assigned */ static void remote_connection_assigned(const pcmk_resource_t *connection) { pcmk_node_t *remote_node = pcmk_find_node(connection->priv->scheduler, connection->id); CRM_CHECK(remote_node != NULL, return); if ((connection->priv->assigned_node != NULL) && (connection->priv->next_role != pcmk_role_stopped)) { crm_trace("Pacemaker Remote node %s will be online", remote_node->priv->id); remote_node->details->online = TRUE; if (!pcmk_is_set(remote_node->priv->flags, pcmk__node_seen)) { // Avoid unnecessary fence, since we will attempt connection remote_node->details->unclean = FALSE; } } else { crm_trace("Pacemaker Remote node %s will be shut down " "(%sassigned connection's next role is %s)", remote_node->priv->id, ((connection->priv->assigned_node == NULL)? "un" : ""), pcmk_role_text(connection->priv->next_role)); remote_node->details->shutdown = TRUE; } } /*! * \internal * \brief Assign a primitive resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a * node, set next role to stopped and update * existing actions * * \return Node that \p rsc is assigned to, if assigned entirely to one node * * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can * completely undo the assignment. A successful assignment can be either * undone or left alone as final. A failed assignment has the same effect * as calling pcmk__unassign_resource(); there are no side effects on * roles or actions. */ pcmk_node_t * pcmk__primitive_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer, bool stop_if_fail) { GList *this_with_colocations = NULL; GList *with_this_colocations = NULL; GList *iter = NULL; pcmk_resource_t *parent = NULL; pcmk__colocation_t *colocation = NULL; pcmk_scheduler_t *scheduler = NULL; pcmk__assert(pcmk__is_primitive(rsc)); scheduler = rsc->priv->scheduler; parent = rsc->priv->parent; // Never assign a child without parent being assigned first if ((parent != NULL) && !pcmk_is_set(parent->flags, pcmk__rsc_assigning)) { pcmk__rsc_debug(rsc, "%s: Assigning parent %s first", rsc->id, parent->id); parent->priv->cmds->assign(parent, prefer, stop_if_fail); } if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) { // Assignment has already been done const char *node_name = "no node"; if (rsc->priv->assigned_node != NULL) { node_name = pcmk__node_name(rsc->priv->assigned_node); } pcmk__rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, node_name); return rsc->priv->assigned_node; } // Ensure we detect assignment loops if (pcmk_is_set(rsc->flags, pcmk__rsc_assigning)) { pcmk__rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id); return NULL; } pcmk__set_rsc_flags(rsc, pcmk__rsc_assigning); pe__show_node_scores(true, rsc, "Pre-assignment", rsc->priv->allowed_nodes, scheduler); this_with_colocations = pcmk__this_with_colocations(rsc); with_this_colocations = pcmk__with_this_colocations(rsc); // Apply mandatory colocations first, to satisfy as many as possible for (iter = this_with_colocations; iter != NULL; iter = iter->next) { colocation = iter->data; if ((colocation->score <= -PCMK_SCORE_INFINITY) || (colocation->score >= PCMK_SCORE_INFINITY)) { apply_this_with(colocation, rsc); } } for (iter = with_this_colocations; iter != NULL; iter = iter->next) { colocation = iter->data; if ((colocation->score <= -PCMK_SCORE_INFINITY) || (colocation->score >= PCMK_SCORE_INFINITY)) { pcmk__add_dependent_scores(colocation, rsc); } } pe__show_node_scores(true, rsc, "Mandatory-colocations", rsc->priv->allowed_nodes, scheduler); // Then apply optional colocations for (iter = this_with_colocations; iter != NULL; iter = iter->next) { colocation = iter->data; if ((colocation->score > -PCMK_SCORE_INFINITY) && (colocation->score < PCMK_SCORE_INFINITY)) { apply_this_with(colocation, rsc); } } for (iter = with_this_colocations; iter != NULL; iter = iter->next) { colocation = iter->data; if ((colocation->score > -PCMK_SCORE_INFINITY) && (colocation->score < PCMK_SCORE_INFINITY)) { pcmk__add_dependent_scores(colocation, rsc); } } g_list_free(this_with_colocations); g_list_free(with_this_colocations); if (rsc->priv->next_role == pcmk_role_stopped) { pcmk__rsc_trace(rsc, "Banning %s from all nodes because it will be stopped", rsc->id); resource_location(rsc, NULL, -PCMK_SCORE_INFINITY, PCMK_META_TARGET_ROLE, scheduler); } else if ((rsc->priv->next_role > rsc->priv->orig_role) && !pcmk_is_set(scheduler->flags, pcmk__sched_quorate) && (scheduler->no_quorum_policy == pcmk_no_quorum_freeze)) { crm_notice("Resource %s cannot be elevated from %s to %s due to " PCMK_OPT_NO_QUORUM_POLICY "=" PCMK_VALUE_FREEZE, rsc->id, pcmk_role_text(rsc->priv->orig_role), pcmk_role_text(rsc->priv->next_role)); pe__set_next_role(rsc, rsc->priv->orig_role, PCMK_OPT_NO_QUORUM_POLICY "=" PCMK_VALUE_FREEZE); } pe__show_node_scores(!pcmk_is_set(scheduler->flags, pcmk__sched_output_scores), rsc, __func__, rsc->priv->allowed_nodes, scheduler); // Unmanage resource if fencing is enabled but no device is configured if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled) && !pcmk_is_set(scheduler->flags, pcmk__sched_have_fencing)) { pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed); } if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { // Unmanaged resources stay on their current node const char *reason = NULL; pcmk_node_t *assign_to = NULL; pe__set_next_role(rsc, rsc->priv->orig_role, "unmanaged"); assign_to = pcmk__current_node(rsc); if (assign_to == NULL) { reason = "inactive"; } else if (rsc->priv->orig_role == pcmk_role_promoted) { reason = "promoted"; } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { reason = "failed"; } else { reason = "active"; } pcmk__rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id, (assign_to? assign_to->priv->name : "no node"), reason); pcmk__assign_resource(rsc, assign_to, true, stop_if_fail); } else if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_all)) { // Must stop at some point, but be consistent with stop_if_fail if (stop_if_fail) { pcmk__rsc_debug(rsc, "Forcing %s to stop: " PCMK_OPT_STOP_ALL_RESOURCES, rsc->id); } pcmk__assign_resource(rsc, NULL, true, stop_if_fail); } else if (!assign_best_node(rsc, prefer, stop_if_fail)) { // Assignment failed if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { pcmk__rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); } else if ((rsc->priv->active_nodes != NULL) && stop_if_fail) { pcmk__rsc_info(rsc, "Stopping removed resource %s", rsc->id); } } pcmk__clear_rsc_flags(rsc, pcmk__rsc_assigning); if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { remote_connection_assigned(rsc); } return rsc->priv->assigned_node; } /*! * \internal * \brief Schedule actions to bring resource down and back to current role * * \param[in,out] rsc Resource to restart * \param[in,out] current Node that resource should be brought down on * \param[in] need_stop Whether the resource must be stopped * \param[in] need_promote Whether the resource must be promoted * * \return Role that resource would have after scheduled actions are taken */ static void schedule_restart_actions(pcmk_resource_t *rsc, pcmk_node_t *current, bool need_stop, bool need_promote) { enum rsc_role_e role = rsc->priv->orig_role; enum rsc_role_e next_role; rsc_transition_fn fn = NULL; pcmk__set_rsc_flags(rsc, pcmk__rsc_restarting); // Bring resource down to a stop on its current node while (role != pcmk_role_stopped) { next_role = rsc_state_matrix[role][pcmk_role_stopped]; pcmk__rsc_trace(rsc, "Creating %s action to take %s down from %s to %s", (need_stop? "required" : "optional"), rsc->id, pcmk_role_text(role), pcmk_role_text(next_role)); fn = rsc_action_matrix[role][next_role]; if (fn == NULL) { break; } fn(rsc, current, !need_stop); role = next_role; } // Bring resource up to its next role on its next node while ((rsc->priv->orig_role <= rsc->priv->next_role) && (role != rsc->priv->orig_role) && !pcmk_is_set(rsc->flags, pcmk__rsc_blocked)) { bool required = need_stop; next_role = rsc_state_matrix[role][rsc->priv->orig_role]; if ((next_role == pcmk_role_promoted) && need_promote) { required = true; } pcmk__rsc_trace(rsc, "Creating %s action to take %s up from %s to %s", (required? "required" : "optional"), rsc->id, pcmk_role_text(role), pcmk_role_text(next_role)); fn = rsc_action_matrix[role][next_role]; if (fn == NULL) { break; } fn(rsc, rsc->priv->assigned_node, !required); role = next_role; } pcmk__clear_rsc_flags(rsc, pcmk__rsc_restarting); } /*! * \internal * \brief If a resource's next role is not explicitly specified, set a default * * \param[in,out] rsc Resource to set next role for * * \return "explicit" if next role was explicitly set, otherwise "implicit" */ static const char * set_default_next_role(pcmk_resource_t *rsc) { if (rsc->priv->next_role != pcmk_role_unknown) { return "explicit"; } if (rsc->priv->assigned_node == NULL) { pe__set_next_role(rsc, pcmk_role_stopped, "assignment"); } else { pe__set_next_role(rsc, pcmk_role_started, "assignment"); } return "implicit"; } /*! * \internal * \brief Create an action to represent an already pending start * * \param[in,out] rsc Resource to create start action for */ static void create_pending_start(pcmk_resource_t *rsc) { pcmk_action_t *start = NULL; pcmk__rsc_trace(rsc, "Creating action for %s to represent already pending start", rsc->id); start = start_action(rsc, rsc->priv->assigned_node, TRUE); pcmk__set_action_flags(start, pcmk__action_always_in_graph); } /*! * \internal * \brief Schedule actions needed to take a resource to its next role * * \param[in,out] rsc Resource to schedule actions for */ static void schedule_role_transition_actions(pcmk_resource_t *rsc) { enum rsc_role_e role = rsc->priv->orig_role; while (role != rsc->priv->next_role) { enum rsc_role_e next_role = rsc_state_matrix[role][rsc->priv->next_role]; rsc_transition_fn fn = NULL; pcmk__rsc_trace(rsc, "Creating action to take %s from %s to %s " "(ending at %s)", rsc->id, pcmk_role_text(role), pcmk_role_text(next_role), pcmk_role_text(rsc->priv->next_role)); fn = rsc_action_matrix[role][next_role]; if (fn == NULL) { break; } fn(rsc, rsc->priv->assigned_node, false); role = next_role; } } /*! * \internal * \brief Create all actions needed for a given primitive resource * * \param[in,out] rsc Primitive resource to create actions for */ void pcmk__primitive_create_actions(pcmk_resource_t *rsc) { bool need_stop = false; bool need_promote = false; bool is_moving = false; bool allow_migrate = false; bool multiply_active = false; pcmk_node_t *current = NULL; pcmk_node_t *migration_target = NULL; unsigned int num_all_active = 0; unsigned int num_clean_active = 0; const char *next_role_source = NULL; pcmk__assert(pcmk__is_primitive(rsc)); next_role_source = set_default_next_role(rsc); pcmk__rsc_trace(rsc, "Creating all actions for %s transition from %s to %s " "(%s) on %s", rsc->id, pcmk_role_text(rsc->priv->orig_role), pcmk_role_text(rsc->priv->next_role), next_role_source, pcmk__node_name(rsc->priv->assigned_node)); current = rsc->priv->fns->active_node(rsc, &num_all_active, &num_clean_active); g_list_foreach(rsc->priv->dangling_migration_sources, pcmk__abort_dangling_migration, rsc); if ((current != NULL) && (rsc->priv->assigned_node != NULL) && !pcmk__same_node(current, rsc->priv->assigned_node) && (rsc->priv->next_role >= pcmk_role_started)) { pcmk__rsc_trace(rsc, "Moving %s from %s to %s", rsc->id, pcmk__node_name(current), pcmk__node_name(rsc->priv->assigned_node)); is_moving = true; allow_migrate = pcmk__rsc_can_migrate(rsc, current); // This is needed even if migrating (though I'm not sure why ...) need_stop = true; } // Check whether resource is partially migrated and/or multiply active migration_target = rsc->priv->partial_migration_target; if ((rsc->priv->partial_migration_source != NULL) && (migration_target != NULL) && allow_migrate && (num_all_active == 2) && pcmk__same_node(current, rsc->priv->partial_migration_source) && pcmk__same_node(rsc->priv->assigned_node, migration_target)) { /* A partial migration is in progress, and the migration target remains * the same as when the migration began. */ pcmk__rsc_trace(rsc, "Partial migration of %s from %s to %s will continue", rsc->id, pcmk__node_name(rsc->priv->partial_migration_source), pcmk__node_name(migration_target)); } else if ((rsc->priv->partial_migration_source != NULL) || (migration_target != NULL)) { // A partial migration is in progress but can't be continued if (num_all_active > 2) { // The resource is migrating *and* multiply active! crm_notice("Forcing recovery of %s because it is migrating " "from %s to %s and possibly active elsewhere", rsc->id, pcmk__node_name(rsc->priv->partial_migration_source), pcmk__node_name(migration_target)); } else { // The migration source or target isn't available crm_notice("Forcing recovery of %s because it can no longer " "migrate from %s to %s", rsc->id, pcmk__node_name(rsc->priv->partial_migration_source), pcmk__node_name(migration_target)); } need_stop = true; rsc->priv->partial_migration_source = NULL; rsc->priv->partial_migration_target = NULL; allow_migrate = false; } else if (pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)) { multiply_active = (num_all_active > 1); } else { /* If a resource has PCMK_META_REQUIRES set to PCMK_VALUE_NOTHING or * PCMK_VALUE_QUORUM, don't consider it active on unclean nodes (similar * to how all resources behave when PCMK_OPT_STONITH_ENABLED is false). * We can start such resources elsewhere before fencing completes, and * if we considered the resource active on the failed node, we would * attempt recovery for being active on multiple nodes. */ multiply_active = (num_clean_active > 1); } if (multiply_active) { const char *class = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS); // Resource was (possibly) incorrectly multiply active pcmk__sched_err(rsc->priv->scheduler, "%s resource %s might be active on %u nodes (%s)", pcmk__s(class, "Untyped"), rsc->id, num_all_active, pcmk__multiply_active_text(rsc)); crm_notice("For more information, see \"What are multiply active " "resources?\" at " "https://projects.clusterlabs.org/w/clusterlabs/faq/"); switch (rsc->priv->multiply_active_policy) { case pcmk__multiply_active_restart: need_stop = true; break; case pcmk__multiply_active_unexpected: need_stop = true; // stop_resource() will skip expected node pcmk__set_rsc_flags(rsc, pcmk__rsc_stop_unexpected); break; default: break; } } else { pcmk__clear_rsc_flags(rsc, pcmk__rsc_stop_unexpected); } if (pcmk_is_set(rsc->flags, pcmk__rsc_start_pending)) { create_pending_start(rsc); } if (is_moving) { // Remaining tests are only for resources staying where they are } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { if (pcmk_is_set(rsc->flags, pcmk__rsc_stop_if_failed)) { need_stop = true; pcmk__rsc_trace(rsc, "Recovering %s", rsc->id); } else { pcmk__rsc_trace(rsc, "Recovering %s by demotion", rsc->id); if (rsc->priv->next_role == pcmk_role_promoted) { need_promote = true; } } } else if (pcmk_is_set(rsc->flags, pcmk__rsc_blocked)) { pcmk__rsc_trace(rsc, "Blocking further actions on %s", rsc->id); need_stop = true; } else if ((rsc->priv->orig_role > pcmk_role_started) && (current != NULL) && (rsc->priv->assigned_node != NULL)) { pcmk_action_t *start = NULL; pcmk__rsc_trace(rsc, "Creating start action for promoted resource %s", rsc->id); start = start_action(rsc, rsc->priv->assigned_node, TRUE); if (!pcmk_is_set(start->flags, pcmk__action_optional)) { // Recovery of a promoted resource pcmk__rsc_trace(rsc, "%s restart is required for recovery", rsc->id); need_stop = true; } } // Create any actions needed to bring resource down and back up to same role schedule_restart_actions(rsc, current, need_stop, need_promote); // Create any actions needed to take resource from this role to the next schedule_role_transition_actions(rsc); pcmk__create_recurring_actions(rsc); if (allow_migrate) { pcmk__create_migration_actions(rsc, current); } } /*! * \internal * \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes * * \param[in] rsc Resource to check */ static void rsc_avoids_remote_nodes(const pcmk_resource_t *rsc) { GHashTableIter iter; pcmk_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if (node->priv->remote != NULL) { node->assign->score = -PCMK_SCORE_INFINITY; } } } /*! * \internal * \brief Return allowed nodes as (possibly sorted) list * * Convert a resource's hash table of allowed nodes to a list. If printing to * stdout, sort the list, to keep action ID numbers consistent for regression * test output (while avoiding the performance hit on a live cluster). * * \param[in] rsc Resource to check for allowed nodes * * \return List of resource's allowed nodes * \note Callers should take care not to rely on the list being sorted. */ static GList * allowed_nodes_as_list(const pcmk_resource_t *rsc) { GList *allowed_nodes = NULL; if (rsc->priv->allowed_nodes != NULL) { allowed_nodes = g_hash_table_get_values(rsc->priv->allowed_nodes); } if (!pcmk__is_daemon) { allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name); } return allowed_nodes; } /*! * \internal * \brief Create implicit constraints needed for a primitive resource * * \param[in,out] rsc Primitive resource to create implicit constraints for */ void pcmk__primitive_internal_constraints(pcmk_resource_t *rsc) { GList *allowed_nodes = NULL; bool check_unfencing = false; bool check_utilization = false; pcmk_scheduler_t *scheduler = NULL; pcmk__assert(pcmk__is_primitive(rsc)); scheduler = rsc->priv->scheduler; if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { pcmk__rsc_trace(rsc, "Skipping implicit constraints for unmanaged resource " "%s", rsc->id); return; } // Whether resource requires unfencing check_unfencing = !pcmk_is_set(rsc->flags, pcmk__rsc_fence_device) && pcmk_is_set(scheduler->flags, pcmk__sched_enable_unfencing) && pcmk_is_set(rsc->flags, pcmk__rsc_needs_unfencing); // Whether a non-default placement strategy is used check_utilization = (g_hash_table_size(rsc->priv->utilization) > 0) && !pcmk__str_eq(scheduler->priv->placement_strategy, PCMK_VALUE_DEFAULT, pcmk__str_casei); // Order stops before starts (i.e. restart) pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL, rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL, pcmk__ar_ordered |pcmk__ar_first_implies_then |pcmk__ar_intermediate_stop, scheduler); // Promotable ordering: demote before stop, start before promote if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags, pcmk__rsc_promotable) || (rsc->priv->orig_role > pcmk_role_unpromoted)) { pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_DEMOTE, 0), NULL, rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL, pcmk__ar_promoted_then_implies_first, scheduler); pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL, rsc, pcmk__op_key(rsc->id, PCMK_ACTION_PROMOTE, 0), NULL, pcmk__ar_unrunnable_first_blocks, scheduler); } // Don't clear resource history if probing on same node pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0), NULL, rsc, pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0), NULL, pcmk__ar_if_on_same_node|pcmk__ar_then_cancels_first, scheduler); // Certain checks need allowed nodes if (check_unfencing || check_utilization || (rsc->priv->launcher != NULL)) { allowed_nodes = allowed_nodes_as_list(rsc); } if (check_unfencing) { g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc); } if (check_utilization) { pcmk__create_utilization_constraints(rsc, allowed_nodes); } if (rsc->priv->launcher != NULL) { pcmk_resource_t *remote_rsc = NULL; if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { // rsc is the implicit remote connection for a guest or bundle node /* Guest resources are not allowed to run on Pacemaker Remote nodes, * to avoid nesting remotes. However, bundles are allowed. */ if (!pcmk_is_set(rsc->flags, pcmk__rsc_remote_nesting_allowed)) { rsc_avoids_remote_nodes(rsc->priv->launcher); } /* If someone cleans up a guest or bundle node's launcher, we will * likely schedule a (re-)probe of the launcher and recovery of the * connection. Order the connection stop after the launcher probe, * so that if we detect the launcher running, we will trigger a new * transition and avoid the unnecessary recovery. */ pcmk__order_resource_actions(rsc->priv->launcher, PCMK_ACTION_MONITOR, rsc, PCMK_ACTION_STOP, pcmk__ar_ordered); /* A user can specify that a resource must start on a Pacemaker Remote * node by explicitly configuring it with the PCMK__META_CONTAINER * meta-attribute. This is of questionable merit, since location * constraints can accomplish the same thing. But we support it, so here * we check whether a resource (that is not itself a remote connection) * has PCMK__META_CONTAINER set to a remote node or guest node resource. */ } else if (pcmk_is_set(rsc->priv->launcher->flags, pcmk__rsc_is_remote_connection)) { remote_rsc = rsc->priv->launcher; } else { remote_rsc = pe__resource_contains_guest_node(scheduler, rsc->priv->launcher); } if (remote_rsc != NULL) { /* Force the resource on the Pacemaker Remote node instead of * colocating the resource with the launcher. */ for (GList *item = allowed_nodes; item; item = item->next) { pcmk_node_t *node = item->data; if (node->priv->remote != remote_rsc) { node->assign->score = -PCMK_SCORE_INFINITY; } } } else { /* This resource is either launched by a resource that does NOT * represent a Pacemaker Remote node, or a Pacemaker Remote * connection resource for a guest node or bundle. */ int score; crm_trace("Order and colocate %s relative to its launcher %s", rsc->id, rsc->priv->launcher->id); pcmk__new_ordering(rsc->priv->launcher, pcmk__op_key(rsc->priv->launcher->id, PCMK_ACTION_START, 0), NULL, rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL, pcmk__ar_first_implies_then |pcmk__ar_unrunnable_first_blocks, scheduler); pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL, rsc->priv->launcher, pcmk__op_key(rsc->priv->launcher->id, PCMK_ACTION_STOP, 0), NULL, pcmk__ar_then_implies_first, scheduler); if (pcmk_is_set(rsc->flags, pcmk__rsc_remote_nesting_allowed) /* @TODO: && non-bundle Pacemaker Remote nodes exist */) { score = 10000; /* Highly preferred but not essential */ } else { score = PCMK_SCORE_INFINITY; // Force to run on same host } pcmk__new_colocation("#resource-with-container", NULL, score, rsc, rsc->priv->launcher, NULL, NULL, pcmk__coloc_influence); } } if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection) || pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)) { /* Remote connections and fencing devices are not allowed to run on * Pacemaker Remote nodes */ rsc_avoids_remote_nodes(rsc); } g_list_free(allowed_nodes); } /*! * \internal * \brief Apply a colocation's score to node scores or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node scores (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent * * \return The score added to the dependent's priority */ int pcmk__primitive_apply_coloc_score(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { enum pcmk__coloc_affects filter_results; pcmk__assert((dependent != NULL) && (primary != NULL) && (colocation != NULL)); if (for_dependent) { // Always process on behalf of primary resource return primary->priv->cmds->apply_coloc_score(dependent, primary, colocation, false); } filter_results = pcmk__colocation_affects(dependent, primary, colocation, false); pcmk__rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)", ((colocation->score > 0)? "Colocating" : "Anti-colocating"), dependent->id, primary->id, colocation->id, colocation->score, filter_results); switch (filter_results) { case pcmk__coloc_affects_role: return pcmk__apply_coloc_to_priority(dependent, primary, colocation); case pcmk__coloc_affects_location: pcmk__apply_coloc_to_scores(dependent, primary, colocation); return 0; default: // pcmk__coloc_affects_nothing return 0; } } /* Primitive implementation of * pcmk__assignment_methods_t:with_this_colocations() */ void pcmk__with_primitive_colocations(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list) { const pcmk_resource_t *parent = NULL; pcmk__assert(pcmk__is_primitive(rsc) && (list != NULL)); parent = rsc->priv->parent; if (rsc == orig_rsc) { /* For the resource itself, add all of its own colocations and relevant * colocations from its parent (if any). */ pcmk__add_with_this_list(list, rsc->priv->with_this_colocations, orig_rsc); if (parent != NULL) { parent->priv->cmds->with_this_colocations(parent, orig_rsc, list); } } else { // For an ancestor, add only explicitly configured constraints for (GList *iter = rsc->priv->with_this_colocations; iter != NULL; iter = iter->next) { pcmk__colocation_t *colocation = iter->data; if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) { pcmk__add_with_this(list, colocation, orig_rsc); } } } } /* Primitive implementation of * pcmk__assignment_methods_t:this_with_colocations() */ void pcmk__primitive_with_colocations(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list) { const pcmk_resource_t *parent = NULL; pcmk__assert(pcmk__is_primitive(rsc) && (list != NULL)); parent = rsc->priv->parent; if (rsc == orig_rsc) { /* For the resource itself, add all of its own colocations and relevant * colocations from its parent (if any). */ pcmk__add_this_with_list(list, rsc->priv->this_with_colocations, orig_rsc); if (parent != NULL) { parent->priv->cmds->this_with_colocations(parent, orig_rsc, list); } } else { // For an ancestor, add only explicitly configured constraints for (GList *iter = rsc->priv->this_with_colocations; iter != NULL; iter = iter->next) { pcmk__colocation_t *colocation = iter->data; if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) { pcmk__add_this_with(list, colocation, orig_rsc); } } } } /*! * \internal * \brief Return action flags for a given primitive resource action * * \param[in,out] action Action to get flags for * \param[in] node If not NULL, limit effects to this node (ignored) * * \return Flags appropriate to \p action on \p node */ uint32_t pcmk__primitive_action_flags(pcmk_action_t *action, const pcmk_node_t *node) { pcmk__assert(action != NULL); return (uint32_t) action->flags; } /*! * \internal * \brief Check whether a node is a multiply active resource's expected node * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return \c true if \p rsc is multiply active with * \c PCMK_META_MULTIPLE_ACTIVE set to \c PCMK_VALUE_STOP_UNEXPECTED, * and \p node is the node where it will remain active * \note This assumes that the resource's next role cannot be changed to stopped * after this is called, which should be reasonable if status has already * been unpacked and resources have been assigned to nodes. */ static bool is_expected_node(const pcmk_resource_t *rsc, const pcmk_node_t *node) { return pcmk_all_flags_set(rsc->flags, pcmk__rsc_stop_unexpected|pcmk__rsc_restarting) && (rsc->priv->next_role > pcmk_role_stopped) && pcmk__same_node(rsc->priv->assigned_node, node); } /*! * \internal * \brief Schedule actions needed to stop a resource wherever it is active * * \param[in,out] rsc Resource being stopped * \param[in] node Node where resource is being stopped (ignored) * \param[in] optional Whether actions should be optional */ static void stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional) { for (GList *iter = rsc->priv->active_nodes; iter != NULL; iter = iter->next) { pcmk_node_t *current = (pcmk_node_t *) iter->data; pcmk_action_t *stop = NULL; if (is_expected_node(rsc, current)) { /* We are scheduling restart actions for a multiply active resource * with PCMK_META_MULTIPLE_ACTIVE=PCMK_VALUE_STOP_UNEXPECTED, and * this is where it should not be stopped. */ pcmk__rsc_trace(rsc, "Skipping stop of multiply active resource %s " "on expected node %s", rsc->id, pcmk__node_name(current)); continue; } if (rsc->priv->partial_migration_target != NULL) { // Continue migration if node originally was and remains target if (pcmk__same_node(current, rsc->priv->partial_migration_target) && pcmk__same_node(current, rsc->priv->assigned_node)) { pcmk__rsc_trace(rsc, "Skipping stop of %s on %s " "because partial migration there will continue", rsc->id, pcmk__node_name(current)); continue; } else { pcmk__rsc_trace(rsc, "Forcing stop of %s on %s " "because migration target changed", rsc->id, pcmk__node_name(current)); optional = false; } } pcmk__rsc_trace(rsc, "Scheduling stop of %s on %s", rsc->id, pcmk__node_name(current)); stop = stop_action(rsc, current, optional); if (rsc->priv->assigned_node == NULL) { pe_action_set_reason(stop, "node availability", true); } else if (pcmk_all_flags_set(rsc->flags, pcmk__rsc_restarting |pcmk__rsc_stop_unexpected)) { /* We are stopping a multiply active resource on a node that is * not its expected node, and we are still scheduling restart * actions, so the stop is for being multiply active. */ pe_action_set_reason(stop, "being multiply active", true); } if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { pcmk__clear_action_flags(stop, pcmk__action_runnable); } if (pcmk_is_set(rsc->flags, pcmk__rsc_needs_unfencing)) { pcmk_action_t *unfence = pe_fence_op(current, PCMK_ACTION_ON, true, NULL, false, rsc->priv->scheduler); order_actions(stop, unfence, pcmk__ar_then_implies_first); if (!pcmk__node_unfenced(current)) { pcmk__sched_err(rsc->priv->scheduler, "Stopping %s until %s can be unfenced", rsc->id, pcmk__node_name(current)); } } } } /*! * \internal * \brief Schedule actions needed to start a resource on a node * * \param[in,out] rsc Resource being started * \param[in,out] node Node where resource should be started * \param[in] optional Whether actions should be optional */ static void start_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional) { pcmk_action_t *start = NULL; pcmk__assert(node != NULL); pcmk__rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)", (optional? "optional" : "required"), rsc->id, pcmk__node_name(node), node->assign->score); start = start_action(rsc, node, TRUE); pcmk__order_vs_unfence(rsc, node, start, pcmk__ar_first_implies_then); if (pcmk_is_set(start->flags, pcmk__action_runnable) && !optional) { pcmk__clear_action_flags(start, pcmk__action_optional); } if (is_expected_node(rsc, node)) { /* This could be a problem if the start becomes necessary for other * reasons later. */ pcmk__rsc_trace(rsc, "Start of multiply active resouce %s " "on expected node %s will be a pseudo-action", rsc->id, pcmk__node_name(node)); pcmk__set_action_flags(start, pcmk__action_pseudo); } } /*! * \internal * \brief Schedule actions needed to promote a resource on a node * * \param[in,out] rsc Resource being promoted * \param[in] node Node where resource should be promoted * \param[in] optional Whether actions should be optional */ static void promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional) { GList *iter = NULL; GList *action_list = NULL; bool runnable = true; pcmk__assert(node != NULL); // Any start must be runnable for promotion to be runnable action_list = pe__resource_actions(rsc, node, PCMK_ACTION_START, true); for (iter = action_list; iter != NULL; iter = iter->next) { pcmk_action_t *start = (pcmk_action_t *) iter->data; if (!pcmk_is_set(start->flags, pcmk__action_runnable)) { runnable = false; } } g_list_free(action_list); if (runnable) { pcmk_action_t *promote = promote_action(rsc, node, optional); pcmk__rsc_trace(rsc, "Scheduling %s promotion of %s on %s", (optional? "optional" : "required"), rsc->id, pcmk__node_name(node)); if (is_expected_node(rsc, node)) { /* This could be a problem if the promote becomes necessary for * other reasons later. */ pcmk__rsc_trace(rsc, "Promotion of multiply active resouce %s " "on expected node %s will be a pseudo-action", rsc->id, pcmk__node_name(node)); pcmk__set_action_flags(promote, pcmk__action_pseudo); } } else { pcmk__rsc_trace(rsc, "Not promoting %s on %s: start unrunnable", rsc->id, pcmk__node_name(node)); action_list = pe__resource_actions(rsc, node, PCMK_ACTION_PROMOTE, true); for (iter = action_list; iter != NULL; iter = iter->next) { pcmk_action_t *promote = (pcmk_action_t *) iter->data; pcmk__clear_action_flags(promote, pcmk__action_runnable); } g_list_free(action_list); } } /*! * \internal * \brief Schedule actions needed to demote a resource wherever it is active * * \param[in,out] rsc Resource being demoted * \param[in] node Node where resource should be demoted (ignored) * \param[in] optional Whether actions should be optional */ static void demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional) { /* Since this will only be called for a primitive (possibly as an instance * of a collective resource), the resource is multiply active if it is * running on more than one node, so we want to demote on all of them as * part of recovery, regardless of which one is the desired node. */ for (GList *iter = rsc->priv->active_nodes; iter != NULL; iter = iter->next) { pcmk_node_t *current = (pcmk_node_t *) iter->data; if (is_expected_node(rsc, current)) { pcmk__rsc_trace(rsc, "Skipping demote of multiply active resource %s " "on expected node %s", rsc->id, pcmk__node_name(current)); } else { pcmk__rsc_trace(rsc, "Scheduling %s demotion of %s on %s", (optional? "optional" : "required"), rsc->id, pcmk__node_name(current)); demote_action(rsc, current, optional); } } } static void assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional) { pcmk__assert(false); } /*! * \internal * \brief Schedule cleanup of a resource * * \param[in,out] rsc Resource to clean up * \param[in] node Node to clean up on * \param[in] optional Whether clean-up should be optional */ void pcmk__schedule_cleanup(pcmk_resource_t *rsc, const pcmk_node_t *node, bool optional) { /* If the cleanup is required, its orderings are optional, because they're * relevant only if both actions are required. Conversely, if the cleanup is * optional, the orderings make the then action required if the first action * becomes required. */ uint32_t flag = optional? pcmk__ar_first_implies_then : pcmk__ar_ordered; CRM_CHECK((rsc != NULL) && (node != NULL), return); if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed", rsc->id, pcmk__node_name(node)); return; } - if (node->details->unclean || !node->details->online) { + if (!pcmk__node_available(node, pcmk__node_alive)) { pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable", rsc->id, pcmk__node_name(node)); return; } crm_notice("Scheduling clean-up of %s on %s", rsc->id, pcmk__node_name(node)); delete_action(rsc, node, optional); // stop -> clean-up -> start pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP, rsc, PCMK_ACTION_DELETE, flag); pcmk__order_resource_actions(rsc, PCMK_ACTION_DELETE, rsc, PCMK_ACTION_START, flag); } /*! * \internal * \brief Add primitive meta-attributes relevant to graph actions to XML * * \param[in] rsc Primitive resource whose meta-attributes should be added * \param[in,out] xml Transition graph action attributes XML to add to */ void pcmk__primitive_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml) { char *name = NULL; char *value = NULL; const pcmk_resource_t *parent = NULL; pcmk__assert(pcmk__is_primitive(rsc) && (xml != NULL)); /* Clone instance numbers get set internally as meta-attributes, and are * needed in the transition graph (for example, to tell unique clone * instances apart). */ value = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CLONE); if (value != NULL) { name = crm_meta_name(PCMK__META_CLONE); crm_xml_add(xml, name, value); free(name); } // Not sure if this one is really needed ... value = g_hash_table_lookup(rsc->priv->meta, PCMK_META_REMOTE_NODE); if (value != NULL) { name = crm_meta_name(PCMK_META_REMOTE_NODE); crm_xml_add(xml, name, value); free(name); } /* The PCMK__META_CONTAINER meta-attribute can be set on the primitive * itself or one of its ancestors, so check them all and keep the highest. */ for (parent = rsc; parent != NULL; parent = parent->priv->parent) { if (parent->priv->launcher != NULL) { crm_xml_add(xml, CRM_META "_" PCMK__META_CONTAINER, parent->priv->launcher->id); } } /* Bundle replica children will get their external-ip set internally as a * meta-attribute. The graph action needs it, but under a different naming * convention than other meta-attributes. */ value = g_hash_table_lookup(rsc->priv->meta, "external-ip"); if (value != NULL) { crm_xml_add(xml, "pcmk_external_ip", value); } } // Primitive implementation of pcmk__assignment_methods_t:add_utilization() void pcmk__primitive_add_utilization(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { pcmk__assert(pcmk__is_primitive(rsc) && (orig_rsc != NULL) && (utilization != NULL)); if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) { return; } pcmk__rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization", orig_rsc->id, rsc->id); pcmk__release_node_capacity(utilization, rsc); } /*! * \internal * \brief Get epoch time of node's shutdown attribute (or now if none) * * \param[in,out] node Node to check * * \return Epoch time corresponding to shutdown attribute if set or now if not */ static time_t shutdown_time(pcmk_node_t *node) { const char *shutdown = pcmk__node_attr(node, PCMK__NODE_ATTR_SHUTDOWN, NULL, pcmk__rsc_node_current); time_t result = 0; if (shutdown != NULL) { long long result_ll; int rc = pcmk__scan_ll(shutdown, &result_ll, 0LL); if (rc == pcmk_rc_ok) { result = (time_t) result_ll; } else { crm_warn("Ignoring invalid value '%s' for %s " PCMK__NODE_ATTR_SHUTDOWN " attribute: %s", shutdown, pcmk__node_name(node), pcmk_rc_str(rc)); } } if (result == 0) { result = pcmk__scheduler_epoch_time(node->priv->scheduler); } return result; } /*! * \internal * \brief Ban a resource from a node if it's not locked to the node * * \param[in] data Node to check * \param[in,out] user_data Resource to check */ static void ban_if_not_locked(gpointer data, gpointer user_data) { const pcmk_node_t *node = (const pcmk_node_t *) data; pcmk_resource_t *rsc = (pcmk_resource_t *) user_data; if (!pcmk__same_node(node, rsc->priv->lock_node)) { resource_location(rsc, node, -PCMK_SCORE_INFINITY, PCMK_OPT_SHUTDOWN_LOCK, rsc->priv->scheduler); } } // Primitive implementation of pcmk__assignment_methods_t:shutdown_lock() void pcmk__primitive_shutdown_lock(pcmk_resource_t *rsc) { pcmk_scheduler_t *scheduler = NULL; pcmk__assert(pcmk__is_primitive(rsc)); scheduler = rsc->priv->scheduler; // Fence devices and remote connections can't be locked if (pcmk_any_flags_set(rsc->flags, pcmk__rsc_fence_device |pcmk__rsc_is_remote_connection)) { return; } if (rsc->priv->lock_node != NULL) { // The lock was obtained from resource history if (rsc->priv->active_nodes != NULL) { /* The resource was started elsewhere even though it is now * considered locked. This shouldn't be possible, but as a * failsafe, we don't want to disturb the resource now. */ pcmk__rsc_info(rsc, "Cancelling shutdown lock " "because %s is already active", rsc->id); pe__clear_resource_history(rsc, rsc->priv->lock_node); rsc->priv->lock_node = NULL; rsc->priv->lock_time = 0; } // Only a resource active on exactly one node can be locked } else if (pcmk__list_of_1(rsc->priv->active_nodes)) { pcmk_node_t *node = rsc->priv->active_nodes->data; if (node->details->shutdown) { if (node->details->unclean) { pcmk__rsc_debug(rsc, "Not locking %s to unclean %s for shutdown", rsc->id, pcmk__node_name(node)); } else { rsc->priv->lock_node = node; rsc->priv->lock_time = shutdown_time(node); } } } if (rsc->priv->lock_node == NULL) { // No lock needed return; } if (scheduler->priv->shutdown_lock_ms > 0U) { time_t lock_expiration = rsc->priv->lock_time + pcmk__timeout_ms2s(scheduler->priv->shutdown_lock_ms); pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)", rsc->id, pcmk__node_name(rsc->priv->lock_node), (long long) lock_expiration); pcmk__update_recheck_time(++lock_expiration, scheduler, "shutdown lock expiration"); } else { pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown", rsc->id, pcmk__node_name(rsc->priv->lock_node)); } // If resource is locked to one node, ban it from all other nodes g_list_foreach(scheduler->nodes, ban_if_not_locked, rsc); } diff --git a/lib/pacemaker/pcmk_sched_recurring.c b/lib/pacemaker/pcmk_sched_recurring.c index 88d8a7035b..299151b197 100644 --- a/lib/pacemaker/pcmk_sched_recurring.c +++ b/lib/pacemaker/pcmk_sched_recurring.c @@ -1,755 +1,754 @@ /* - * Copyright 2004-2024 the Pacemaker project contributors + * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include "libpacemaker_private.h" // Information parsed from an operation history entry in the CIB struct op_history { // XML attributes const char *id; // ID of history entry const char *name; // Action name // Parsed information char *key; // Operation key for action enum rsc_role_e role; // Action role (or pcmk_role_unknown for default) guint interval_ms; // Action interval }; /*! * \internal * \brief Parse an interval from XML * * \param[in] xml XML containing an interval attribute * * \return Interval parsed from XML (or 0 as default) */ static guint xe_interval(const xmlNode *xml) { guint interval_ms = 0U; pcmk_parse_interval_spec(crm_element_value(xml, PCMK_META_INTERVAL), &interval_ms); return interval_ms; } /*! * \internal * \brief Check whether an operation exists multiple times in resource history * * \param[in] rsc Resource with history to search * \param[in] name Name of action to search for * \param[in] interval_ms Interval (in milliseconds) of action to search for * * \return true if an operation with \p name and \p interval_ms exists more than * once in the operation history of \p rsc, otherwise false */ static bool is_op_dup(const pcmk_resource_t *rsc, const char *name, guint interval_ms) { const char *id = NULL; for (xmlNode *op = pcmk__xe_first_child(rsc->priv->ops_xml, PCMK_XE_OP, NULL, NULL); op != NULL; op = pcmk__xe_next(op, PCMK_XE_OP)) { // Check whether action name and interval match if (!pcmk__str_eq(crm_element_value(op, PCMK_XA_NAME), name, pcmk__str_none) || (xe_interval(op) != interval_ms)) { continue; } if (pcmk__xe_id(op) == NULL) { continue; // Shouldn't be possible } if (id == NULL) { id = pcmk__xe_id(op); // First matching op } else { pcmk__config_err("Operation %s is duplicate of %s (do not use " "same name and interval combination more " "than once per resource)", pcmk__xe_id(op), id); return true; } } return false; } /*! * \internal * \brief Check whether an action name is one that can be recurring * * \param[in] name Action name to check * * \return true if \p name is an action known to be unsuitable as a recurring * operation, otherwise false * * \note Pacemaker's current philosophy is to allow users to configure recurring * operations except for a short list of actions known not to be suitable * for that (as opposed to allowing only actions known to be suitable, * which includes only monitor). Among other things, this approach allows * users to define their own custom operations and make them recurring, * though that use case is not well tested. */ static bool op_cannot_recur(const char *name) { return pcmk__str_any_of(name, PCMK_ACTION_STOP, PCMK_ACTION_START, PCMK_ACTION_DEMOTE, PCMK_ACTION_PROMOTE, PCMK_ACTION_RELOAD_AGENT, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL); } /*! * \internal * \brief Check whether a resource history entry is for a recurring action * * \param[in] rsc Resource that history entry is for * \param[in] xml XML of resource history entry to check * \param[out] op Where to store parsed info if recurring * * \return true if \p xml is for a recurring action, otherwise false */ static bool is_recurring_history(const pcmk_resource_t *rsc, const xmlNode *xml, struct op_history *op) { const char *role = NULL; op->interval_ms = xe_interval(xml); if (op->interval_ms == 0) { return false; // Not recurring } op->id = pcmk__xe_id(xml); if (pcmk__str_empty(op->id)) { pcmk__config_err("Ignoring resource history entry without ID"); return false; // Shouldn't be possible (unless CIB was manually edited) } op->name = crm_element_value(xml, PCMK_XA_NAME); if (op_cannot_recur(op->name)) { pcmk__config_err("Ignoring %s because %s action cannot be recurring", op->id, pcmk__s(op->name, "unnamed")); return false; } // There should only be one recurring operation per action/interval if (is_op_dup(rsc, op->name, op->interval_ms)) { return false; } // Ensure role is valid if specified role = crm_element_value(xml, PCMK_XA_ROLE); if (role == NULL) { op->role = pcmk_role_unknown; } else { op->role = pcmk_parse_role(role); if (op->role == pcmk_role_unknown) { pcmk__config_err("Ignoring %s role because %s is not a valid role", op->id, role); return false; } } // Only actions that are still configured and enabled matter if (pcmk__find_action_config(rsc, op->name, op->interval_ms, false) == NULL) { pcmk__rsc_trace(rsc, "Ignoring %s (%s-interval %s for %s) because it is " "disabled or no longer in configuration", op->id, pcmk__readable_interval(op->interval_ms), op->name, rsc->id); return false; } op->key = pcmk__op_key(rsc->id, op->name, op->interval_ms); return true; } /*! * \internal * \brief Check whether a recurring action for an active role should be optional * * \param[in] rsc Resource that recurring action is for * \param[in] node Node that \p rsc will be active on (if any) * \param[in] key Operation key for recurring action to check * \param[in,out] start Start action for \p rsc * * \return true if recurring action should be optional, otherwise false */ static bool active_recurring_should_be_optional(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *key, pcmk_action_t *start) { GList *possible_matches = NULL; if (node == NULL) { // Should only be possible if unmanaged and stopped pcmk__rsc_trace(rsc, "%s will be mandatory because resource is unmanaged", key); return false; } if (!pcmk_is_set(rsc->priv->cmds->action_flags(start, NULL), pcmk__action_optional)) { pcmk__rsc_trace(rsc, "%s will be mandatory because %s is", key, start->uuid); return false; } possible_matches = find_actions_exact(rsc->priv->actions, key, node); if (possible_matches == NULL) { pcmk__rsc_trace(rsc, "%s will be mandatory because it is not active on %s", key, pcmk__node_name(node)); return false; } for (const GList *iter = possible_matches; iter != NULL; iter = iter->next) { const pcmk_action_t *op = (const pcmk_action_t *) iter->data; if (pcmk_is_set(op->flags, pcmk__action_reschedule)) { pcmk__rsc_trace(rsc, "%s will be mandatory because " "it needs to be rescheduled", key); g_list_free(possible_matches); return false; } } g_list_free(possible_matches); return true; } /*! * \internal * \brief Create recurring action from resource history entry for an active role * * \param[in,out] rsc Resource that resource history is for * \param[in,out] start Start action for \p rsc on \p node * \param[in] node Node that resource will be active on (if any) * \param[in] op Resource history entry */ static void recurring_op_for_active(pcmk_resource_t *rsc, pcmk_action_t *start, const pcmk_node_t *node, const struct op_history *op) { pcmk_action_t *mon = NULL; bool is_optional = true; bool role_match = false; enum rsc_role_e monitor_role = op->role; // We're only interested in recurring actions for active roles if (monitor_role == pcmk_role_stopped) { return; } is_optional = active_recurring_should_be_optional(rsc, node, op->key, start); // Check whether monitor's role matches role resource will have if (monitor_role == pcmk_role_unknown) { monitor_role = pcmk_role_unpromoted; role_match = (rsc->priv->next_role != pcmk_role_promoted); } else { role_match = (rsc->priv->next_role == monitor_role); } if (!role_match) { if (is_optional) { // It's running, so cancel it char *after_key = NULL; pcmk_action_t *cancel_op = pcmk__new_cancel_action(rsc, op->name, op->interval_ms, node); switch (rsc->priv->orig_role) { case pcmk_role_unpromoted: case pcmk_role_started: if (rsc->priv->next_role == pcmk_role_promoted) { after_key = promote_key(rsc); } else if (rsc->priv->next_role == pcmk_role_stopped) { after_key = stop_key(rsc); } break; case pcmk_role_promoted: after_key = demote_key(rsc); break; default: break; } if (after_key) { pcmk__new_ordering(rsc, NULL, cancel_op, rsc, after_key, NULL, pcmk__ar_unrunnable_first_blocks, rsc->priv->scheduler); } } do_crm_log((is_optional? LOG_INFO : LOG_TRACE), "%s recurring action %s because %s configured for %s role " "(not %s)", (is_optional? "Cancelling" : "Ignoring"), op->key, op->id, pcmk_role_text(monitor_role), pcmk_role_text(rsc->priv->next_role)); return; } pcmk__rsc_trace(rsc, "Creating %s recurring action %s for %s (%s %s on %s)", (is_optional? "optional" : "mandatory"), op->key, op->id, rsc->id, pcmk_role_text(rsc->priv->next_role), pcmk__node_name(node)); mon = custom_action(rsc, strdup(op->key), op->name, node, is_optional, rsc->priv->scheduler); if (!pcmk_is_set(start->flags, pcmk__action_runnable)) { pcmk__rsc_trace(rsc, "%s is unrunnable because start is", mon->uuid); pcmk__clear_action_flags(mon, pcmk__action_runnable); - } else if ((node == NULL) || !node->details->online - || node->details->unclean) { + } else if (!pcmk__node_available(node, pcmk__node_alive)) { pcmk__rsc_trace(rsc, "%s is unrunnable because no node is available", mon->uuid); pcmk__clear_action_flags(mon, pcmk__action_runnable); } else if (!pcmk_is_set(mon->flags, pcmk__action_optional)) { pcmk__rsc_info(rsc, "Start %s-interval %s for %s on %s", pcmk__readable_interval(op->interval_ms), mon->task, rsc->id, pcmk__node_name(node)); } if (rsc->priv->next_role == pcmk_role_promoted) { pe__add_action_expected_result(mon, CRM_EX_PROMOTED); } // Order monitor relative to other actions if ((node == NULL) || pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { pcmk__new_ordering(rsc, start_key(rsc), NULL, NULL, strdup(mon->uuid), mon, pcmk__ar_first_implies_then |pcmk__ar_unrunnable_first_blocks, rsc->priv->scheduler); pcmk__new_ordering(rsc, reload_key(rsc), NULL, NULL, strdup(mon->uuid), mon, pcmk__ar_first_implies_then |pcmk__ar_unrunnable_first_blocks, rsc->priv->scheduler); if (rsc->priv->next_role == pcmk_role_promoted) { pcmk__new_ordering(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pcmk__ar_ordered |pcmk__ar_unrunnable_first_blocks, rsc->priv->scheduler); } else if (rsc->priv->orig_role == pcmk_role_promoted) { pcmk__new_ordering(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pcmk__ar_ordered |pcmk__ar_unrunnable_first_blocks, rsc->priv->scheduler); } } } /*! * \internal * \brief Cancel a recurring action if running on a node * * \param[in,out] rsc Resource that action is for * \param[in] node Node to cancel action on * \param[in] key Operation key for action * \param[in] name Action name * \param[in] interval_ms Action interval (in milliseconds) */ static void cancel_if_running(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *key, const char *name, guint interval_ms) { GList *possible_matches = find_actions_exact(rsc->priv->actions, key, node); pcmk_action_t *cancel_op = NULL; if (possible_matches == NULL) { return; // Recurring action isn't running on this node } g_list_free(possible_matches); cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node); switch (rsc->priv->next_role) { case pcmk_role_started: case pcmk_role_unpromoted: /* Order starts after cancel. If the current role is * stopped, this cancels the monitor before the resource * starts; if the current role is started, then this cancels * the monitor on a migration target before starting there. */ pcmk__new_ordering(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pcmk__ar_unrunnable_first_blocks, rsc->priv->scheduler); break; default: break; } pcmk__rsc_info(rsc, "Cancelling %s-interval %s action for %s on %s because " "configured for " PCMK_ROLE_STOPPED " role (not %s)", pcmk__readable_interval(interval_ms), name, rsc->id, pcmk__node_name(node), pcmk_role_text(rsc->priv->next_role)); } /*! * \internal * \brief Order an action after all probes of a resource on a node * * \param[in,out] rsc Resource to check for probes * \param[in] node Node to check for probes of \p rsc * \param[in,out] action Action to order after probes of \p rsc on \p node */ static void order_after_probes(pcmk_resource_t *rsc, const pcmk_node_t *node, pcmk_action_t *action) { GList *probes = pe__resource_actions(rsc, node, PCMK_ACTION_MONITOR, FALSE); for (GList *iter = probes; iter != NULL; iter = iter->next) { order_actions((pcmk_action_t *) iter->data, action, pcmk__ar_unrunnable_first_blocks); } g_list_free(probes); } /*! * \internal * \brief Order an action after all stops of a resource on a node * * \param[in,out] rsc Resource to check for stops * \param[in] node Node to check for stops of \p rsc * \param[in,out] action Action to order after stops of \p rsc on \p node */ static void order_after_stops(pcmk_resource_t *rsc, const pcmk_node_t *node, pcmk_action_t *action) { GList *stop_ops = pe__resource_actions(rsc, node, PCMK_ACTION_STOP, TRUE); for (GList *iter = stop_ops; iter != NULL; iter = iter->next) { pcmk_action_t *stop = (pcmk_action_t *) iter->data; if (!pcmk_is_set(stop->flags, pcmk__action_optional) && !pcmk_is_set(action->flags, pcmk__action_optional) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { pcmk__rsc_trace(rsc, "%s optional on %s: unmanaged", action->uuid, pcmk__node_name(node)); pcmk__set_action_flags(action, pcmk__action_optional); } if (!pcmk_is_set(stop->flags, pcmk__action_runnable)) { crm_debug("%s unrunnable on %s: stop is unrunnable", action->uuid, pcmk__node_name(node)); pcmk__clear_action_flags(action, pcmk__action_runnable); } if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { pcmk__new_ordering(rsc, stop_key(rsc), stop, NULL, NULL, action, pcmk__ar_first_implies_then |pcmk__ar_unrunnable_first_blocks, rsc->priv->scheduler); } } g_list_free(stop_ops); } /*! * \internal * \brief Create recurring action from resource history entry for inactive role * * \param[in,out] rsc Resource that resource history is for * \param[in] node Node that resource will be active on (if any) * \param[in] op Resource history entry */ static void recurring_op_for_inactive(pcmk_resource_t *rsc, const pcmk_node_t *node, const struct op_history *op) { GList *possible_matches = NULL; // We're only interested in recurring actions for the inactive role if (op->role != pcmk_role_stopped) { return; } if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) { crm_notice("Ignoring %s (recurring monitors for " PCMK_ROLE_STOPPED " role are not supported for anonymous clones)", op->id); return; // @TODO add support } pcmk__rsc_trace(rsc, "Creating recurring action %s for %s on nodes " "where it should not be running", op->id, rsc->id); for (GList *iter = rsc->priv->scheduler->nodes; iter != NULL; iter = iter->next) { pcmk_node_t *stop_node = (pcmk_node_t *) iter->data; bool is_optional = true; pcmk_action_t *stopped_mon = NULL; // Cancel action on node where resource will be active if ((node != NULL) && pcmk__str_eq(stop_node->priv->name, node->priv->name, pcmk__str_casei)) { cancel_if_running(rsc, node, op->key, op->name, op->interval_ms); continue; } // Recurring action on this node is optional if it's already active here possible_matches = find_actions_exact(rsc->priv->actions, op->key, stop_node); is_optional = (possible_matches != NULL); g_list_free(possible_matches); pcmk__rsc_trace(rsc, "Creating %s recurring action %s for %s (%s " PCMK_ROLE_STOPPED " on %s)", (is_optional? "optional" : "mandatory"), op->key, op->id, rsc->id, pcmk__node_name(stop_node)); stopped_mon = custom_action(rsc, strdup(op->key), op->name, stop_node, is_optional, rsc->priv->scheduler); pe__add_action_expected_result(stopped_mon, CRM_EX_NOT_RUNNING); if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { order_after_probes(rsc, stop_node, stopped_mon); } /* The recurring action is for the inactive role, so it shouldn't be * performed until the resource is inactive. */ order_after_stops(rsc, stop_node, stopped_mon); - if (!stop_node->details->online || stop_node->details->unclean) { + if (!pcmk__node_available(stop_node, pcmk__node_alive)) { pcmk__rsc_debug(rsc, "%s unrunnable on %s: node unavailable)", stopped_mon->uuid, pcmk__node_name(stop_node)); pcmk__clear_action_flags(stopped_mon, pcmk__action_runnable); } if (pcmk_is_set(stopped_mon->flags, pcmk__action_runnable) && !pcmk_is_set(stopped_mon->flags, pcmk__action_optional)) { crm_notice("Start recurring %s-interval %s for " PCMK_ROLE_STOPPED " %s on %s", pcmk__readable_interval(op->interval_ms), stopped_mon->task, rsc->id, pcmk__node_name(stop_node)); } } } /*! * \internal * \brief Create recurring actions for a resource * * \param[in,out] rsc Resource to create recurring actions for */ void pcmk__create_recurring_actions(pcmk_resource_t *rsc) { pcmk_action_t *start = NULL; if (pcmk_is_set(rsc->flags, pcmk__rsc_blocked)) { pcmk__rsc_trace(rsc, "Skipping recurring actions for blocked resource %s", rsc->id); return; } if (pcmk_is_set(rsc->flags, pcmk__rsc_maintenance)) { pcmk__rsc_trace(rsc, "Skipping recurring actions for %s " "in maintenance mode", rsc->id); return; } if (rsc->priv->assigned_node == NULL) { // Recurring actions for active roles not needed } else if (rsc->priv->assigned_node->details->maintenance) { pcmk__rsc_trace(rsc, "Skipping recurring actions for %s on %s " "in maintenance mode", rsc->id, pcmk__node_name(rsc->priv->assigned_node)); } else if ((rsc->priv->next_role != pcmk_role_stopped) || !pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { // Recurring actions for active roles needed start = start_action(rsc, rsc->priv->assigned_node, TRUE); } pcmk__rsc_trace(rsc, "Creating any recurring actions needed for %s", rsc->id); for (xmlNode *op = pcmk__xe_first_child(rsc->priv->ops_xml, PCMK_XE_OP, NULL, NULL); op != NULL; op = pcmk__xe_next(op, PCMK_XE_OP)) { struct op_history op_history = { NULL, }; if (!is_recurring_history(rsc, op, &op_history)) { continue; } if (start != NULL) { recurring_op_for_active(rsc, start, rsc->priv->assigned_node, &op_history); } recurring_op_for_inactive(rsc, rsc->priv->assigned_node, &op_history); free(op_history.key); } } /*! * \internal * \brief Create an executor cancel action * * \param[in,out] rsc Resource of action to cancel * \param[in] task Name of action to cancel * \param[in] interval_ms Interval of action to cancel * \param[in] node Node of action to cancel * * \return Created op */ pcmk_action_t * pcmk__new_cancel_action(pcmk_resource_t *rsc, const char *task, guint interval_ms, const pcmk_node_t *node) { pcmk_action_t *cancel_op = NULL; char *key = NULL; char *interval_ms_s = NULL; pcmk__assert((rsc != NULL) && (task != NULL) && (node != NULL)); key = pcmk__op_key(rsc->id, task, interval_ms); /* This finds an existing action by key, so custom_action() does not change * cancel_op->task. */ cancel_op = custom_action(rsc, key, PCMK_ACTION_CANCEL, node, FALSE, rsc->priv->scheduler); pcmk__str_update(&(cancel_op->task), PCMK_ACTION_CANCEL); pcmk__str_update(&(cancel_op->cancel_task), task); interval_ms_s = crm_strdup_printf("%u", interval_ms); pcmk__insert_meta(cancel_op, PCMK_XA_OPERATION, task); pcmk__insert_meta(cancel_op, PCMK_META_INTERVAL, interval_ms_s); free(interval_ms_s); return cancel_op; } /*! * \internal * \brief Schedule cancellation of a recurring action * * \param[in,out] rsc Resource that action is for * \param[in] call_id Action's call ID from history * \param[in] task Action name * \param[in] interval_ms Action interval * \param[in] node Node that history entry is for * \param[in] reason Short description of why action is cancelled */ void pcmk__schedule_cancel(pcmk_resource_t *rsc, const char *call_id, const char *task, guint interval_ms, const pcmk_node_t *node, const char *reason) { pcmk_action_t *cancel = NULL; CRM_CHECK((rsc != NULL) && (task != NULL) && (node != NULL) && (reason != NULL), return); crm_info("Recurring %s-interval %s for %s will be stopped on %s: %s", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node), reason); cancel = pcmk__new_cancel_action(rsc, task, interval_ms, node); pcmk__insert_meta(cancel, PCMK__XA_CALL_ID, call_id); // Cancellations happen after stops pcmk__new_ordering(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pcmk__ar_ordered, rsc->priv->scheduler); } /*! * \internal * \brief Create a recurring action marked as needing rescheduling if active * * \param[in,out] rsc Resource that action is for * \param[in] task Name of action being rescheduled * \param[in] interval_ms Action interval (in milliseconds) * \param[in,out] node Node where action should be rescheduled */ void pcmk__reschedule_recurring(pcmk_resource_t *rsc, const char *task, guint interval_ms, pcmk_node_t *node) { pcmk_action_t *op = NULL; trigger_unfencing(rsc, node, "Device parameters changed (reschedule)", NULL, rsc->priv->scheduler); op = custom_action(rsc, pcmk__op_key(rsc->id, task, interval_ms), task, node, TRUE, rsc->priv->scheduler); pcmk__set_action_flags(op, pcmk__action_reschedule); } /*! * \internal * \brief Check whether an action is recurring * * \param[in] action Action to check * * \return true if \p action has a nonzero interval, otherwise false */ bool pcmk__action_is_recurring(const pcmk_action_t *action) { guint interval_ms = 0; if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0, &interval_ms) != pcmk_rc_ok) { return false; } return (interval_ms > 0); } diff --git a/lib/pacemaker/pcmk_sched_remote.c b/lib/pacemaker/pcmk_sched_remote.c index 90b43a3995..03231e4cf0 100644 --- a/lib/pacemaker/pcmk_sched_remote.c +++ b/lib/pacemaker/pcmk_sched_remote.c @@ -1,734 +1,733 @@ /* - * Copyright 2004-2024 the Pacemaker project contributors + * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" enum remote_connection_state { remote_state_unknown = 0, remote_state_alive = 1, remote_state_resting = 2, remote_state_failed = 3, remote_state_stopped = 4 }; static const char * state2text(enum remote_connection_state state) { switch (state) { case remote_state_unknown: return "unknown"; case remote_state_alive: return "alive"; case remote_state_resting: return "resting"; case remote_state_failed: return "failed"; case remote_state_stopped: return "stopped"; } return "impossible"; } /* We always use pcmk__ar_guest_allowed with these convenience functions to * exempt internally generated constraints from the prohibition of user * constraints involving remote connection resources. * * The start ordering additionally uses pcmk__ar_unrunnable_first_blocks so that * the specified action is not runnable if the start is not runnable. */ static inline void order_start_then_action(pcmk_resource_t *first_rsc, pcmk_action_t *then_action, uint32_t extra) { if ((first_rsc != NULL) && (then_action != NULL)) { pcmk__new_ordering(first_rsc, start_key(first_rsc), NULL, then_action->rsc, NULL, then_action, pcmk__ar_guest_allowed |pcmk__ar_unrunnable_first_blocks |extra, first_rsc->priv->scheduler); } } static inline void order_action_then_stop(pcmk_action_t *first_action, pcmk_resource_t *then_rsc, uint32_t extra) { if ((first_action != NULL) && (then_rsc != NULL)) { pcmk__new_ordering(first_action->rsc, NULL, first_action, then_rsc, stop_key(then_rsc), NULL, pcmk__ar_guest_allowed|extra, then_rsc->priv->scheduler); } } static enum remote_connection_state get_remote_node_state(const pcmk_node_t *node) { const pcmk_resource_t *remote_rsc = NULL; const pcmk_node_t *cluster_node = NULL; pcmk__assert(node != NULL); remote_rsc = node->priv->remote; pcmk__assert(remote_rsc != NULL); cluster_node = pcmk__current_node(remote_rsc); /* If the cluster node the remote connection resource resides on * is unclean or went offline, we can't process any operations * on that remote node until after it starts elsewhere. */ if ((remote_rsc->priv->next_role == pcmk_role_stopped) || (remote_rsc->priv->assigned_node == NULL)) { // The connection resource is not going to run anywhere if ((cluster_node != NULL) && cluster_node->details->unclean) { /* The remote connection is failed because its resource is on a * failed node and can't be recovered elsewhere, so we must fence. */ return remote_state_failed; } if (!pcmk_is_set(remote_rsc->flags, pcmk__rsc_failed)) { /* Connection resource is cleanly stopped */ return remote_state_stopped; } /* Connection resource is failed */ if ((remote_rsc->priv->next_role == pcmk_role_stopped) && (remote_rsc->priv->remote_reconnect_ms > 0U) && pcmk_is_set(node->priv->flags, pcmk__node_remote_fenced) && !pe__shutdown_requested(node)) { /* We won't know whether the connection is recoverable until the * reconnect interval expires and we reattempt connection. */ return remote_state_unknown; } /* The remote connection is in a failed state. If there are any * resources known to be active on it (stop) or in an unknown state * (probe), we must assume the worst and fence it. */ return remote_state_failed; } else if (cluster_node == NULL) { /* Connection is recoverable but not currently running anywhere, so see * if we can recover it first */ return remote_state_unknown; - } else if (cluster_node->details->unclean - || !(cluster_node->details->online)) { + } else if (!pcmk__node_available(cluster_node, pcmk__node_alive)) { // Connection is running on a dead node, see if we can recover it first return remote_state_resting; } else if (pcmk__list_of_multiple(remote_rsc->priv->active_nodes) && (remote_rsc->priv->partial_migration_source != NULL) && (remote_rsc->priv->partial_migration_target != NULL)) { /* We're in the middle of migrating a connection resource, so wait until * after the migration completes before performing any actions. */ return remote_state_resting; } return remote_state_alive; } /*! * \internal * \brief Order actions on remote node relative to actions for the connection * * \param[in,out] action An action scheduled on a Pacemaker Remote node */ static void apply_remote_ordering(pcmk_action_t *action) { pcmk_resource_t *remote_rsc = NULL; enum pcmk__action_type task = pcmk__parse_action(action->task); enum remote_connection_state state = get_remote_node_state(action->node); uint32_t order_opts = pcmk__ar_none; if (action->rsc == NULL) { return; } pcmk__assert(pcmk__is_pacemaker_remote_node(action->node)); remote_rsc = action->node->priv->remote; pcmk__assert(remote_rsc != NULL); crm_trace("Order %s action %s relative to %s%s (state: %s)", action->task, action->uuid, pcmk_is_set(remote_rsc->flags, pcmk__rsc_failed)? "failed " : "", remote_rsc->id, state2text(state)); if (pcmk__strcase_any_of(action->task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* Migration ops map to pcmk__action_unspecified, but we need to apply * the same ordering as for stop or demote (see get_router_node()). */ task = pcmk__action_stop; } switch (task) { case pcmk__action_start: case pcmk__action_promote: order_opts = pcmk__ar_none; if (state == remote_state_failed) { /* Force recovery, by making this action required */ pcmk__set_relation_flags(order_opts, pcmk__ar_first_implies_then); } /* Ensure connection is up before running this action */ order_start_then_action(remote_rsc, action, order_opts); break; case pcmk__action_stop: if (state == remote_state_alive) { order_action_then_stop(action, remote_rsc, pcmk__ar_then_implies_first); } else if (state == remote_state_failed) { /* The resource is active on the node, but since we don't have a * valid connection, the only way to stop the resource is by * fencing the node. There is no need to order the stop relative * to the remote connection, since the stop will become implied * by the fencing. */ pe_fence_node(remote_rsc->priv->scheduler, action->node, "resources are active but " "connection is unrecoverable", FALSE); } else if (remote_rsc->priv->next_role == pcmk_role_stopped) { /* State must be remote_state_unknown or remote_state_stopped. * Since the connection is not coming back up in this * transition, stop this resource first. */ order_action_then_stop(action, remote_rsc, pcmk__ar_then_implies_first); } else { /* The connection is going to be started somewhere else, so * stop this resource after that completes. */ order_start_then_action(remote_rsc, action, pcmk__ar_none); } break; case pcmk__action_demote: /* Only order this demote relative to the connection start if the * connection isn't being torn down. Otherwise, the demote would be * blocked because the connection start would not be allowed. */ if ((state == remote_state_resting) || (state == remote_state_unknown)) { order_start_then_action(remote_rsc, action, pcmk__ar_none); } /* Otherwise we can rely on the stop ordering */ break; default: /* Wait for the connection resource to be up */ if (pcmk__action_is_recurring(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ order_start_then_action(remote_rsc, action, pcmk__ar_first_implies_then); } else { pcmk_node_t *cluster_node = pcmk__current_node(remote_rsc); if ((task == pcmk__action_monitor) && (state == remote_state_failed)) { /* We would only be here if we do not know the state of the * resource on the remote node. Since we have no way to find * out, it is necessary to fence the node. */ pe_fence_node(remote_rsc->priv->scheduler, action->node, "resources are in unknown state " "and connection is unrecoverable", FALSE); } if ((cluster_node != NULL) && (state == remote_state_stopped)) { /* The connection is currently up, but is going down * permanently. Make sure we check services are actually * stopped _before_ we let the connection get closed. */ order_action_then_stop(action, remote_rsc, pcmk__ar_unrunnable_first_blocks); } else { order_start_then_action(remote_rsc, action, pcmk__ar_none); } } break; } } static void apply_launcher_ordering(pcmk_action_t *action) { pcmk_resource_t *remote_rsc = NULL; pcmk_resource_t *launcher = NULL; enum pcmk__action_type task = pcmk__parse_action(action->task); pcmk__assert(action->rsc != NULL); pcmk__assert(pcmk__is_pacemaker_remote_node(action->node)); remote_rsc = action->node->priv->remote; pcmk__assert(remote_rsc != NULL); launcher = remote_rsc->priv->launcher; pcmk__assert(launcher != NULL); if (pcmk_is_set(launcher->flags, pcmk__rsc_failed)) { pe_fence_node(action->rsc->priv->scheduler, action->node, "container failed", FALSE); } crm_trace("Order %s action %s relative to %s%s for %s%s", action->task, action->uuid, pcmk_is_set(remote_rsc->flags, pcmk__rsc_failed)? "failed " : "", remote_rsc->id, pcmk_is_set(launcher->flags, pcmk__rsc_failed)? "failed " : "", launcher->id); if (pcmk__strcase_any_of(action->task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* Migration ops map to pcmk__action_unspecified, but we need to apply * the same ordering as for stop or demote (see get_router_node()). */ task = pcmk__action_stop; } switch (task) { case pcmk__action_start: case pcmk__action_promote: // Force resource recovery if the launcher is recovered order_start_then_action(launcher, action, pcmk__ar_first_implies_then); // Wait for the connection resource to be up, too order_start_then_action(remote_rsc, action, pcmk__ar_none); break; case pcmk__action_stop: case pcmk__action_demote: if (pcmk_is_set(launcher->flags, pcmk__rsc_failed)) { /* When the launcher representing a guest node fails, any stop * or demote actions for resources running on the guest node * are implied by the launcher stopping. This is similar to * how fencing operations work for cluster nodes and remote * nodes. */ } else { /* Ensure the operation happens before the connection is brought * down. * * If we really wanted to, we could order these after the * connection start, IFF the launcher's current role was * stopped (otherwise we re-introduce an ordering loop when the * connection is restarting). */ order_action_then_stop(action, remote_rsc, pcmk__ar_none); } break; default: /* Wait for the connection resource to be up */ if (pcmk__action_is_recurring(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ if (task != pcmk__action_unspecified) { order_start_then_action(remote_rsc, action, pcmk__ar_first_implies_then); } } else { order_start_then_action(remote_rsc, action, pcmk__ar_none); } break; } } /*! * \internal * \brief Order all relevant actions relative to remote connection actions * * \param[in,out] scheduler Scheduler data */ void pcmk__order_remote_connection_actions(pcmk_scheduler_t *scheduler) { if (!pcmk_is_set(scheduler->flags, pcmk__sched_have_remote_nodes)) { return; } crm_trace("Creating remote connection orderings"); for (GList *iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = iter->data; pcmk_resource_t *remote = NULL; // We are only interested in resource actions if (action->rsc == NULL) { continue; } /* Special case: If we are clearing the failcount of an actual * remote connection resource, then make sure this happens before * any start of the resource in this transition. */ if (pcmk_is_set(action->rsc->flags, pcmk__rsc_is_remote_connection) && pcmk__str_eq(action->task, PCMK_ACTION_CLEAR_FAILCOUNT, pcmk__str_none)) { pcmk__new_ordering(action->rsc, NULL, action, action->rsc, pcmk__op_key(action->rsc->id, PCMK_ACTION_START, 0), NULL, pcmk__ar_ordered, scheduler); continue; } // We are only interested in actions assigned to a node if (action->node == NULL) { continue; } if (!pcmk__is_pacemaker_remote_node(action->node)) { continue; } /* We are only interested in real actions. * * @TODO This is probably wrong; pseudo-actions might be converted to * real actions and vice versa later in update_actions() at the end of * pcmk__apply_orderings(). */ if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { continue; } remote = action->node->priv->remote; if (remote == NULL) { // Orphaned continue; } /* Another special case: if a resource is moving to a Pacemaker Remote * node, order the stop on the original node after any start of the * remote connection. This ensures that if the connection fails to * start, we leave the resource running on the original node. */ if (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_none)) { for (GList *item = action->rsc->priv->actions; item != NULL; item = item->next) { pcmk_action_t *rsc_action = item->data; if (!pcmk__same_node(rsc_action->node, action->node) && pcmk__str_eq(rsc_action->task, PCMK_ACTION_STOP, pcmk__str_none)) { pcmk__new_ordering(remote, start_key(remote), NULL, action->rsc, NULL, rsc_action, pcmk__ar_ordered, scheduler); } } } /* The action occurs across a remote connection, so create * ordering constraints that guarantee the action occurs while the node * is active (after start, before stop ... things like that). * * This is somewhat brittle in that we need to make sure the results of * this ordering are compatible with the result of get_router_node(). * It would probably be better to add PCMK__XA_ROUTER_NODE as part of * this logic rather than create_graph_action(). */ if (remote->priv->launcher != NULL) { crm_trace("Container ordering for %s", action->uuid); apply_launcher_ordering(action); } else { crm_trace("Remote ordering for %s", action->uuid); apply_remote_ordering(action); } } } /*! * \internal * \brief Check whether a node is a failed remote node * * \param[in] node Node to check * * \return true if \p node is a failed remote node, false otherwise */ bool pcmk__is_failed_remote_node(const pcmk_node_t *node) { return pcmk__is_remote_node(node) && (node->priv->remote != NULL) && (get_remote_node_state(node) == remote_state_failed); } /*! * \internal * \brief Check whether a given resource corresponds to a given node as guest * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return true if \p node is a guest node and \p rsc is its containing * resource, otherwise false */ bool pcmk__rsc_corresponds_to_guest(const pcmk_resource_t *rsc, const pcmk_node_t *node) { return (rsc != NULL) && (rsc->priv->launched != NULL) && (node != NULL) && (node->priv->remote != NULL) && (node->priv->remote->priv->launcher == rsc); } /*! * \internal * \brief Get proper connection host that a remote action must be routed through * * A remote connection resource might be starting, stopping, or migrating in the * same transition that an action needs to be executed on its Pacemaker Remote * node. Determine the proper node that the remote action should be routed * through. * * \param[in] action (Potentially remote) action to route * * \return Connection host that action should be routed through if remote, * otherwise NULL */ pcmk_node_t * pcmk__connection_host_for_action(const pcmk_action_t *action) { pcmk_node_t *began_on = NULL; pcmk_node_t *ended_on = NULL; bool partial_migration = false; const char *task = action->task; pcmk_resource_t *remote = NULL; if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_none) || !pcmk__is_pacemaker_remote_node(action->node)) { return NULL; } remote = action->node->priv->remote; pcmk__assert(remote != NULL); began_on = pcmk__current_node(remote); ended_on = remote->priv->assigned_node; if ((remote->priv->launcher == NULL) && (remote->priv->partial_migration_target != NULL)) { partial_migration = true; } if (began_on == NULL) { crm_trace("Routing %s for %s through remote connection's " "next node %s (starting)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (ended_on? ended_on->priv->name : "none"), partial_migration? " (partial migration)" : ""); return ended_on; } if (ended_on == NULL) { crm_trace("Routing %s for %s through remote connection's " "current node %s (stopping)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->priv->name : "none"), partial_migration? " (partial migration)" : ""); return began_on; } if (pcmk__same_node(began_on, ended_on)) { crm_trace("Routing %s for %s through remote connection's " "current node %s (not moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->priv->name : "none"), partial_migration? " (partial migration)" : ""); return began_on; } /* If we get here, the remote connection is moving during this transition. * This means some actions for resources behind the connection will get * routed through the cluster node the connection resource is currently on, * and others are routed through the cluster node the connection will end up * on. */ if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { task = g_hash_table_lookup(action->meta, "notify_operation"); } /* * Stop, demote, and migration actions must occur before the connection can * move (these actions are required before the remote resource can stop). In * this case, we know these actions have to be routed through the initial * cluster node the connection resource lived on before the move takes * place. * * The exception is a partial migration of a (non-guest) remote connection * resource; in that case, all actions (even these) will be ordered after * the connection's pseudo-start on the migration target, so the target is * the router node. */ if (pcmk__strcase_any_of(task, PCMK_ACTION_CANCEL, PCMK_ACTION_STOP, PCMK_ACTION_DEMOTE, PCMK_ACTION_MIGRATE_FROM, PCMK_ACTION_MIGRATE_TO, NULL) && !partial_migration) { crm_trace("Routing %s for %s through remote connection's " "current node %s (moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->priv->name : "none"), partial_migration? " (partial migration)" : ""); return began_on; } /* Everything else (start, promote, monitor, probe, refresh, * clear failcount, delete, ...) must occur after the connection starts on * the node it is moving to. */ crm_trace("Routing %s for %s through remote connection's " "next node %s (moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (ended_on? ended_on->priv->name : "none"), partial_migration? " (partial migration)" : ""); return ended_on; } /*! * \internal * \brief Replace remote connection's addr="#uname" with actual address * * REMOTE_CONTAINER_HACK: If a given resource is a remote connection resource * with its "addr" parameter set to "#uname", pull the actual value from the * parameters evaluated without a node (which was put there earlier in * pcmk__create_graph() when the bundle's expand() method was called). * * \param[in,out] rsc Resource to check * \param[in,out] params Resource parameters evaluated per node */ void pcmk__substitute_remote_addr(pcmk_resource_t *rsc, GHashTable *params) { const char *remote_addr = g_hash_table_lookup(params, PCMK_REMOTE_RA_ADDR); if (pcmk__str_eq(remote_addr, "#uname", pcmk__str_none)) { GHashTable *base = pe_rsc_params(rsc, NULL, rsc->priv->scheduler); remote_addr = g_hash_table_lookup(base, PCMK_REMOTE_RA_ADDR); if (remote_addr != NULL) { pcmk__insert_dup(params, PCMK_REMOTE_RA_ADDR, remote_addr); } } } /*! * \brief Add special guest node meta-attributes to XML * * If a given action will be executed on a guest node, add the following as XML * attributes (using meta-attribute naming): * * The resource's \c PCMK_META_CONTAINER_ATTRIBUTE_TARGET meta-attribute * (usually set only for bundles), as \c PCMK_META_CONTAINER_ATTRIBUTE_TARGET * * The guest's physical host (current host for "down" actions, next host for * "up" actions), as \c PCMK__META_PHYSICAL_HOST * * If the guest node has no physical host, then don't add either attribute. * * \param[in,out] args_xml XML to add attributes to * \param[in] action Action to check */ void pcmk__add_guest_meta_to_xml(xmlNode *args_xml, const pcmk_action_t *action) { const pcmk_node_t *guest = action->node; const pcmk_node_t *host = NULL; const pcmk_resource_t *launcher = NULL; enum pcmk__action_type task; if (!pcmk__is_guest_or_bundle_node(guest)) { return; } launcher = guest->priv->remote->priv->launcher; task = pcmk__parse_action(action->task); if ((task == pcmk__action_notify) || (task == pcmk__action_notified)) { task = pcmk__parse_action(g_hash_table_lookup(action->meta, "notify_operation")); } switch (task) { case pcmk__action_stop: case pcmk__action_stopped: case pcmk__action_demote: case pcmk__action_demoted: // "Down" actions take place on guest's current host host = pcmk__current_node(launcher); break; case pcmk__action_start: case pcmk__action_started: case pcmk__action_monitor: case pcmk__action_promote: case pcmk__action_promoted: // "Up" actions take place on guest's next host host = launcher->priv->assigned_node; break; default: break; } if (host != NULL) { gpointer target = g_hash_table_lookup(action->rsc->priv->meta, PCMK_META_CONTAINER_ATTRIBUTE_TARGET); hash2metafield((gpointer) PCMK_META_CONTAINER_ATTRIBUTE_TARGET, target, (gpointer) args_xml); hash2metafield((gpointer) PCMK__META_PHYSICAL_HOST, (gpointer) host->priv->name, (gpointer) args_xml); } }