diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h index fb9df7373a..acff7bf86b 100644 --- a/include/pcmki/pcmki_sched_utils.h +++ b/include/pcmki/pcmki_sched_utils.h @@ -1,47 +1,42 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__PCMKI_PCMKI_SCHED_UTILS__H # define PCMK__PCMKI_PCMKI_SCHED_UTILS__H #include // bool #include // GList, GHashTable, gboolean, guint #include // lrmd_event_data_t #include // cib_t #include #include #include #include #include #include /* Constraint helper functions */ GList *pcmk__copy_node_list(const GList *list, bool reset); pe_resource_t *find_compatible_child(const pe_resource_t *local_child, const pe_resource_t *rsc, enum rsc_role_e filter, gboolean current); -pe_resource_t *find_compatible_child_by_node(const pe_resource_t *local_child, - const pe_node_t *local_node, - const pe_resource_t *rsc, - enum rsc_role_e filter, - gboolean current); gboolean is_child_compatible(const pe_resource_t *child_rsc, const pe_node_t *local_node, enum rsc_role_e filter, gboolean current); enum pe_action_flags summary_action_flags(pe_action_t *action, GList *children, const pe_node_t *node); enum action_tasks clone_child_action(pe_action_t * action); int copies_per_node(pe_resource_t * rsc); xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event, const char *caller_version, int target_rc, const char *node, const char *origin); #endif diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c index 369487ee2b..5ffaa8c702 100644 --- a/lib/pacemaker/pcmk_sched_bundle.c +++ b/lib/pacemaker/pcmk_sched_bundle.c @@ -1,1163 +1,864 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have 
further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include "libpacemaker_private.h" #define PE__VARIANT_BUNDLE 1 #include static bool is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node) { for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (node->details == replica->node->details) { return TRUE; } } return FALSE; } /*! * \internal * \brief Get a list of a bundle's containers * * \param[in] bundle Bundle resource * * \return Newly created list of \p bundle's containers * \note It is the caller's responsibility to free the result with * g_list_free(). */ GList * pcmk__bundle_containers(const pe_resource_t *bundle) { GList *containers = NULL; const pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, bundle); for (GList *iter = data->replicas; iter != NULL; iter = iter->next) { pe__bundle_replica_t *replica = iter->data; containers = g_list_append(containers, replica->container); } return containers; } -static inline GList * -get_containers_or_children(const pe_resource_t *rsc) -{ - return (rsc->variant == pe_container)? - pcmk__bundle_containers(rsc) : rsc->children; -} - /*! 
* \internal * \brief Assign a bundle resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * * \return Node that \p rsc is assigned to, if assigned entirely to one node */ pe_node_t * pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) { GList *containers = NULL; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return NULL); get_bundle_variant_data(bundle_data, rsc); pe__set_resource_flags(rsc, pe_rsc_allocating); containers = pcmk__bundle_containers(rsc); pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, rsc->cluster); containers = g_list_sort(containers, pcmk__cmp_instance); pcmk__assign_instances(rsc, containers, bundle_data->nreplicas, bundle_data->nreplicas_per_host); g_list_free(containers); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; pe_node_t *container_host = NULL; CRM_ASSERT(replica); if (replica->ip) { pe_rsc_trace(rsc, "Allocating bundle %s IP %s", rsc->id, replica->ip->id); replica->ip->cmds->assign(replica->ip, prefer); } container_host = replica->container->allocated_to; if (replica->remote && pe__is_guest_or_remote_node(container_host)) { /* We need 'nested' connection resources to be on the same * host because pacemaker-remoted only supports a single * active connection */ pcmk__new_colocation("child-remote-with-docker-remote", NULL, INFINITY, replica->remote, container_host->details->remote_rsc, NULL, NULL, true, rsc->cluster); } if (replica->remote) { pe_rsc_trace(rsc, "Allocating bundle %s connection %s", rsc->id, replica->remote->id); replica->remote->cmds->assign(replica->remote, prefer); } // Explicitly allocate replicas' children before bundle child if (replica->child) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, replica->child->allowed_nodes); while 
(g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (node->details != replica->node->details) { node->weight = -INFINITY; } else if (!pcmk__threshold_reached(replica->child, node, NULL)) { node->weight = INFINITY; } } pe__set_resource_flags(replica->child->parent, pe_rsc_allocating); pe_rsc_trace(rsc, "Allocating bundle %s replica child %s", rsc->id, replica->child->id); replica->child->cmds->assign(replica->child, replica->node); pe__clear_resource_flags(replica->child->parent, pe_rsc_allocating); } } if (bundle_data->child) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, bundle_data->child->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (is_bundle_node(bundle_data, node)) { node->weight = 0; } else { node->weight = -INFINITY; } } pe_rsc_trace(rsc, "Allocating bundle %s child %s", rsc->id, bundle_data->child->id); bundle_data->child->cmds->assign(bundle_data->child, prefer); } pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional); return NULL; } void pcmk__bundle_create_actions(pe_resource_t *rsc) { pe_action_t *action = NULL; GList *containers = NULL; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); containers = pcmk__bundle_containers(rsc); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip) { replica->ip->cmds->create_actions(replica->ip); } if (replica->container) { replica->container->cmds->create_actions(replica->container); } if (replica->remote) { replica->remote->cmds->create_actions(replica->remote); } } pcmk__create_instance_actions(rsc, containers, NULL, NULL); if (bundle_data->child) { bundle_data->child->cmds->create_actions(bundle_data->child); if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) { /* promote */ pe__new_rsc_pseudo_action(rsc, RSC_PROMOTE, true, 
true); action = pe__new_rsc_pseudo_action(rsc, RSC_PROMOTED, true, true); action->priority = INFINITY; /* demote */ pe__new_rsc_pseudo_action(rsc, RSC_DEMOTE, true, true); action = pe__new_rsc_pseudo_action(rsc, RSC_DEMOTED, true, true); action->priority = INFINITY; } } g_list_free(containers); } void pcmk__bundle_internal_constraints(pe_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); if (bundle_data->child) { pcmk__order_resource_actions(rsc, RSC_START, bundle_data->child, RSC_START, pe_order_implies_first_printed); pcmk__order_resource_actions(rsc, RSC_STOP, bundle_data->child, RSC_STOP, pe_order_implies_first_printed); if (bundle_data->child->children) { pcmk__order_resource_actions(bundle_data->child, RSC_STARTED, rsc, RSC_STARTED, pe_order_implies_then_printed); pcmk__order_resource_actions(bundle_data->child, RSC_STOPPED, rsc, RSC_STOPPED, pe_order_implies_then_printed); } else { pcmk__order_resource_actions(bundle_data->child, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed); pcmk__order_resource_actions(bundle_data->child, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed); } } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); CRM_ASSERT(replica->container); replica->container->cmds->internal_constraints(replica->container); pcmk__order_starts(rsc, replica->container, pe_order_runnable_left|pe_order_implies_first_printed); if (replica->child) { pcmk__order_stops(rsc, replica->child, pe_order_implies_first_printed); } pcmk__order_stops(rsc, replica->container, pe_order_implies_first_printed); pcmk__order_resource_actions(replica->container, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed); pcmk__order_resource_actions(replica->container, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed); if (replica->ip) { 
replica->ip->cmds->internal_constraints(replica->ip); // Start IP then container pcmk__order_starts(replica->ip, replica->container, pe_order_runnable_left|pe_order_preserve); pcmk__order_stops(replica->container, replica->ip, pe_order_implies_first|pe_order_preserve); pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip, replica->container, NULL, NULL, true, rsc->cluster); } if (replica->remote) { /* This handles ordering and colocating remote relative to container * (via "resource-with-container"). Since IP is also ordered and * colocated relative to the container, we don't need to do anything * explicit here with IP. */ replica->remote->cmds->internal_constraints(replica->remote); } if (replica->child) { CRM_ASSERT(replica->remote); // "Start remote then child" is implicit in scheduler's remote logic } } if (bundle_data->child) { bundle_data->child->cmds->internal_constraints(bundle_data->child); if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) { pcmk__promotable_restart_ordering(rsc); /* child demoted before global demoted */ pcmk__order_resource_actions(bundle_data->child, RSC_DEMOTED, rsc, RSC_DEMOTED, pe_order_implies_then_printed); /* global demote before child demote */ pcmk__order_resource_actions(rsc, RSC_DEMOTE, bundle_data->child, RSC_DEMOTE, pe_order_implies_first_printed); /* child promoted before global promoted */ pcmk__order_resource_actions(bundle_data->child, RSC_PROMOTED, rsc, RSC_PROMOTED, pe_order_implies_then_printed); /* global promote before child promote */ pcmk__order_resource_actions(rsc, RSC_PROMOTE, bundle_data->child, RSC_PROMOTE, pe_order_implies_first_printed); } } } static pe_resource_t * compatible_replica_for_node(const pe_resource_t *rsc_lh, const pe_node_t *candidate, const pe_resource_t *rsc, enum rsc_role_e filter, gboolean current) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(candidate != NULL, return NULL); get_bundle_variant_data(bundle_data, rsc); crm_trace("Looking for 
compatible child from %s for %s on %s", rsc_lh->id, rsc->id, pe__node_name(candidate)); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (is_child_compatible(replica->container, candidate, filter, current)) { crm_trace("Pairing %s with %s on %s", rsc_lh->id, replica->container->id, pe__node_name(candidate)); return replica->container; } } crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id); return NULL; } static pe_resource_t * compatible_replica(const pe_resource_t *rsc_lh, const pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { GList *scratch = NULL; pe_resource_t *pair = NULL; pe_node_t *active_node_lh = NULL; active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current); if (active_node_lh) { return compatible_replica_for_node(rsc_lh, active_node_lh, rsc, filter, current); } scratch = g_hash_table_get_values(rsc_lh->allowed_nodes); scratch = pcmk__sort_nodes(scratch, NULL); for (GList *gIter = scratch; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = compatible_replica_for_node(rsc_lh, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? rsc->id : "none")); done: g_list_free(scratch); return pair; } int copies_per_node(pe_resource_t * rsc) { /* Strictly speaking, there should be a 'copies_per_node' addition * to the resource function table and each case would be a * function. However that would be serious overkill to return an * int. In fact, it seems to me that both function tables * could/should be replaced by resources.{c,h} full of * rsc_{some_operation} functions containing a switch as below * which calls out to functions named {variant}_{some_operation} * as needed. 
*/ switch(rsc->variant) { case pe_unknown: return 0; case pe_native: case pe_group: return 1; case pe_clone: { const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); if (max_clones_node == NULL) { return 1; } else { int max_i; pcmk__scan_min_int(max_clones_node, &max_i, 0); return max_i; } } case pe_container: { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); return data->nreplicas_per_host; } } return 0; } /*! * \internal * \brief Apply a colocation's score to node weights or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node weights (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { GList *allocated_primaries = NULL; pe__bundle_variant_data_t *bundle_data = NULL; /* This should never be called for the bundle itself as a dependent. * Instead, we add its colocation constraints to its replicas and call the * apply_coloc_score() for the replicas as dependents. 
*/ CRM_ASSERT(!for_dependent); CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL), return); CRM_ASSERT(dependent->variant == pe_native); if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (colocation->dependent->variant > pe_group) { pe_resource_t *primary_replica = compatible_replica(dependent, primary, RSC_ROLE_UNKNOWN, FALSE, dependent->cluster); if (primary_replica) { pe_rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_replica->id); dependent->cmds->apply_coloc_score(dependent, primary_replica, colocation, true); } else if (colocation->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true); } else { pe_rsc_debug(primary, "Cannot pair %s with instance of %s", dependent->id, primary->id); } return; } get_bundle_variant_data(bundle_data, primary); pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d", colocation->id, dependent->id, primary->id, colocation->score); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (colocation->score < INFINITY) { replica->container->cmds->apply_coloc_score(dependent, replica->container, colocation, false); } else { pe_node_t *chosen = replica->container->fns->location(replica->container, NULL, FALSE); if ((chosen == NULL) || is_set_recursive(replica->container, pe_rsc_block, TRUE)) { continue; } if ((colocation->primary_role >= RSC_ROLE_PROMOTED) && (replica->child == NULL)) { continue; } if ((colocation->primary_role >= RSC_ROLE_PROMOTED) && (replica->child->next_role < RSC_ROLE_PROMOTED)) { continue; } pe_rsc_trace(primary, "Allowing %s: %s %d", colocation->id, pe__node_name(chosen), chosen->weight); allocated_primaries = g_list_prepend(allocated_primaries, chosen); } } if (colocation->score >= INFINITY) { 
node_list_exclude(dependent->allowed_nodes, allocated_primaries, FALSE); } g_list_free(allocated_primaries); } enum pe_action_flags pcmk__bundle_action_flags(pe_action_t *action, const pe_node_t *node) { GList *containers = NULL; enum pe_action_flags flags = 0; pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, action->rsc); if(data->child) { enum action_tasks task = get_complex_task(data->child, action->task, TRUE); switch(task) { case no_action: case action_notify: case action_notified: case action_promote: case action_promoted: case action_demote: case action_demoted: return summary_action_flags(action, data->child->children, node); default: break; } } containers = pcmk__bundle_containers(action->rsc); flags = summary_action_flags(action, containers, node); g_list_free(containers); return flags; } -pe_resource_t * -find_compatible_child_by_node(const pe_resource_t *local_child, - const pe_node_t *local_node, - const pe_resource_t *rsc, enum rsc_role_e filter, - gboolean current) -{ - GList *gIter = NULL; - GList *children = NULL; - - if (local_node == NULL) { - crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id); - return NULL; - } - - crm_trace("Looking for compatible child from %s for %s on %s", - local_child->id, rsc->id, pe__node_name(local_node)); - - children = get_containers_or_children(rsc); - for (gIter = children; gIter != NULL; gIter = gIter->next) { - pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; - - if(is_child_compatible(child_rsc, local_node, filter, current)) { - crm_trace("Pairing %s with %s on %s", - local_child->id, child_rsc->id, pe__node_name(local_node)); - return child_rsc; - } - } - - crm_trace("Can't pair %s with %s", local_child->id, rsc->id); - if(children != rsc->children) { - g_list_free(children); - } - return NULL; -} - /*! 
* \internal * \brief Get containerized resource corresponding to a given bundle container * * \param[in] instance Collective instance that might be a bundle container * \param[in] node Node that \p instance might be running on * * \return Bundled resource instance inside \p instance if it is a bundle * container instance running on \p node, otherwise NULL */ const pe_resource_t * pcmk__get_rsc_in_container(const pe_resource_t *instance, const pe_node_t *node) { const pe__bundle_variant_data_t *data = NULL; const pe_resource_t *top = pe__const_top_resource(instance, true); if ((node == NULL) || (top == NULL) || (top->variant != pe_container)) { return NULL; } get_bundle_variant_data(data, top); for (const GList *iter = data->replicas; iter != NULL; iter = iter->next) { const pe__bundle_replica_t *replica = iter->data; if ((replica->child != NULL) && (instance == replica->container) && pe__same_node(node, replica->node)) { return replica->child; } } return NULL; } -static uint32_t -multi_update_interleave_actions(pe_action_t *first, pe_action_t *then, - const pe_node_t *node, uint32_t filter, - uint32_t type, pe_working_set_t *data_set) -{ - GList *gIter = NULL; - GList *children = NULL; - gboolean current = FALSE; - uint32_t changed = pcmk__updated_none; - - /* Fix this - lazy */ - if (pcmk__ends_with(first->uuid, "_stopped_0") - || pcmk__ends_with(first->uuid, "_demoted_0")) { - current = TRUE; - } - - children = get_containers_or_children(then->rsc); - for (gIter = children; gIter != NULL; gIter = gIter->next) { - pe_resource_t *then_child = gIter->data; - pe_resource_t *first_child = find_compatible_child(then_child, - first->rsc, - RSC_ROLE_UNKNOWN, - current); - if (first_child == NULL && current) { - crm_trace("Ignore"); - - } else if (first_child == NULL) { - crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid, then->uuid); - - /* Me no like this hack - but what else can we do? 
- * - * If there is no-one active or about to be active - * on the same node as then_child, then they must - * not be allowed to start - */ - if (pcmk_any_flags_set(type, pe_order_runnable_left|pe_order_implies_then) /* Mandatory */ ) { - pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id); - if (pcmk__assign_resource(then_child, NULL, true)) { - pcmk__set_updated_flags(changed, first, pcmk__updated_then); - } - } - - } else { - pe_action_t *first_action = NULL; - pe_action_t *then_action = NULL; - - enum action_tasks task = clone_child_action(first); - const char *first_task = task2text(task); - - const pe_resource_t *first_rsc = NULL; - const pe_resource_t *then_rsc = NULL; - - first_rsc = pcmk__get_rsc_in_container(first_child, node); - if ((first_rsc != NULL) && strstr(first->task, "stop")) { - /* Except for 'stopped' we should be looking at the - * in-container resource, actions for the child will - * happen later and are therefor more likely to align - * with the user's intent. 
- */ - first_action = find_first_action(first_rsc->actions, NULL, - task2text(task), node); - } else { - first_action = find_first_action(first_child->actions, NULL, task2text(task), node); - } - - then_rsc = pcmk__get_rsc_in_container(then_child, node); - if ((then_rsc != NULL) && strstr(then->task, "mote")) { - /* Promote/demote actions will never be found for the - * container resource, look in the child instead - * - * Alternatively treat: - * 'XXXX then promote YYYY' as 'XXXX then start container for YYYY', and - * 'demote XXXX then stop YYYY' as 'stop container for XXXX then stop YYYY' - */ - then_action = find_first_action(then_rsc->actions, NULL, - then->task, node); - } else { - then_action = find_first_action(then_child->actions, NULL, then->task, node); - } - - if (first_action == NULL) { - if (!pcmk_is_set(first_child->flags, pe_rsc_orphan) - && !pcmk__str_any_of(first_task, RSC_STOP, RSC_DEMOTE, NULL)) { - crm_err("Internal error: No action found for %s in %s (first)", - first_task, first_child->id); - - } else { - crm_trace("No action found for %s in %s%s (first)", - first_task, first_child->id, - pcmk_is_set(first_child->flags, pe_rsc_orphan)? " (ORPHAN)" : ""); - } - continue; - } - - /* We're only interested if 'then' is neither stopping nor being demoted */ - if (then_action == NULL) { - if (!pcmk_is_set(then_child->flags, pe_rsc_orphan) - && !pcmk__str_any_of(then->task, RSC_STOP, RSC_DEMOTE, NULL)) { - crm_err("Internal error: No action found for %s in %s (then)", - then->task, then_child->id); - - } else { - crm_trace("No action found for %s in %s%s (then)", - then->task, then_child->id, - pcmk_is_set(then_child->flags, pe_rsc_orphan)? 
" (ORPHAN)" : ""); - } - continue; - } - - if (order_actions(first_action, then_action, type)) { - crm_debug("Created constraint for %s (%d) -> %s (%d) %.6x", - first_action->uuid, - pcmk_is_set(first_action->flags, pe_action_optional), - then_action->uuid, - pcmk_is_set(then_action->flags, pe_action_optional), - type); - pcmk__set_updated_flags(changed, first, - pcmk__updated_first|pcmk__updated_then); - } - if(first_action && then_action) { - changed |= then_child->cmds->update_ordered_actions(first_action, - then_action, - node, - first_child->cmds->action_flags(first_action, node), - filter, - type, - data_set); - } else { - crm_err("Nothing found either for %s (%p) or %s (%p) %s", - first_child->id, first_action, - then_child->id, then_action, task2text(task)); - } - } - } - - if(children != then->rsc->children) { - g_list_free(children); - } - return changed; -} - -static bool -can_interleave_actions(pe_action_t *first, pe_action_t *then) -{ - bool interleave = FALSE; - pe_resource_t *rsc = NULL; - const char *interleave_s = NULL; - - if(first->rsc == NULL || then->rsc == NULL) { - crm_trace("Not interleaving %s with %s (both must be resources)", first->uuid, then->uuid); - return FALSE; - } else if(first->rsc == then->rsc) { - crm_trace("Not interleaving %s with %s (must belong to different resources)", first->uuid, then->uuid); - return FALSE; - } else if(first->rsc->variant < pe_clone || then->rsc->variant < pe_clone) { - crm_trace("Not interleaving %s with %s (both sides must be clones or bundles)", first->uuid, then->uuid); - return FALSE; - } - - if (pcmk__ends_with(then->uuid, "_stop_0") - || pcmk__ends_with(then->uuid, "_demote_0")) { - rsc = first->rsc; - } else { - rsc = then->rsc; - } - - interleave_s = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE); - interleave = crm_is_true(interleave_s); - crm_trace("Interleave %s -> %s: %s (based on %s)", - first->uuid, then->uuid, interleave ? "yes" : "no", rsc->id); - - return interleave; -} - -/*! 
- * \internal - * \brief Update two actions according to an ordering between them - * - * Given information about an ordering of two actions, update the actions' - * flags (and runnable_before members if appropriate) as appropriate for the - * ordering. In some cases, the ordering could be disabled as well. - * - * \param[in,out] first 'First' action in an ordering - * \param[in,out] then 'Then' action in an ordering - * \param[in] node If not NULL, limit scope of ordering to this node - * (only used when interleaving instances) - * \param[in] flags Action flags for \p first for ordering purposes - * \param[in] filter Action flags to limit scope of certain updates (may - * include pe_action_optional to affect only mandatory - * actions, and pe_action_runnable to affect only - * runnable actions) - * \param[in] type Group of enum pe_ordering flags to apply - * \param[in,out] data_set Cluster working set - * - * \return Group of enum pcmk__updated flags indicating what was updated - */ -uint32_t -pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then, - const pe_node_t *node, uint32_t flags, - uint32_t filter, uint32_t type, - pe_working_set_t *data_set) -{ - uint32_t changed = pcmk__updated_none; - - crm_trace("%s -> %s", first->uuid, then->uuid); - - if(can_interleave_actions(first, then)) { - changed = multi_update_interleave_actions(first, then, node, filter, - type, data_set); - - } else if(then->rsc) { - GList *gIter = NULL; - GList *children = NULL; - - // Handle the 'primitive' ordering case - changed |= pcmk__update_ordered_actions(first, then, node, flags, - filter, type, data_set); - - // Now any children (or containers in the case of a bundle) - children = get_containers_or_children(then->rsc); - for (gIter = children; gIter != NULL; gIter = gIter->next) { - pe_resource_t *then_child = (pe_resource_t *) gIter->data; - uint32_t then_child_changed = pcmk__updated_none; - pe_action_t *then_child_action = find_first_action(then_child->actions, NULL, 
then->task, node); - - if (then_child_action) { - uint32_t then_child_flags = then_child->cmds->action_flags(then_child_action, - node); - - if (pcmk_is_set(then_child_flags, pe_action_runnable)) { - then_child_changed |= then_child->cmds->update_ordered_actions(first, - then_child_action, - node, - flags, - filter, - type, - data_set); - } - changed |= then_child_changed; - if (pcmk_is_set(then_child_changed, pcmk__updated_then)) { - for (GList *lpc = then_child_action->actions_after; lpc != NULL; lpc = lpc->next) { - pe_action_wrapper_t *next = (pe_action_wrapper_t *) lpc->data; - - pcmk__update_action_for_orderings(next->action, - data_set); - } - } - } - } - - if(children != then->rsc->children) { - g_list_free(children); - } - } - return changed; -} - void pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); pcmk__apply_location(rsc, constraint); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (replica->container) { replica->container->cmds->apply_location(replica->container, constraint); } if (replica->ip) { replica->ip->cmds->apply_location(replica->ip, constraint); } } if (bundle_data->child && ((constraint->role_filter == RSC_ROLE_UNPROMOTED) || (constraint->role_filter == RSC_ROLE_PROMOTED))) { bundle_data->child->cmds->apply_location(bundle_data->child, constraint); bundle_data->child->rsc_location = g_list_prepend(bundle_data->child->rsc_location, constraint); } } /*! 
* \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__bundle_expand(pe_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); if (bundle_data->child) { bundle_data->child->cmds->add_actions_to_graph(bundle_data->child); } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->remote && replica->container && pe__bundle_needs_remote_name(replica->remote)) { /* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that * run pacemaker-remoted inside, without needing a separate IP for * the container. This is done by configuring the inner remote's * connection host as the magic string "#uname", then * replacing it with the underlying host when needed. */ xmlNode *nvpair = get_xpath_object("//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']", replica->remote->xml, LOG_ERR); const char *calculated_addr = NULL; // Replace the value in replica->remote->xml (if appropriate) calculated_addr = pe__add_bundle_remote_name(replica->remote, rsc->cluster, nvpair, "value"); if (calculated_addr) { /* Since this is for the bundle as a resource, and not any * particular action, replace the value in the default * parameters (not evaluated for node). create_graph_action() * will grab it from there to replace it in node-evaluated * parameters. */ GHashTable *params = pe_rsc_params(replica->remote, NULL, rsc->cluster); g_hash_table_replace(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR), strdup(calculated_addr)); } else { /* The only way to get here is if the remote connection is * neither currently running nor scheduled to run. 
That means we * won't be doing any operations that require addr (only start * requires it; we additionally use it to compare digests when * unpacking status, promote, and migrate_from history, but * that's already happened by this point). */ crm_info("Unable to determine address for bundle %s remote connection", rsc->id); } } if (replica->ip) { replica->ip->cmds->add_actions_to_graph(replica->ip); } if (replica->container) { replica->container->cmds->add_actions_to_graph(replica->container); } if (replica->remote) { replica->remote->cmds->add_actions_to_graph(replica->remote); } } } /*! * \internal * * \brief Schedule any probes needed for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node) { bool any_created = false; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return false); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if ((replica->ip != NULL) && replica->ip->cmds->create_probe(replica->ip, node)) { any_created = true; } if ((replica->child != NULL) && (node->details == replica->node->details) && replica->child->cmds->create_probe(replica->child, node)) { any_created = true; } if ((replica->container != NULL) && replica->container->cmds->create_probe(replica->container, node)) { any_created = true; /* If we're limited to one replica per host (due to * the lack of an IP range probably), then we don't * want any of our peer containers starting until * we've established that no other copies are already * running. 
* * Partly this is to ensure that nreplicas_per_host is * observed, but also to ensure that the containers * don't fail to start because the necessary port * mappings (which won't include an IP for uniqueness) * are already taken */ for (GList *tIter = bundle_data->replicas; tIter && (bundle_data->nreplicas_per_host == 1); tIter = tIter->next) { pe__bundle_replica_t *other = tIter->data; if ((other != replica) && (other != NULL) && (other->container != NULL)) { pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, RSC_STATUS, 0), NULL, other->container, pcmk__op_key(other->container->id, RSC_START, 0), NULL, pe_order_optional|pe_order_same_node, rsc->cluster); } } } if ((replica->container != NULL) && (replica->remote != NULL) && replica->remote->cmds->create_probe(replica->remote, node)) { /* Do not probe the remote resource until we know where the * container is running. This is required for REMOTE_CONTAINER_HACK * to correctly probe remote resources. */ char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS, 0); pe_action_t *probe = find_first_action(replica->remote->actions, probe_uuid, NULL, node); free(probe_uuid); if (probe != NULL) { any_created = true; crm_trace("Ordering %s probe on %s", replica->remote->id, pe__node_name(node)); pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, RSC_START, 0), NULL, replica->remote, NULL, probe, pe_order_probe, rsc->cluster); } } } return any_created; } void pcmk__output_bundle_actions(pe_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip != NULL) { replica->ip->cmds->output_actions(replica->ip); } if (replica->container != NULL) { replica->container->cmds->output_actions(replica->container); } if (replica->remote != 
NULL) { replica->remote->cmds->output_actions(replica->remote); } if (replica->child != NULL) { replica->child->cmds->output_actions(replica->child); } } } // Bundle implementation of resource_alloc_functions_t:add_utilization() void pcmk__bundle_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { pe__bundle_variant_data_t *bundle_data = NULL; pe__bundle_replica_t *replica = NULL; if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } get_bundle_variant_data(bundle_data, rsc); if (bundle_data->replicas == NULL) { return; } /* All bundle replicas are identical, so using the utilization of the first * is sufficient for any. Only the implicit container resource can have * utilization values. */ replica = (pe__bundle_replica_t *) bundle_data->replicas->data; if (replica->container != NULL) { replica->container->cmds->add_utilization(replica->container, orig_rsc, all_rscs, utilization); } } // Bundle implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__bundle_shutdown_lock(pe_resource_t *rsc) { return; // Bundles currently don't support shutdown locks } diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c index ca325cebeb..1f90ed4335 100644 --- a/lib/pacemaker/pcmk_sched_instances.c +++ b/lib/pacemaker/pcmk_sched_instances.c @@ -1,698 +1,997 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ /* This file is intended for code usable with both clone instances and bundle * replica containers. */ #include #include #include #include "libpacemaker_private.h" static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all); /*! 
* \internal * \brief Check whether a node is allowed to run an instance * * \param[in] instance Clone instance or bundle container to check * \param[in] node Node to check * \param[in] max_per_node Maximum number of instances allowed to run on a node * * \return true if \p node is allowed to run \p instance, otherwise false */ static bool can_run_instance(const pe_resource_t *instance, const pe_node_t *node, int max_per_node) { pe_node_t *allowed_node = NULL; if (pcmk_is_set(instance->flags, pe_rsc_orphan)) { pe_rsc_trace(instance, "%s cannot run on %s: orphaned", instance->id, pe__node_name(node)); return false; } if (!pcmk__node_available(node, false, false)) { pe_rsc_trace(instance, "%s cannot run on %s: node cannot run resources", instance->id, pe__node_name(node)); return false; } allowed_node = pcmk__top_allowed_node(instance, node); if (allowed_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", instance->id, pe__node_name(node)); return false; } if (allowed_node->weight < 0) { pe_rsc_trace(instance, "%s cannot run on %s: parent score is %s there", instance->id, pe__node_name(node), pcmk_readable_score(allowed_node->weight)); return false; } if (allowed_node->count >= max_per_node) { pe_rsc_trace(instance, "%s cannot run on %s: node already has %d instance%s", instance->id, pe__node_name(node), max_per_node, pcmk__plural_s(max_per_node)); return false; } pe_rsc_trace(instance, "%s can run on %s (%d already running)", instance->id, pe__node_name(node), allowed_node->count); return true; } /*! 
* \internal * \brief Ban a clone instance or bundle replica from unavailable allowed nodes * * \param[in,out] instance Clone instance or bundle replica to ban * \param[in] max_per_node Maximum instances allowed to run on a node */ static void ban_unavailable_allowed_nodes(pe_resource_t *instance, int max_per_node) { if (instance->allowed_nodes != NULL) { GHashTableIter iter; const pe_node_t *allowed_node = NULL; g_hash_table_iter_init(&iter, instance->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &allowed_node)) { if (!can_run_instance(instance, allowed_node, max_per_node)) { // Ban instance (and all its children) from node common_update_score(instance, allowed_node->details->id, -INFINITY); } } } } /*! * \internal * \brief Choose a node for an instance * * \param[in,out] instance Clone instance or bundle replica container * \param[in] prefer If not NULL, attempt early assignment to this * node, if still the best choice; otherwise, * perform final assignment * \param[in] all_coloc If true (indicating that there are more * available nodes than instances), add all parent * colocations to instance, otherwise add only * negative (and for "this with" colocations, * infinite) colocations to avoid needless * shuffling of instances among nodes * \param[in] max_per_node Assign at most this many instances to one node * * \return true if \p instance could be assigned to a node, otherwise false */ static bool assign_instance(pe_resource_t *instance, const pe_node_t *prefer, bool all_coloc, int max_per_node) { pe_node_t *chosen = NULL; pe_node_t *allowed = NULL; CRM_ASSERT(instance != NULL); pe_rsc_trace(instance, "Assigning %s (preferring %s, using %s parent colocations)", instance->id, ((prefer == NULL)? "no node" : prefer->details->uname), (all_coloc? 
"all" : "essential")); if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) { // Instance is already assigned return instance->fns->location(instance, NULL, FALSE) != NULL; } if (pcmk_is_set(instance->flags, pe_rsc_allocating)) { pe_rsc_debug(instance, "Assignment loop detected involving %s colocations", instance->id); return false; } if (prefer != NULL) { // Possible early assignment to preferred node // Get preferred node with instance's scores allowed = g_hash_table_lookup(instance->allowed_nodes, prefer->details->id); if ((allowed == NULL) || (allowed->weight < 0)) { pe_rsc_trace(instance, "Not assigning %s to preferred node %s: unavailable", instance->id, pe__node_name(prefer)); return false; } } ban_unavailable_allowed_nodes(instance, max_per_node); if (prefer == NULL) { // Final assignment chosen = instance->cmds->assign(instance, NULL); } else { // Possible early assignment to preferred node GHashTable *backup = pcmk__copy_node_table(instance->allowed_nodes); chosen = instance->cmds->assign(instance, prefer); // Revert nodes if preferred node won't be assigned if ((chosen != NULL) && (chosen->details != prefer->details)) { crm_info("Not assigning %s to preferred node %s: %s is better", instance->id, pe__node_name(prefer), pe__node_name(chosen)); g_hash_table_destroy(instance->allowed_nodes); instance->allowed_nodes = backup; pcmk__unassign_resource(instance); chosen = NULL; } else if (backup != NULL) { g_hash_table_destroy(backup); } } // The parent tracks how many instances have been assigned to each node if (chosen != NULL) { allowed = pcmk__top_allowed_node(instance, chosen); if (allowed == NULL) { /* The instance is allowed on the node, but its parent isn't. This * shouldn't be possible if the resource is managed, and we won't be * able to limit the number of instances assigned to the node. 
*/ CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pe_rsc_managed)); } else { allowed->count++; } } return chosen != NULL; } static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all) { GList *gIter = NULL; gIter = rsc->rsc_cons; for (; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; if (all || cons->score < 0 || cons->score == INFINITY) { pcmk__add_this_with(child, cons); } } gIter = rsc->rsc_cons_lhs; for (; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(cons, child)) { continue; } if (all || cons->score < 0) { pcmk__add_with_this(child, cons); } } } /*! * \internal * \brief Reset the node counts of a resource's allowed nodes to zero * * \param[in,out] rsc Resource to reset * * \return Number of nodes that are available to run resources */ static unsigned int reset_allowed_node_counts(pe_resource_t *rsc) { unsigned int available_nodes = 0; pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { node->count = 0; if (pcmk__node_available(node, false, false)) { available_nodes++; } } return available_nodes; } /*! 
* \internal * \brief Check whether an instance has a preferred node * * \param[in] rsc Clone or bundle being assigned (for logs only) * \param[in] instance Clone instance or bundle replica container * \param[in] optimal_per_node Optimal number of instances per node * * \return Instance's current node if still available, otherwise NULL */ static const pe_node_t * preferred_node(const pe_resource_t *rsc, const pe_resource_t *instance, int optimal_per_node) { const pe_node_t *node = NULL; const pe_node_t *parent_node = NULL; // Check whether instance is active, healthy, and not yet assigned if ((instance->running_on == NULL) || !pcmk_is_set(instance->flags, pe_rsc_provisional) || pcmk_is_set(instance->flags, pe_rsc_failed)) { return NULL; } // Check whether instance's current node can run resources node = pe__current_node(instance); if (!pcmk__node_available(node, true, false)) { pe_rsc_trace(rsc, "Not assigning %s to %s early (unavailable)", instance->id, pe__node_name(node)); return NULL; } // Check whether node already has optimal number of instances assigned parent_node = pcmk__top_allowed_node(instance, node); if ((parent_node != NULL) && (parent_node->count >= optimal_per_node)) { pe_rsc_trace(rsc, "Not assigning %s to %s early " "(optimal instances already assigned)", instance->id, pe__node_name(node)); return NULL; } return node; } /*! 
* \internal * \brief Assign collective instances to nodes * * \param[in,out] collective Clone or bundle resource being assigned * \param[in,out] instances List of clone instances or bundle containers * \param[in] max_total Maximum instances to assign in total * \param[in] max_per_node Maximum instances to assign to any one node */ void pcmk__assign_instances(pe_resource_t *collective, GList *instances, int max_total, int max_per_node) { // Reuse node count to track number of assigned instances unsigned int available_nodes = reset_allowed_node_counts(collective); /* Include finite positive preferences of the collective's * colocation dependents only if not every node will get an instance. */ bool all_coloc = (max_total < available_nodes); int optimal_per_node = 0; int assigned = 0; GList *iter = NULL; pe_resource_t *instance = NULL; const pe_node_t *current = NULL; if (available_nodes > 0) { optimal_per_node = max_total / available_nodes; } if (optimal_per_node < 1) { optimal_per_node = 1; } pe_rsc_debug(collective, "Assigning up to %d %s instance%s to up to %d node%s " "(at most %d per host, %d optimal)", max_total, collective->id, pcmk__plural_s(max_total), available_nodes, pcmk__plural_s(available_nodes), max_per_node, optimal_per_node); // Assign as many instances as possible to their current location for (iter = instances; (iter != NULL) && (assigned < max_total); iter = iter->next) { instance = (pe_resource_t *) iter->data; append_parent_colocation(instance->parent, instance, all_coloc); current = preferred_node(collective, instance, optimal_per_node); if ((current != NULL) && assign_instance(instance, current, all_coloc, max_per_node)) { pe_rsc_trace(collective, "Assigned %s to current node %s", instance->id, pe__node_name(current)); assigned++; } } pe_rsc_trace(collective, "Assigned %d of %d instance%s to current node", assigned, max_total, pcmk__plural_s(max_total)); for (iter = instances; iter != NULL; iter = iter->next) { instance = (pe_resource_t *) 
iter->data; if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) { continue; // Already assigned } if (instance->running_on != NULL) { current = pe__current_node(instance); if (pcmk__top_allowed_node(instance, current) == NULL) { const char *unmanaged = ""; if (!pcmk_is_set(instance->flags, pe_rsc_managed)) { unmanaged = "Unmanaged resource "; } crm_notice("%s%s is running on %s which is no longer allowed", unmanaged, instance->id, pe__node_name(current)); } } if (assigned >= max_total) { pe_rsc_debug(collective, "Not assigning %s because maximum %d instances " "already assigned", instance->id, max_total); resource_location(instance, NULL, -INFINITY, "collective_limit_reached", collective->cluster); } else if (assign_instance(instance, NULL, all_coloc, max_per_node)) { assigned++; } } pe_rsc_debug(collective, "Assigned %d of %d possible instance%s of %s", assigned, max_total, pcmk__plural_s(max_total), collective->id); } enum instance_state { instance_starting = (1 << 0), instance_stopping = (1 << 1), instance_restarting = (1 << 2), instance_active = (1 << 3), }; /*! * \internal * \brief Check whether an instance is active, starting, and/or stopping * * \param[in] instance Clone instance or bundle replica container * \param[in,out] state Whether any instance is starting, stopping, etc. 
*/ static void check_instance_state(const pe_resource_t *instance, uint32_t *state) { const GList *iter = NULL; uint32_t instance_state = 0; // State of just this instance // If the instance has its own children (a cloned group), check each one if (instance->children != NULL) { for (iter = instance->children; iter != NULL; iter = iter->next) { check_instance_state((const pe_resource_t *) iter->data, state); } return; } if (instance->running_on != NULL) { instance_state |= instance_active; } // Check each of the instance's actions for runnable start or stop for (iter = instance->actions; (iter != NULL) && !pcmk_all_flags_set(instance_state, instance_starting |instance_stopping); iter = iter->next) { const pe_action_t *action = (const pe_action_t *) iter->data; if (pcmk__str_eq(RSC_START, action->task, pcmk__str_none)) { if (!pcmk_is_set(action->flags, pe_action_optional) && pcmk_is_set(action->flags, pe_action_runnable)) { pe_rsc_trace(instance, "Instance is starting due to %s", action->uuid); instance_state |= instance_starting; } else { pe_rsc_trace(instance, "%s doesn't affect %s state (unrunnable)", action->uuid, instance->id); } } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_none)) { if (!pcmk_is_set(action->flags, pe_action_optional) && pcmk_any_flags_set(action->flags, // Pseudo-stops are implied by fencing pe_action_pseudo|pe_action_runnable)) { pe_rsc_trace(instance, "Instance is stopping due to %s", action->uuid); instance_state |= instance_stopping; } else { pe_rsc_trace(instance, "%s doesn't affect %s state (unrunnable)", action->uuid, instance->id); } } } if (pcmk_all_flags_set(instance_state, instance_starting|instance_stopping)) { instance_state |= instance_restarting; } *state |= instance_state; } /*! 
* \internal * \brief Create actions for collective resource instances * * \param[in,out] collective Clone or bundle resource to create actions for * \param[in,out] instances List of clone instances or bundle containers * \param[out] start_notify If not NULL, create start notification actions * \param[out] stop_notify If not NULL, create stop notification actions */ void pcmk__create_instance_actions(pe_resource_t *collective, GList *instances, notify_data_t **start_notify, notify_data_t **stop_notify) { uint32_t state = 0; pe_action_t *stop = NULL; pe_action_t *stopped = NULL; pe_action_t *start = NULL; pe_action_t *started = NULL; pe_rsc_trace(collective, "Creating collective instance actions for %s", collective->id); // Create actions for each instance appropriate to its variant for (GList *iter = instances; iter != NULL; iter = iter->next) { pe_resource_t *instance = (pe_resource_t *) iter->data; instance->cmds->create_actions(instance); check_instance_state(instance, &state); } // Create pseudo-actions for rsc start and started start = pe__new_rsc_pseudo_action(collective, RSC_START, !pcmk_is_set(state, instance_starting), true); started = pe__new_rsc_pseudo_action(collective, RSC_STARTED, !pcmk_is_set(state, instance_starting), false); started->priority = INFINITY; if (pcmk_any_flags_set(state, instance_active|instance_starting)) { pe__set_action_flags(started, pe_action_runnable); } if ((start_notify != NULL) && (*start_notify == NULL)) { *start_notify = pe__clone_notif_pseudo_ops(collective, RSC_START, start, started); } // Create pseudo-actions for rsc stop and stopped stop = pe__new_rsc_pseudo_action(collective, RSC_STOP, !pcmk_is_set(state, instance_stopping), true); stopped = pe__new_rsc_pseudo_action(collective, RSC_STOPPED, !pcmk_is_set(state, instance_stopping), true); stopped->priority = INFINITY; if (!pcmk_is_set(state, instance_restarting)) { pe__set_action_flags(stop, pe_action_migrate_runnable); } if ((stop_notify != NULL) && (*stop_notify == 
NULL)) { *stop_notify = pe__clone_notif_pseudo_ops(collective, RSC_STOP, stop, stopped); if ((start_notify != NULL) && (*start_notify != NULL) && (*stop_notify != NULL)) { order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional); } } } +static inline GList * +get_containers_or_children(const pe_resource_t *rsc) +{ + return (rsc->variant == pe_container)? + pcmk__bundle_containers(rsc) : rsc->children; +} + gboolean is_child_compatible(const pe_resource_t *child_rsc, const pe_node_t *local_node, enum rsc_role_e filter, gboolean current) { pe_node_t *node = NULL; enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); CRM_CHECK(child_rsc && local_node, return FALSE); if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { /* We only want instances that haven't failed */ node = child_rsc->fns->location(child_rsc, NULL, current); } if (filter != RSC_ROLE_UNKNOWN && next_role != filter) { crm_trace("Filtered %s", child_rsc->id); return FALSE; } if (node && (node->details == local_node->details)) { return TRUE; } else if (node) { crm_trace("%s - %s vs %s", child_rsc->id, pe__node_name(node), pe__node_name(local_node)); } else { crm_trace("%s - not allocated %d", child_rsc->id, current); } return FALSE; } +static pe_resource_t * +find_compatible_child_by_node(const pe_resource_t *local_child, + const pe_node_t *local_node, + const pe_resource_t *rsc, enum rsc_role_e filter, + gboolean current) +{ + GList *gIter = NULL; + GList *children = NULL; + + if (local_node == NULL) { + crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id); + return NULL; + } + + crm_trace("Looking for compatible child from %s for %s on %s", + local_child->id, rsc->id, pe__node_name(local_node)); + + children = get_containers_or_children(rsc); + for (gIter = children; gIter != NULL; gIter = gIter->next) { + pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; + + if(is_child_compatible(child_rsc, local_node, filter, 
current)) { + crm_trace("Pairing %s with %s on %s", + local_child->id, child_rsc->id, pe__node_name(local_node)); + return child_rsc; + } + } + + crm_trace("Can't pair %s with %s", local_child->id, rsc->id); + if(children != rsc->children) { + g_list_free(children); + } + return NULL; +} + pe_resource_t * find_compatible_child(const pe_resource_t *local_child, const pe_resource_t *rsc, enum rsc_role_e filter, gboolean current) { pe_resource_t *pair = NULL; GList *gIter = NULL; GList *scratch = NULL; pe_node_t *local_node = NULL; local_node = local_child->fns->location(local_child, NULL, current); if (local_node) { return find_compatible_child_by_node(local_child, local_node, rsc, filter, current); } scratch = g_hash_table_get_values(local_child->allowed_nodes); scratch = pcmk__sort_nodes(scratch, NULL); gIter = scratch; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = find_compatible_child_by_node(local_child, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id); done: g_list_free(scratch); return pair; } +static uint32_t +multi_update_interleave_actions(pe_action_t *first, pe_action_t *then, + const pe_node_t *node, uint32_t filter, + uint32_t type, pe_working_set_t *data_set) +{ + GList *gIter = NULL; + GList *children = NULL; + gboolean current = FALSE; + uint32_t changed = pcmk__updated_none; + + /* Fix this - lazy */ + if (pcmk__ends_with(first->uuid, "_stopped_0") + || pcmk__ends_with(first->uuid, "_demoted_0")) { + current = TRUE; + } + + children = get_containers_or_children(then->rsc); + for (gIter = children; gIter != NULL; gIter = gIter->next) { + pe_resource_t *then_child = gIter->data; + pe_resource_t *first_child = find_compatible_child(then_child, + first->rsc, + RSC_ROLE_UNKNOWN, + current); + if (first_child == NULL && current) { + crm_trace("Ignore"); + + } else if (first_child == NULL) { + crm_debug("No match found for %s (%d / 
%s / %s)", then_child->id, current, first->uuid, then->uuid); + + /* Me no like this hack - but what else can we do? + * + * If there is no-one active or about to be active + * on the same node as then_child, then they must + * not be allowed to start + */ + if (pcmk_any_flags_set(type, pe_order_runnable_left|pe_order_implies_then) /* Mandatory */ ) { + pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id); + if (pcmk__assign_resource(then_child, NULL, true)) { + pcmk__set_updated_flags(changed, first, pcmk__updated_then); + } + } + + } else { + pe_action_t *first_action = NULL; + pe_action_t *then_action = NULL; + + enum action_tasks task = clone_child_action(first); + const char *first_task = task2text(task); + + const pe_resource_t *first_rsc = NULL; + const pe_resource_t *then_rsc = NULL; + + first_rsc = pcmk__get_rsc_in_container(first_child, node); + if ((first_rsc != NULL) && strstr(first->task, "stop")) { + /* Except for 'stopped' we should be looking at the + * in-container resource, actions for the child will + * happen later and are therefore more likely to align + * with the user's intent.
+ */ + first_action = find_first_action(first_rsc->actions, NULL, + task2text(task), node); + } else { + first_action = find_first_action(first_child->actions, NULL, task2text(task), node); + } + + then_rsc = pcmk__get_rsc_in_container(then_child, node); + if ((then_rsc != NULL) && strstr(then->task, "mote")) { + /* Promote/demote actions will never be found for the + * container resource, look in the child instead + * + * Alternatively treat: + * 'XXXX then promote YYYY' as 'XXXX then start container for YYYY', and + * 'demote XXXX then stop YYYY' as 'stop container for XXXX then stop YYYY' + */ + then_action = find_first_action(then_rsc->actions, NULL, + then->task, node); + } else { + then_action = find_first_action(then_child->actions, NULL, then->task, node); + } + + if (first_action == NULL) { + if (!pcmk_is_set(first_child->flags, pe_rsc_orphan) + && !pcmk__str_any_of(first_task, RSC_STOP, RSC_DEMOTE, NULL)) { + crm_err("Internal error: No action found for %s in %s (first)", + first_task, first_child->id); + + } else { + crm_trace("No action found for %s in %s%s (first)", + first_task, first_child->id, + pcmk_is_set(first_child->flags, pe_rsc_orphan)? " (ORPHAN)" : ""); + } + continue; + } + + /* We're only interested if 'then' is neither stopping nor being demoted */ + if (then_action == NULL) { + if (!pcmk_is_set(then_child->flags, pe_rsc_orphan) + && !pcmk__str_any_of(then->task, RSC_STOP, RSC_DEMOTE, NULL)) { + crm_err("Internal error: No action found for %s in %s (then)", + then->task, then_child->id); + + } else { + crm_trace("No action found for %s in %s%s (then)", + then->task, then_child->id, + pcmk_is_set(then_child->flags, pe_rsc_orphan)? 
" (ORPHAN)" : ""); + } + continue; + } + + if (order_actions(first_action, then_action, type)) { + crm_debug("Created constraint for %s (%d) -> %s (%d) %.6x", + first_action->uuid, + pcmk_is_set(first_action->flags, pe_action_optional), + then_action->uuid, + pcmk_is_set(then_action->flags, pe_action_optional), + type); + pcmk__set_updated_flags(changed, first, + pcmk__updated_first|pcmk__updated_then); + } + if(first_action && then_action) { + changed |= then_child->cmds->update_ordered_actions(first_action, + then_action, + node, + first_child->cmds->action_flags(first_action, node), + filter, + type, + data_set); + } else { + crm_err("Nothing found either for %s (%p) or %s (%p) %s", + first_child->id, first_action, + then_child->id, then_action, task2text(task)); + } + } + } + + if(children != then->rsc->children) { + g_list_free(children); + } + return changed; +} + +static bool +can_interleave_actions(pe_action_t *first, pe_action_t *then) +{ + bool interleave = FALSE; + pe_resource_t *rsc = NULL; + const char *interleave_s = NULL; + + if(first->rsc == NULL || then->rsc == NULL) { + crm_trace("Not interleaving %s with %s (both must be resources)", first->uuid, then->uuid); + return FALSE; + } else if(first->rsc == then->rsc) { + crm_trace("Not interleaving %s with %s (must belong to different resources)", first->uuid, then->uuid); + return FALSE; + } else if(first->rsc->variant < pe_clone || then->rsc->variant < pe_clone) { + crm_trace("Not interleaving %s with %s (both sides must be clones or bundles)", first->uuid, then->uuid); + return FALSE; + } + + if (pcmk__ends_with(then->uuid, "_stop_0") + || pcmk__ends_with(then->uuid, "_demote_0")) { + rsc = first->rsc; + } else { + rsc = then->rsc; + } + + interleave_s = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE); + interleave = crm_is_true(interleave_s); + crm_trace("Interleave %s -> %s: %s (based on %s)", + first->uuid, then->uuid, interleave ? "yes" : "no", rsc->id); + + return interleave; +} + +/*! 
+ * \internal + * \brief Update two actions according to an ordering between them + * + * Given information about an ordering of two actions, update the actions' + * flags (and runnable_before members if appropriate) as appropriate for the + * ordering. In some cases, the ordering could be disabled as well. + * + * \param[in,out] first 'First' action in an ordering + * \param[in,out] then 'Then' action in an ordering + * \param[in] node If not NULL, limit scope of ordering to this node + * (only used when interleaving instances) + * \param[in] flags Action flags for \p first for ordering purposes + * \param[in] filter Action flags to limit scope of certain updates (may + * include pe_action_optional to affect only mandatory + * actions, and pe_action_runnable to affect only + * runnable actions) + * \param[in] type Group of enum pe_ordering flags to apply + * \param[in,out] data_set Cluster working set + * + * \return Group of enum pcmk__updated flags indicating what was updated + */ +uint32_t +pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then, + const pe_node_t *node, uint32_t flags, + uint32_t filter, uint32_t type, + pe_working_set_t *data_set) +{ + uint32_t changed = pcmk__updated_none; + + crm_trace("%s -> %s", first->uuid, then->uuid); + + if(can_interleave_actions(first, then)) { + changed = multi_update_interleave_actions(first, then, node, filter, + type, data_set); + + } else if(then->rsc) { + GList *gIter = NULL; + GList *children = NULL; + + // Handle the 'primitive' ordering case + changed |= pcmk__update_ordered_actions(first, then, node, flags, + filter, type, data_set); + + // Now any children (or containers in the case of a bundle) + children = get_containers_or_children(then->rsc); + for (gIter = children; gIter != NULL; gIter = gIter->next) { + pe_resource_t *then_child = (pe_resource_t *) gIter->data; + uint32_t then_child_changed = pcmk__updated_none; + pe_action_t *then_child_action = find_first_action(then_child->actions, NULL, 
then->task, node); + + if (then_child_action) { + uint32_t then_child_flags = then_child->cmds->action_flags(then_child_action, + node); + + if (pcmk_is_set(then_child_flags, pe_action_runnable)) { + then_child_changed |= then_child->cmds->update_ordered_actions(first, + then_child_action, + node, + flags, + filter, + type, + data_set); + } + changed |= then_child_changed; + if (pcmk_is_set(then_child_changed, pcmk__updated_then)) { + for (GList *lpc = then_child_action->actions_after; lpc != NULL; lpc = lpc->next) { + pe_action_wrapper_t *next = (pe_action_wrapper_t *) lpc->data; + + pcmk__update_action_for_orderings(next->action, + data_set); + } + } + } + } + + if(children != then->rsc->children) { + g_list_free(children); + } + } + return changed; +} + enum action_tasks clone_child_action(pe_action_t * action) { enum action_tasks result = no_action; pe_resource_t *child = (pe_resource_t *) action->rsc->children->data; if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) { /* Find the action we're notifying about instead */ int stop = 0; char *key = action->uuid; int lpc = strlen(key); for (; lpc > 0; lpc--) { if (key[lpc] == '_' && stop == 0) { stop = lpc; } else if (key[lpc] == '_') { char *task_mutable = NULL; lpc++; task_mutable = strdup(key + lpc); task_mutable[stop - lpc] = 0; crm_trace("Extracted action '%s' from '%s'", task_mutable, key); result = get_complex_task(child, task_mutable, TRUE); free(task_mutable); break; } } } else { result = get_complex_task(child, action->task, TRUE); } return result; } #define pe__clear_action_summary_flags(flags, action, flag) do { \ flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Action summary", action->rsc->id, \ flags, flag, #flag); \ } while (0) enum pe_action_flags summary_action_flags(pe_action_t *action, GList *children, const pe_node_t *node) { GList *gIter = NULL; gboolean any_runnable = FALSE; gboolean check_runnable = TRUE; enum action_tasks task = clone_child_action(action); 
enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); const char *task_s = task2text(task); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_action_t *child_action = NULL; pe_resource_t *child = (pe_resource_t *) gIter->data; child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node); pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id, pe__node_name(node), child_action?child_action->uuid:"NA"); if (child_action) { enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); if (pcmk_is_set(flags, pe_action_optional) && !pcmk_is_set(child_flags, pe_action_optional)) { pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid, child_action->uuid); pe__clear_action_summary_flags(flags, action, pe_action_optional); pe__clear_action_flags(action, pe_action_optional); } if (pcmk_is_set(child_flags, pe_action_runnable)) { any_runnable = TRUE; } } } if (check_runnable && any_runnable == FALSE) { pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid); pe__clear_action_summary_flags(flags, action, pe_action_runnable); if (node == NULL) { pe__clear_action_flags(action, pe_action_runnable); } } return flags; }