diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h index 0c43e72cee..e5b020a548 100644 --- a/lib/pacemaker/libpacemaker_private.h +++ b/lib/pacemaker/libpacemaker_private.h @@ -1,433 +1,436 @@ /* * Copyright 2021-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__LIBPACEMAKER_PRIVATE__H # define PCMK__LIBPACEMAKER_PRIVATE__H /* This header is for the sole use of libpacemaker, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. */ #include // pe_action_t, pe_node_t, pe_working_set_t // Actions (pcmk_sched_actions.c) G_GNUC_INTERNAL void pcmk__update_action_for_orderings(pe_action_t *action, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__log_action(const char *pre_text, pe_action_t *action, bool details); G_GNUC_INTERNAL pe_action_t *pcmk__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task, bool optional, bool runnable); G_GNUC_INTERNAL pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name, guint interval_ms, pe_node_t *node); G_GNUC_INTERNAL pe_action_t *pcmk__new_shutdown_action(pe_node_t *node, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__action_locks_rsc_to_node(const pe_action_t *action); G_GNUC_INTERNAL void pcmk__deduplicate_action_inputs(pe_action_t *action); G_GNUC_INTERNAL void pcmk__output_actions(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op); G_GNUC_INTERNAL void pcmk__handle_rsc_config_changes(pe_working_set_t *data_set); // Producing transition graphs (pcmk_graph_producer.c) G_GNUC_INTERNAL bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action, pe_action_wrapper_t *input); G_GNUC_INTERNAL void pcmk__add_action_to_graph(pe_action_t *action, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__create_graph(pe_working_set_t *data_set); // Fencing (pcmk_sched_fencing.c) G_GNUC_INTERNAL void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__fence_guest(pe_node_t *node, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__node_unfenced(pe_node_t *node); G_GNUC_INTERNAL bool pcmk__is_unfence_device(const pe_resource_t *rsc, const pe_working_set_t *data_set); // Injected scheduler inputs (pcmk_sched_injections.c) void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib, pcmk_injections_t *injections); // Constraints of any type (pcmk_sched_constraints.c) G_GNUC_INTERNAL pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id); G_GNUC_INTERNAL xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id, pe_resource_t **rsc, pe_tag_t **tag); G_GNUC_INTERNAL bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr, bool convert_rsc, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__create_internal_constraints(pe_working_set_t *data_set); // Location constraints G_GNUC_INTERNAL void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc, int node_weight, const char *discover_mode, pe_node_t *foo_node, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_locations(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_location(pe__location_t *constraint, pe_resource_t *rsc); // Colocation constraints enum pcmk__coloc_affects { pcmk__coloc_affects_nothing = 0, pcmk__coloc_affects_location, pcmk__coloc_affects_role, }; G_GNUC_INTERNAL enum pcmk__coloc_affects pcmk__colocation_affects(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, bool preview); G_GNUC_INTERNAL void pcmk__apply_coloc_to_weights(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint); G_GNUC_INTERNAL void pcmk__apply_coloc_to_priority(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint); G_GNUC_INTERNAL void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__new_colocation(const char *id, const char *node_attr, int score, pe_resource_t *dependent, pe_resource_t *primary, const char *dependent_role, const char *primary_role, bool influence, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__block_colocated_starts(pe_action_t *action, pe_working_set_t *data_set); /*! * \internal * \brief Check whether colocation's left-hand preferences should be considered * * \param[in] colocation Colocation constraint * \param[in] rsc Right-hand instance (normally this will be * colocation->primary, which NULL will be treated as, * but for clones or bundles with multiple instances * this can be a particular instance) * * \return true if colocation influence should be effective, otherwise false */ static inline bool pcmk__colocation_has_influence(const pcmk__colocation_t *colocation, const pe_resource_t *rsc) { if (rsc == NULL) { rsc = colocation->primary; } /* The left hand of a colocation influences the right hand's location * if the influence option is true, or the right hand is not yet active. */ return colocation->influence || (rsc->running_on == NULL); } // Ordering constraints (pcmk_sched_ordering.c) G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *lh_rsc, char *lh_task, pe_action_t *lh_action, pe_resource_t *rh_rsc, char *rh_task, pe_action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__disable_invalid_orderings(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_stops_before_shutdown(pe_node_t *node, pe_action_t *shutdown_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_orderings(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_after_each(pe_action_t *after, GList *list); /*! * \internal * \brief Create a new ordering between two resource actions * * \param[in] lh_rsc Resource for 'first' action * \param[in] rh_rsc Resource for 'then' action * \param[in] lh_task Action key for 'first' action * \param[in] rh_task Action key for 'then' action * \param[in] flags Bitmask of enum pe_ordering flags * \param[in] data_set Cluster working set to add ordering to */ #define pcmk__order_resource_actions(lh_rsc, lh_task, rh_rsc, rh_task, \ flags, data_set) \ pcmk__new_ordering((lh_rsc), pcmk__op_key((lh_rsc)->id, (lh_task), 0), \ NULL, \ (rh_rsc), pcmk__op_key((rh_rsc)->id, (rh_task), 0), \ NULL, (flags), (data_set)) #define pcmk__order_starts(rsc1, rsc2, type, data_set) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \ (rsc2), CRMD_ACTION_START, (type), (data_set)) #define pcmk__order_stops(rsc1, rsc2, type, data_set) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \ (rsc2), CRMD_ACTION_STOP, (type), (data_set)) G_GNUC_INTERNAL void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__require_promotion_tickets(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(pe_node_t *node); G_GNUC_INTERNAL void pcmk__order_remote_connection_actions(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node); G_GNUC_INTERNAL pe_node_t *pcmk__connection_host_for_action(pe_action_t *action); G_GNUC_INTERNAL void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action); // Groups (pcmk_sched_group.c) G_GNUC_INTERNAL GList *pcmk__group_colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *colocated_rscs); // Bundles (pcmk_sched_bundle.c) G_GNUC_INTERNAL void pcmk__output_bundle_actions(pe_resource_t *rsc); // Injections (pcmk_injections.c) G_GNUC_INTERNAL xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid); G_GNUC_INTERNAL xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node, bool up); G_GNUC_INTERNAL xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *lrm_name, const char *rclass, const char *rtype, const char *rprovider); G_GNUC_INTERNAL void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *task, guint interval_ms, int rc); G_GNUC_INTERNAL xmlNode *pcmk__inject_action_result(xmlNode *cib_resource, lrmd_event_data_t *op, int target_rc); // Nodes (pcmk_sched_nodes.c) G_GNUC_INTERNAL bool pcmk__node_available(const pe_node_t *node); G_GNUC_INTERNAL bool pcmk__any_node_available(GHashTable *nodes); G_GNUC_INTERNAL GHashTable *pcmk__copy_node_table(GHashTable *nodes); G_GNUC_INTERNAL GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_node_health(pe_working_set_t *data_set); G_GNUC_INTERNAL pe_node_t *pcmk__top_allowed_node(const pe_resource_t *rsc, const pe_node_t *node); // Clone notifictions (pcmk_sched_notif.c) G_GNUC_INTERNAL void pcmk__create_notifications(pe_resource_t *rsc, notify_data_t *n_data); G_GNUC_INTERNAL notify_data_t *pcmk__clone_notif_pseudo_ops(pe_resource_t *rsc, const char *task, pe_action_t *action, pe_action_t *complete); G_GNUC_INTERNAL void pcmk__free_notification_data(notify_data_t *n_data); G_GNUC_INTERNAL void pcmk__order_notifs_after_fencing(pe_action_t *action, pe_resource_t *rsc, pe_action_t *stonith_op); // Functions applying to more than one variant (pcmk_sched_resource.c) G_GNUC_INTERNAL void pcmk__set_allocation_methods(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node, const xmlNode *rsc_entry, bool active_on_node); G_GNUC_INTERNAL GList *pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set); G_GNUC_INTERNAL GList *pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *colocated_rscs); G_GNUC_INTERNAL void pcmk__output_resource_actions(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force); G_GNUC_INTERNAL bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force); G_GNUC_INTERNAL void pcmk__unassign_resource(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node, pe_resource_t **failed); G_GNUC_INTERNAL void pcmk__sort_resources(pe_working_set_t *data_set); G_GNUC_INTERNAL gint pcmk__cmp_instance(gconstpointer a, gconstpointer b); +G_GNUC_INTERNAL +gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b); + // Functions related to probes (pcmk_sched_probes.c) G_GNUC_INTERNAL void pcmk__order_probes(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__schedule_probes(pe_working_set_t *data_set); // Functions related to node utilization (pcmk_sched_utilization.c) G_GNUC_INTERNAL int pcmk__compare_node_capacities(const pe_node_t *node1, const pe_node_t *node2); G_GNUC_INTERNAL void pcmk__consume_node_capacity(GHashTable *current_utilization, pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__release_node_capacity(GHashTable *current_utilization, pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__ban_insufficient_capacity(pe_resource_t *rsc, pe_node_t **prefer, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__create_utilization_constraints(pe_resource_t *rsc, GList *allowed_nodes); G_GNUC_INTERNAL void pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set); #endif // PCMK__LIBPACEMAKER_PRIVATE__H diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c index bcbf25c4d6..d0bd47f9e8 100644 --- a/lib/pacemaker/pcmk_sched_clone.c +++ b/lib/pacemaker/pcmk_sched_clone.c @@ -1,1196 +1,1172 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include "libpacemaker_private.h" #define VARIANT_CLONE 1 #include static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all); -static gint -sort_rsc_id(gconstpointer a, gconstpointer b) -{ - const pe_resource_t *resource1 = (const pe_resource_t *)a; - const pe_resource_t *resource2 = (const pe_resource_t *)b; - long num1, num2; - - CRM_ASSERT(resource1 != NULL); - CRM_ASSERT(resource2 != NULL); - - /* - * Sort clone instances numerically by instance number, so instance :10 - * comes after :9. - */ - num1 = strtol(strrchr(resource1->id, ':') + 1, NULL, 10); - num2 = strtol(strrchr(resource2->id, ':') + 1, NULL, 10); - if (num1 < num2) { - return -1; - } else if (num1 > num2) { - return 1; - } - return 0; -} - static pe_node_t * can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit) { pe_node_t *local_node = NULL; if (node == NULL && rsc->allowed_nodes) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) { can_run_instance(rsc, local_node, limit); } return NULL; } if (!node) { /* make clang analyzer happy */ goto bail; } else if (!pcmk__node_available(node)) { goto bail; } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { goto bail; } local_node = pcmk__top_allowed_node(rsc, node); if (local_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname); goto bail; } else if (local_node->weight < 0) { common_update_score(rsc, node->details->id, local_node->weight); pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.", rsc->id, node->details->uname); } else if (local_node->count < limit) { pe_rsc_trace(rsc, "%s can run on %s (already running %d)", rsc->id, node->details->uname, local_node->count); return local_node; } else { pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)", rsc->id, node->details->uname, local_node->count, limit); } bail: if (node) { common_update_score(rsc, node->details->id, -INFINITY); } return NULL; } static pe_node_t * allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc, int limit, pe_working_set_t *data_set) { pe_node_t *chosen = NULL; GHashTable *backup = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)", rsc->id, (prefer? prefer->details->uname: "none"), (all_coloc? "all" : "some")); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return rsc->fns->location(rsc, NULL, FALSE); } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } /* Only include positive colocation preferences of dependent resources * if not every node will get a copy of the clone */ append_parent_colocation(rsc->parent, rsc, all_coloc); if (prefer) { pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (local_prefer == NULL || local_prefer->weight < 0) { pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id, prefer->details->uname); return NULL; } } can_run_instance(rsc, NULL, limit); backup = pcmk__copy_node_table(rsc->allowed_nodes); pe_rsc_trace(rsc, "Allocating instance %s", rsc->id); chosen = rsc->cmds->allocate(rsc, prefer, data_set); if (chosen && prefer && (chosen->details != prefer->details)) { crm_info("Not pre-allocating %s to %s because %s is better", rsc->id, prefer->details->uname, chosen->details->uname); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = backup; pcmk__unassign_resource(rsc); chosen = NULL; backup = NULL; } if (chosen) { pe_node_t *local_node = pcmk__top_allowed_node(rsc, chosen); if (local_node) { local_node->count++; } else if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { /* what to do? we can't enforce per-node limits in this case */ pcmk__config_err("%s not found in %s (list of %d)", chosen->details->id, rsc->parent->id, g_hash_table_size(rsc->parent->allowed_nodes)); } } if(backup) { g_hash_table_destroy(backup); } return chosen; } static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all) { GList *gIter = NULL; gIter = rsc->rsc_cons; for (; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; if (all || cons->score < 0 || cons->score == INFINITY) { child->rsc_cons = g_list_prepend(child->rsc_cons, cons); } } gIter = rsc->rsc_cons_lhs; for (; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(cons, child)) { continue; } if (all || cons->score < 0) { child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons); } } } void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, int max, int per_host_max, pe_working_set_t * data_set); void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, int max, int per_host_max, pe_working_set_t * data_set) { int loop_max = 0; int allocated = 0; int available_nodes = 0; bool all_coloc = false; /* count now tracks the number of clones currently allocated */ for(GList *nIter = nodes; nIter != NULL; nIter = nIter->next) { pe_node_t *node = nIter->data; node->count = 0; if (pcmk__node_available(node)) { available_nodes++; } } all_coloc = (max < available_nodes) ? true : false; if(available_nodes) { loop_max = max / available_nodes; } if (loop_max < 1) { loop_max = 1; } pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)", max, rsc->id, available_nodes, per_host_max, loop_max); /* Pre-allocate as many instances as we can to their current location */ for (GList *gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; pe_node_t *child_node = NULL; pe_node_t *local_node = NULL; if ((child->running_on == NULL) || !pcmk_is_set(child->flags, pe_rsc_provisional) || pcmk_is_set(child->flags, pe_rsc_failed)) { continue; } child_node = pe__current_node(child); local_node = pcmk__top_allowed_node(child, child_node); pe_rsc_trace(rsc, "Checking pre-allocation of %s to %s (%d remaining of %d)", child->id, child_node->details->uname, max - allocated, max); if (!pcmk__node_available(child_node) || (child_node->weight < 0)) { pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s", child_node->details->uname, child->id); continue; } if ((local_node != NULL) && (local_node->count >= loop_max)) { pe_rsc_trace(rsc, "Not pre-allocating because %s already allocated " "optimal instances", child_node->details->uname); continue; } if (allocate_instance(child, child_node, all_coloc, per_host_max, data_set)) { pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id, child_node->details->uname); allocated++; } } pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max); for (GList *gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (child->running_on != NULL) { pe_node_t *child_node = pe__current_node(child); pe_node_t *local_node = pcmk__top_allowed_node(child, child_node); if (local_node == NULL) { crm_err("%s is running on %s which isn't allowed", child->id, child_node->details->uname); } } if (!pcmk_is_set(child->flags, pe_rsc_provisional)) { } else if (allocated >= max) { pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max); resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set); } else { if (allocate_instance(child, NULL, all_coloc, per_host_max, data_set)) { allocated++; } } } pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d", allocated, rsc->id, max); } pe_node_t * pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { GList *nodes = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return NULL; } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__add_promotion_scores(rsc); } pe__set_resource_flags(rsc, pe_rsc_allocating); /* This information is used by pcmk__cmp_instance() when deciding the order * in which to assign clone instances to nodes. */ for (GList *gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; pe_rsc_trace(rsc, "%s: Allocating %s first", rsc->id, constraint->primary->id); constraint->primary->cmds->allocate(constraint->primary, prefer, data_set); } for (GList *gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(constraint, NULL)) { continue; } rsc->allowed_nodes = constraint->dependent->cmds->merge_weights( constraint->dependent, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY, (pe_weights_rollback | pe_weights_positive)); } pe__show_node_weights(!pcmk_is_set(data_set->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, data_set); nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = pcmk__sort_nodes(nodes, NULL, data_set); rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance); distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, data_set); g_list_free(nodes); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__set_instance_roles(rsc, data_set); } pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating); pe_rsc_trace(rsc, "Done allocating %s", rsc->id); return NULL; } static void clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting, gboolean * active) { GList *gIter = NULL; if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; clone_update_pseudo_status(child, stopping, starting, active); } return; } CRM_ASSERT(active != NULL); CRM_ASSERT(starting != NULL); CRM_ASSERT(stopping != NULL); if (rsc->running_on) { *active = TRUE; } gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (*starting && *stopping) { return; } else if (pcmk_is_set(action->flags, pe_action_optional)) { pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid); continue; } else if (!pcmk_any_flags_set(action->flags, pe_action_pseudo|pe_action_runnable)) { pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid); continue; } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) { pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid); *stopping = TRUE; } else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) { if (!pcmk_is_set(action->flags, pe_action_runnable)) { pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d", action->uuid, pcmk_is_set(action->flags, pe_action_runnable), pcmk_is_set(action->flags, pe_action_pseudo)); } else { pe_rsc_trace(rsc, "Starting due to: %s", action->uuid); pe_rsc_trace(rsc, "%s run=%d, pseudo=%d", action->uuid, pcmk_is_set(action->flags, pe_action_runnable), pcmk_is_set(action->flags, pe_action_pseudo)); *starting = TRUE; } } } } static pe_action_t * find_rsc_action(pe_resource_t *rsc, const char *task) { pe_action_t *match = NULL; GList *actions = pe__resource_actions(rsc, NULL, task, FALSE); for (GList *item = actions; item != NULL; item = item->next) { pe_action_t *op = (pe_action_t *) item->data; if (!pcmk_is_set(op->flags, pe_action_optional)) { if (match != NULL) { // More than one match, don't return any match = NULL; break; } match = op; } } g_list_free(actions); return match; } static void child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *stop = NULL; pe_action_t *start = NULL; pe_action_t *last_stop = NULL; pe_action_t *last_start = NULL; GList *gIter = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (clone_data->ordered == FALSE) { return; } /* we have to maintain a consistent sorted child list when building order constraints */ - rsc->children = g_list_sort(rsc->children, sort_rsc_id); + rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; stop = find_rsc_action(child, RSC_STOP); if (stop) { if (last_stop) { /* child/child relative stop */ order_actions(stop, last_stop, pe_order_optional); } last_stop = stop; } start = find_rsc_action(child, RSC_START); if (start) { if (last_start) { /* child/child relative start */ order_actions(last_start, start, pe_order_optional); } last_start = start; } } } void clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify,data_set); child_ordering_constraints(rsc, data_set); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { create_promotable_actions(rsc, data_set); } } void clone_create_pseudo_actions( pe_resource_t * rsc, GList *children, notify_data_t **start_notify, notify_data_t **stop_notify, pe_working_set_t * data_set) { gboolean child_active = FALSE; gboolean child_starting = FALSE; gboolean child_stopping = FALSE; gboolean allow_dependent_migrations = TRUE; pe_action_t *stop = NULL; pe_action_t *stopped = NULL; pe_action_t *start = NULL; pe_action_t *started = NULL; pe_rsc_trace(rsc, "Creating actions for %s", rsc->id); for (GList *gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean starting = FALSE; gboolean stopping = FALSE; child_rsc->cmds->create_actions(child_rsc, data_set); clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active); if (stopping && starting) { allow_dependent_migrations = FALSE; } child_stopping |= stopping; child_starting |= starting; } /* start */ start = pcmk__new_rsc_pseudo_action(rsc, RSC_START, !child_starting, true); started = pcmk__new_rsc_pseudo_action(rsc, RSC_STARTED, !child_starting, false); started->priority = INFINITY; if (child_active || child_starting) { pe__set_action_flags(started, pe_action_runnable); } if (start_notify != NULL && *start_notify == NULL) { *start_notify = pcmk__clone_notif_pseudo_ops(rsc, RSC_START, start, started); } /* stop */ stop = pcmk__new_rsc_pseudo_action(rsc, RSC_STOP, !child_stopping, true); stopped = pcmk__new_rsc_pseudo_action(rsc, RSC_STOPPED, !child_stopping, true); stopped->priority = INFINITY; if (allow_dependent_migrations) { pe__set_action_flags(stop, pe_action_migrate_runnable); } if (stop_notify != NULL && *stop_notify == NULL) { *stop_notify = pcmk__clone_notif_pseudo_ops(rsc, RSC_STOP, stop, stopped); if (start_notify && *start_notify && *stop_notify) { order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional); } } } void clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set) { pe_resource_t *last_rsc = NULL; GList *gIter; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id); pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional, data_set); pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left, data_set); } if (clone_data->ordered) { /* we have to maintain a consistent sorted child list when building order constraints */ - rsc->children = g_list_sort(rsc->children, sort_rsc_id); + rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->internal_constraints(child_rsc, data_set); pcmk__order_starts(rsc, child_rsc, pe_order_runnable_left|pe_order_implies_first_printed, data_set); pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { pcmk__order_starts(last_rsc, child_rsc, pe_order_optional, data_set); } pcmk__order_stops(rsc, child_rsc, pe_order_implies_first_printed, data_set); pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { pcmk__order_stops(child_rsc, last_rsc, pe_order_optional, data_set); } last_rsc = child_rsc; } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { promotable_constraints(rsc, data_set); } } gboolean is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current) { pe_node_t *node = NULL; enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); CRM_CHECK(child_rsc && local_node, return FALSE); if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { /* We only want instances that haven't failed */ node = child_rsc->fns->location(child_rsc, NULL, current); } if (filter != RSC_ROLE_UNKNOWN && next_role != filter) { crm_trace("Filtered %s", child_rsc->id); return FALSE; } if (node && (node->details == local_node->details)) { return TRUE; } else if (node) { crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname, local_node->details->uname); } else { crm_trace("%s - not allocated %d", child_rsc->id, current); } return FALSE; } pe_resource_t * find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { pe_resource_t *pair = NULL; GList *gIter = NULL; GList *scratch = NULL; pe_node_t *local_node = NULL; local_node = local_child->fns->location(local_child, NULL, current); if (local_node) { return find_compatible_child_by_node(local_child, local_node, rsc, filter, current); } scratch = g_hash_table_get_values(local_child->allowed_nodes); scratch = pcmk__sort_nodes(scratch, NULL, data_set); gIter = scratch; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = find_compatible_child_by_node(local_child, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id); done: g_list_free(scratch); return pair; } void clone_rsc_colocation_lh(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { /* -- Never called -- * * Instead we add the colocation constraints to the child and call from there */ CRM_ASSERT(FALSE); } void clone_rsc_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { GList *gIter = NULL; gboolean do_interleave = FALSE; const char *interleave_s = NULL; CRM_CHECK(constraint != NULL, return); CRM_CHECK(dependent != NULL, pe_err("dependent was NULL for %s", constraint->id); return); CRM_CHECK(primary != NULL, pe_err("primary was NULL for %s", constraint->id); return); CRM_CHECK(dependent->variant == pe_native, return); pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d", constraint->id, dependent->id, primary->id, constraint->score); if (pcmk_is_set(primary->flags, pe_rsc_promotable)) { if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (constraint->primary_role == RSC_ROLE_UNKNOWN) { pe_rsc_trace(primary, "Handling %s as a clone colocation", constraint->id); } else { promotable_colocation_rh(dependent, primary, constraint, data_set); return; } } /* only the LHS side needs to be labeled as interleave */ interleave_s = g_hash_table_lookup(constraint->dependent->meta, XML_RSC_ATTR_INTERLEAVE); if (crm_is_true(interleave_s) && (constraint->dependent->variant > pe_group)) { /* @TODO Do we actually care about multiple primary copies sharing a * dependent copy anymore? */ if (copies_per_node(constraint->dependent) != copies_per_node(constraint->primary)) { pcmk__config_err("Cannot interleave %s and %s because they do not " "support the same number of instances per node", constraint->dependent->id, constraint->primary->id); } else { do_interleave = TRUE; } } if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (do_interleave) { pe_resource_t *primary_instance = NULL; primary_instance = find_compatible_child(dependent, primary, RSC_ROLE_UNKNOWN, FALSE, data_set); if (primary_instance != NULL) { pe_rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_instance->id); dependent->cmds->rsc_colocation_lh(dependent, primary_instance, constraint, data_set); } else if (constraint->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true); } else { pe_rsc_debug(primary, "Cannot pair %s with instance of %s", dependent->id, primary->id); } return; } else if (constraint->score >= INFINITY) { GList *affected_nodes = NULL; gIter = primary->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { pe_rsc_trace(primary, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); affected_nodes = g_list_prepend(affected_nodes, chosen); } } node_list_exclude(dependent->allowed_nodes, affected_nodes, FALSE); g_list_free(affected_nodes); return; } gIter = primary->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_rh(dependent, child_rsc, constraint, data_set); } } enum action_tasks clone_child_action(pe_action_t * action) { enum action_tasks result = no_action; pe_resource_t *child = (pe_resource_t *) action->rsc->children->data; if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) { /* Find the action we're notifying about instead */ int stop = 0; char *key = action->uuid; int lpc = strlen(key); for (; lpc > 0; lpc--) { if (key[lpc] == '_' && stop == 0) { stop = lpc; } else if (key[lpc] == '_') { char *task_mutable = NULL; lpc++; task_mutable = strdup(key + lpc); task_mutable[stop - lpc] = 0; crm_trace("Extracted action '%s' from '%s'", task_mutable, key); result = get_complex_task(child, task_mutable, TRUE); free(task_mutable); break; } } } else { result = get_complex_task(child, action->task, TRUE); } return result; } #define pe__clear_action_summary_flags(flags, action, flag) do { \ flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Action summary", action->rsc->id, \ flags, flag, #flag); \ } while (0) enum pe_action_flags summary_action_flags(pe_action_t * action, GList *children, pe_node_t * node) { GList *gIter = NULL; gboolean any_runnable = FALSE; gboolean check_runnable = TRUE; enum action_tasks task = clone_child_action(action); enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); const char *task_s = task2text(task); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_action_t *child_action = NULL; pe_resource_t *child = (pe_resource_t *) gIter->data; child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node); pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id, node ? node->details->uname : "none", child_action?child_action->uuid:"NA"); if (child_action) { enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); if (pcmk_is_set(flags, pe_action_optional) && !pcmk_is_set(child_flags, pe_action_optional)) { pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid, child_action->uuid); pe__clear_action_summary_flags(flags, action, pe_action_optional); pe__clear_action_flags(action, pe_action_optional); } if (pcmk_is_set(child_flags, pe_action_runnable)) { any_runnable = TRUE; } } } if (check_runnable && any_runnable == FALSE) { pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid); pe__clear_action_summary_flags(flags, action, pe_action_runnable); if (node == NULL) { pe__clear_action_flags(action, pe_action_runnable); } } return flags; } enum pe_action_flags clone_action_flags(pe_action_t * action, pe_node_t * node) { return summary_action_flags(action, action->rsc->children, node); } void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { GList *gIter = rsc->children; pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); pcmk__apply_location(constraint, rsc); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_location(child_rsc, constraint); } } void clone_expand(pe_resource_t * rsc, pe_working_set_t * data_set) { GList *gIter = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); g_list_foreach(rsc->actions, (GFunc) rsc->cmds->action_flags, NULL); pcmk__create_notifications(rsc, clone_data->start_notify); pcmk__create_notifications(rsc, clone_data->stop_notify); pcmk__create_notifications(rsc, clone_data->promote_notify); pcmk__create_notifications(rsc, clone_data->demote_notify); /* Now that the notifcations have been created we can expand the children */ gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } native_expand(rsc, data_set); /* The notifications are in the graph now, we can destroy the notify_data */ pcmk__free_notification_data(clone_data->demote_notify); clone_data->demote_notify = NULL; pcmk__free_notification_data(clone_data->stop_notify); clone_data->stop_notify = NULL; pcmk__free_notification_data(clone_data->start_notify); clone_data->start_notify = NULL; pcmk__free_notification_data(clone_data->promote_notify); clone_data->promote_notify = NULL; } // Check whether a resource or any of its children is known on node static bool rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node) { if (rsc->children) { for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; if (rsc_known_on(child, node)) { return TRUE; } } } else if (rsc->known_on) { GHashTableIter iter; pe_node_t *known_node = NULL; g_hash_table_iter_init(&iter, rsc->known_on); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) { if (node->details == known_node->details) { return TRUE; } } } return FALSE; } // Look for an instance of clone that is known on node static pe_resource_t * find_instance_on(const pe_resource_t *clone, const pe_node_t *node) { for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (rsc_known_on(child, node)) { return child; } } return NULL; } // For unique clones, probe each instance separately static gboolean probe_unique_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set) { gboolean any_created = FALSE; for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; any_created |= child->cmds->create_probe(child, node, complete, force, data_set); } return any_created; } // For anonymous clones, only a single instance needs to be probed static gboolean probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set) { // First, check if we probed an instance on this node last time pe_resource_t *child = find_instance_on(rsc, node); // Otherwise, check if we plan to start an instance on this node if (child == NULL) { for (GList *child_iter = rsc->children; child_iter && !child; child_iter = child_iter->next) { pe_node_t *local_node = NULL; pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data; if (child_rsc) { /* make clang analyzer happy */ local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); if (local_node && (local_node->details == node->details)) { child = child_rsc; } } } } // Otherwise, use the first clone instance if (child == NULL) { child = rsc->children->data; } CRM_ASSERT(child); return child->cmds->create_probe(child, node, complete, force, data_set); } gboolean clone_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete, gboolean force, pe_working_set_t * data_set) { gboolean any_created = FALSE; CRM_ASSERT(rsc); - rsc->children = g_list_sort(rsc->children, sort_rsc_id); + rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); if (rsc->children == NULL) { pe_warn("Clone %s has no children", rsc->id); return FALSE; } if (rsc->exclusive_discover) { pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on * * remove the node from allowed_nodes so that the * notification contains only nodes that we might ever run * on */ g_hash_table_remove(rsc->allowed_nodes, node->details->id); /* Bit of a shortcut - might as well take it */ return FALSE; } } if (pcmk_is_set(rsc->flags, pe_rsc_unique)) { any_created = probe_unique_clone(rsc, node, complete, force, data_set); } else { any_created = probe_anonymous_clone(rsc, node, complete, force, data_set); } return any_created; } void clone_append_meta(pe_resource_t * rsc, xmlNode * xml) { char *name = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); name = crm_meta_name(XML_RSC_ATTR_UNIQUE); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique)); free(name); name = crm_meta_name(XML_RSC_ATTR_NOTIFY); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify)); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX); crm_xml_add_int(xml, name, clone_data->clone_max); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX); crm_xml_add_int(xml, name, clone_data->clone_node_max); free(name); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX); crm_xml_add_int(xml, name, clone_data->promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX); crm_xml_add_int(xml, name, clone_data->promoted_node_max); free(name); /* @COMPAT Maintain backward compatibility with resource agents that * expect the old names (deprecated since 2.0.0). */ name = crm_meta_name(PCMK_XE_PROMOTED_MAX_LEGACY); crm_xml_add_int(xml, name, clone_data->promoted_max); free(name); name = crm_meta_name(PCMK_XE_PROMOTED_NODE_MAX_LEGACY); crm_xml_add_int(xml, name, clone_data->promoted_node_max); free(name); } } // Clone implementation of resource_alloc_functions_t:add_utilization() void pcmk__clone_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { bool existing = false; pe_resource_t *child = NULL; if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } // Look for any child already existing in the list for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { child = (pe_resource_t *) iter->data; if (g_list_find(all_rscs, child)) { existing = true; // Keep checking remaining children } else { // If this is a clone of a group, look for group's members for (GList *member_iter = child->children; member_iter != NULL; member_iter = member_iter->next) { pe_resource_t *member = (pe_resource_t *) member_iter->data; if (g_list_find(all_rscs, member) != NULL) { // Add *child's* utilization, not group member's child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); existing = true; break; } } } } if (!existing && (rsc->children != NULL)) { // If nothing was found, still add first child's utilization child = (pe_resource_t *) rsc->children->data; child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); } } // Clone implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__clone_shutdown_lock(pe_resource_t *rsc) { return; // Clones currently don't support shutdown locks } diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c index 5f2a506f23..3b07f007ed 100644 --- a/lib/pacemaker/pcmk_sched_resource.c +++ b/lib/pacemaker/pcmk_sched_resource.c @@ -1,1097 +1,1135 @@ /* * Copyright 2014-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include + +#include +#include #include #include #include "libpacemaker_private.h" // Resource allocation methods that vary by resource variant static resource_alloc_functions_t allocation_methods[] = { { pcmk__native_merge_weights, pcmk__native_allocate, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, pcmk__colocated_resources, native_rsc_location, native_action_flags, native_update_actions, pcmk__output_resource_actions, native_expand, native_append_meta, pcmk__primitive_add_utilization, pcmk__primitive_shutdown_lock, }, { pcmk__group_merge_weights, pcmk__group_allocate, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, pcmk__group_colocated_resources, group_rsc_location, group_action_flags, group_update_actions, pcmk__output_resource_actions, group_expand, group_append_meta, pcmk__group_add_utilization, pcmk__group_shutdown_lock, }, { pcmk__native_merge_weights, pcmk__clone_allocate, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, pcmk__colocated_resources, clone_rsc_location, clone_action_flags, pcmk__multi_update_actions, pcmk__output_resource_actions, clone_expand, clone_append_meta, pcmk__clone_add_utilization, pcmk__clone_shutdown_lock, }, { pcmk__native_merge_weights, pcmk__bundle_allocate, pcmk__bundle_create_actions, pcmk__bundle_create_probe, pcmk__bundle_internal_constraints, pcmk__bundle_rsc_colocation_lh, pcmk__bundle_rsc_colocation_rh, pcmk__colocated_resources, pcmk__bundle_rsc_location, pcmk__bundle_action_flags, pcmk__multi_update_actions, pcmk__output_bundle_actions, pcmk__bundle_expand, pcmk__bundle_append_meta, pcmk__bundle_add_utilization, pcmk__bundle_shutdown_lock, } }; /*! * \internal * \brief Check whether a resource's agent standard, provider, or type changed * * \param[in] rsc Resource to check * \param[in] node Node needing unfencing/restart if agent changed * \param[in] rsc_entry XML with previously known agent information * \param[in] active_on_node Whether \p rsc is active on \p node * * \return true if agent for \p rsc changed, otherwise false */ bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node, const xmlNode *rsc_entry, bool active_on_node) { bool changed = false; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for (int i = 0; i < PCMK__NELEM(attr_list); i++) { const char *value = crm_element_value(rsc->xml, attr_list[i]); const char *old_value = crm_element_value(rsc_entry, attr_list[i]); if (!pcmk__str_eq(value, old_value, pcmk__str_none)) { changed = true; trigger_unfencing(rsc, node, "Device definition changed", NULL, rsc->cluster); if (active_on_node) { crm_notice("Forcing restart of %s on %s " "because %s changed from '%s' to '%s'", rsc->id, node->details->uname, attr_list[i], pcmk__s(old_value, ""), pcmk__s(value, "")); } } } if (changed && active_on_node) { // Make sure the resource is restarted custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE, rsc->cluster); pe__set_resource_flags(rsc, pe_rsc_start_pending); } return changed; } /*! * \internal * \brief Add resource (and any matching children) to list if it matches ID * * \param[in] result List to add resource to * \param[in] rsc Resource to check * \param[in] id ID to match * * \return (Possibly new) head of list */ static GList * add_rsc_if_matching(GList *result, pe_resource_t *rsc, const char *id) { if ((strcmp(rsc->id, id) == 0) || ((rsc->clone_name != NULL) && (strcmp(rsc->clone_name, id) == 0))) { result = g_list_prepend(result, rsc); } for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child = (pe_resource_t *) iter->data; result = add_rsc_if_matching(result, child, id); } return result; } /*! * \internal * \brief Find all resources matching a given ID by either ID or clone name * * \param[in] id Resource ID to check * \param[in] data_set Cluster working set * * \return List of all resources that match \p id * \note The caller is responsible for freeing the return value with * g_list_free(). */ GList * pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set) { GList *result = NULL; CRM_CHECK((id != NULL) && (data_set != NULL), return NULL); for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { result = add_rsc_if_matching(result, (pe_resource_t *) iter->data, id); } return result; } /*! * \internal * \brief Set the variant-appropriate allocation methods for a resource * * \param[in] rsc Resource to set allocation methods for * \param[in] ignored Only here so function can be used with g_list_foreach() */ static void set_allocation_methods_for_rsc(pe_resource_t *rsc, void *ignored) { rsc->cmds = &allocation_methods[rsc->variant]; g_list_foreach(rsc->children, (GFunc) set_allocation_methods_for_rsc, NULL); } /*! * \internal * \brief Set the variant-appropriate allocation methods for all resources * * \param[in] data_set Cluster working set */ void pcmk__set_allocation_methods(pe_working_set_t *data_set) { g_list_foreach(data_set->resources, (GFunc) set_allocation_methods_for_rsc, NULL); } // Shared implementation of resource_alloc_functions_t:colocated_resources() GList * pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *colocated_rscs) { GList *gIter = NULL; if (orig_rsc == NULL) { orig_rsc = rsc; } if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) { return colocated_rscs; } pe_rsc_trace(orig_rsc, "%s is in colocation chain with %s", rsc->id, orig_rsc->id); colocated_rscs = g_list_append(colocated_rscs, rsc); // Follow colocations where this resource is the dependent resource for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; pe_resource_t *primary = constraint->primary; if (primary == orig_rsc) { continue; // Break colocation loop } if ((constraint->score == INFINITY) && (pcmk__colocation_affects(rsc, primary, constraint, true) == pcmk__coloc_affects_location)) { colocated_rscs = primary->cmds->colocated_resources(primary, orig_rsc, colocated_rscs); } } // Follow colocations where this resource is the primary resource for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; pe_resource_t *dependent = constraint->dependent; if (dependent == orig_rsc) { continue; // Break colocation loop } if (pe_rsc_is_clone(rsc) && !pe_rsc_is_clone(dependent)) { continue; // We can't be sure whether dependent will be colocated } if ((constraint->score == INFINITY) && (pcmk__colocation_affects(dependent, rsc, constraint, true) == pcmk__coloc_affects_location)) { colocated_rscs = dependent->cmds->colocated_resources(dependent, orig_rsc, colocated_rscs); } } return colocated_rscs; } void pcmk__output_resource_actions(pe_resource_t *rsc) { pcmk__output_t *out = rsc->cluster->priv; pe_node_t *next = NULL; pe_node_t *current = NULL; if (rsc->children != NULL) { for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child = (pe_resource_t *) iter->data; child->cmds->output_actions(child); } return; } next = rsc->allocated_to; if (rsc->running_on) { current = pe__current_node(rsc); if (rsc->role == RSC_ROLE_STOPPED) { /* * This can occur when resources are being recovered * We fiddle with the current role in native_create_actions() */ rsc->role = RSC_ROLE_STARTED; } } if ((current == NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan)) { /* Don't log stopped orphans */ return; } out->message(out, "rsc-action", rsc, current, next); } /*! * \internal * \brief Assign a specified primitive resource to a node * * Assign a specified primitive resource to a specified node, if the node can * run the resource (or unconditionally, if \p force is true). Mark the resource * as no longer provisional. If the primitive can't be assigned (or \p chosen is * NULL), unassign any previous assignment for it, set its next role to stopped, * and update any existing actions scheduled for it. This is not done * recursively for children, so it should be called only for primitives. * * \param[in] rsc Resource to assign * \param[in] chosen Node to assign \p rsc to * \param[in] force If true, assign to \p chosen even if unavailable * * \return true if \p rsc could be assigned, otherwise false * * \note Assigning a resource to the NULL node using this function is different * from calling pcmk__unassign_resource(), in that it will also update any * actions created for the resource. */ bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force) { pcmk__output_t *out = rsc->cluster->priv; CRM_ASSERT(rsc->variant == pe_native); if (!force && (chosen != NULL)) { if ((chosen->weight < 0) // Allow the graph to assume that guest node connections will come up || (!pcmk__node_available(chosen) && !pe__is_guest_node(chosen))) { crm_debug("All nodes for resource %s are unavailable, unclean or " "shutting down (%s can%s run resources, with weight %d)", rsc->id, chosen->details->uname, (pcmk__node_available(chosen)? "" : "not"), chosen->weight); pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability"); chosen = NULL; } } pcmk__unassign_resource(rsc); pe__clear_resource_flags(rsc, pe_rsc_provisional); if (chosen == NULL) { crm_debug("Could not allocate a node for %s", rsc->id); pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate"); for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) { pe_action_t *op = (pe_action_t *) iter->data; crm_debug("Updating %s for allocation failure", op->uuid); if (pcmk__str_eq(op->task, RSC_STOP, pcmk__str_casei)) { pe__clear_action_flags(op, pe_action_optional); } else if (pcmk__str_eq(op->task, RSC_START, pcmk__str_casei)) { pe__clear_action_flags(op, pe_action_runnable); //pe__set_resource_flags(rsc, pe_rsc_block); } else { // Cancel recurring actions, unless for stopped state const char *interval_ms_s = NULL; const char *target_rc_s = NULL; char *rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING); interval_ms_s = g_hash_table_lookup(op->meta, XML_LRM_ATTR_INTERVAL_MS); target_rc_s = g_hash_table_lookup(op->meta, XML_ATTR_TE_TARGET_RC); if ((interval_ms_s != NULL) && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_none) && !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) { pe__clear_action_flags(op, pe_action_runnable); } free(rc_stopped); } } return false; } crm_debug("Assigning %s to %s", rsc->id, chosen->details->uname); rsc->allocated_to = pe__copy_node(chosen); chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, rsc); chosen->details->num_resources++; chosen->count++; pcmk__consume_node_capacity(chosen->details->utilization, rsc); if (pcmk_is_set(rsc->cluster->flags, pe_flag_show_utilization)) { out->message(out, "resource-util", rsc, chosen, __func__); } return true; } /*! * \internal * \brief Assign a specified resource (of any variant) to a node * * Assign a specified resource and its children (if any) to a specified node, if * the node can run the resource (or unconditionally, if \p force is true). Mark * the resources as no longer provisional. If the resources can't be assigned * (or \p chosen is NULL), unassign any previous assignments, set next role to * stopped, and update any existing actions scheduled for them. * * \param[in] rsc Resource to assign * \param[in] chosen Node to assign \p rsc to * \param[in] force If true, assign to \p chosen even if unavailable * * \return true if \p rsc could be assigned, otherwise false * * \note Assigning a resource to the NULL node using this function is different * from calling pcmk__unassign_resource(), in that it will also update any * actions created for the resource. */ bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force) { bool changed = false; if (rsc->children == NULL) { if (rsc->allocated_to != NULL) { changed = true; } pcmk__assign_primitive(rsc, node, force); } else { for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child_rsc = (pe_resource_t *) iter->data; changed |= pcmk__assign_resource(child_rsc, node, force); } } return changed; } /*! * \internal * \brief Remove any assignment of a specified resource to a node * * If a specified resource has been assigned to a node, remove that assignment * and mark the resource as provisional again. This is not done recursively for * children, so it should be called only for primitives. * * \param[in] rsc Resource to unassign */ void pcmk__unassign_resource(pe_resource_t *rsc) { pe_node_t *old = rsc->allocated_to; if (old == NULL) { return; } crm_info("Unassigning %s from %s", rsc->id, old->details->uname); pe__set_resource_flags(rsc, pe_rsc_provisional); rsc->allocated_to = NULL; /* We're going to free the pe_node_t, but its details member is shared and * will remain, so update that appropriately first. */ old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, rsc); old->details->num_resources--; pcmk__release_node_capacity(old->details->utilization, rsc); free(old); } /*! * \internal * \brief Check whether a resource has reached its migration threshold on a node * * \param[in] rsc Resource to check * \param[in] node Node to check * \param[out] failed If the threshold has been reached, this will be set to * the resource that failed (possibly a parent of \p rsc) * * \return true if the migration threshold has been reached, false otherwise */ bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node, pe_resource_t **failed) { int fail_count, remaining_tries; pe_resource_t *rsc_to_ban = rsc; // Migration threshold of 0 means never force away if (rsc->migration_threshold == 0) { return false; } // If we're ignoring failures, also ignore the migration threshold if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) { return false; } // If there are no failures, there's no need to force away fail_count = pe_get_failcount(node, rsc, NULL, pe_fc_effective|pe_fc_fillers, NULL, rsc->cluster); if (fail_count <= 0) { return false; } // If failed resource is anonymous clone instance, we'll force clone away if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { rsc_to_ban = uber_parent(rsc); } // How many more times recovery will be tried on this node remaining_tries = rsc->migration_threshold - fail_count; if (remaining_tries <= 0) { crm_warn("%s cannot run on %s due to reaching migration threshold " "(clean up resource to allow again)" CRM_XS " failures=%d migration-threshold=%d", rsc_to_ban->id, node->details->uname, fail_count, rsc->migration_threshold); if (failed != NULL) { *failed = rsc_to_ban; } return true; } crm_info("%s can fail %d more time%s on " "%s before reaching migration threshold (%d)", rsc_to_ban->id, remaining_tries, pcmk__plural_s(remaining_tries), node->details->uname, rsc->migration_threshold); return false; } static void * convert_const_pointer(const void *ptr) { /* Worst function ever */ return (void *)ptr; } /*! * \internal * \brief Get a node's weight * * \param[in] node Unweighted node to check (for node ID) * \param[in] nodes List of weighted nodes to look for \p node in * * \return Node's weight, or -INFINITY if not found */ static int get_node_weight(pe_node_t *node, GHashTable *nodes) { pe_node_t *weighted_node = NULL; if ((node != NULL) && (nodes != NULL)) { weighted_node = g_hash_table_lookup(nodes, node->details->id); } return (weighted_node == NULL)? -INFINITY : weighted_node->weight; } /*! * \internal * \brief Compare two resources according to which should be allocated first * * \param[in] a First resource to compare * \param[in] b Second resource to compare * \param[in] data Sorted list of all nodes in cluster * * \return -1 if \p a should be allocated before \b, 0 if they are equal, * or +1 if \p a should be allocated after \b */ static gint cmp_resources(gconstpointer a, gconstpointer b, gpointer data) { const pe_resource_t *resource1 = a; const pe_resource_t *resource2 = b; GList *nodes = (GList *) data; int rc = 0; int r1_weight = -INFINITY; int r2_weight = -INFINITY; pe_node_t *r1_node = NULL; pe_node_t *r2_node = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; const char *reason = NULL; // Resources with highest priority should be allocated first reason = "priority"; r1_weight = resource1->priority; r2_weight = resource2->priority; if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } // We need nodes to make any other useful comparisons reason = "no node list"; if (nodes == NULL) { goto done; } // Calculate and log node weights r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1), resource1->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2), resource2->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); pe__show_node_weights(true, NULL, resource1->id, r1_nodes, resource1->cluster); pe__show_node_weights(true, NULL, resource2->id, r2_nodes, resource2->cluster); // The resource with highest score on its current node goes first reason = "current location"; if (resource1->running_on != NULL) { r1_node = pe__current_node(resource1); } if (resource2->running_on != NULL) { r2_node = pe__current_node(resource2); } r1_weight = get_node_weight(r1_node, r1_nodes); r2_weight = get_node_weight(r2_node, r2_nodes); if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } // Otherwise a higher weight on any node will do reason = "score"; for (GList *iter = nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; r1_weight = get_node_weight(node, r1_nodes); r2_weight = get_node_weight(node, r2_nodes); if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } } done: crm_trace("%s (%d)%s%s %c %s (%d)%s%s: %s", resource1->id, r1_weight, ((r1_node == NULL)? "" : " on "), ((r1_node == NULL)? "" : r1_node->details->id), ((rc < 0)? '>' : ((rc > 0)? '<' : '=')), resource2->id, r2_weight, ((r2_node == NULL)? "" : " on "), ((r2_node == NULL)? "" : r2_node->details->id), reason); if (r1_nodes != NULL) { g_hash_table_destroy(r1_nodes); } if (r2_nodes != NULL) { g_hash_table_destroy(r2_nodes); } return rc; } /*! * \internal * \brief Sort resources in the order they should be allocated to nodes * * \param[in] data_set Cluster working set */ void pcmk__sort_resources(pe_working_set_t *data_set) { GList *nodes = g_list_copy(data_set->nodes); nodes = pcmk__sort_nodes(nodes, NULL, data_set); data_set->resources = g_list_sort_with_data(data_set->resources, cmp_resources, nodes); g_list_free(nodes); } /*! * \internal * \brief Create a hash table with a single node in it * * \param[in] node Node to copy into new table * * \return Newly created hash table containing a copy of \p node * \note The caller is responsible for freeing the result with * g_hash_table_destroy(). */ static GHashTable * new_node_table(pe_node_t *node) { GHashTable *table = pcmk__strkey_table(NULL, free); node = pe__copy_node(node); g_hash_table_insert(table, (gpointer) node->details->id, node); return table; } /*! * \internal * \brief Apply a resource's parent's colocation scores to a node table * * \param[in] rsc Resource whose colocations should be applied * \param[in,out] nodes Node table to apply colocations to */ static void apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes) { GList *iter = NULL; pcmk__colocation_t *colocation = NULL; for (iter = rsc->parent->rsc_cons; iter != NULL; iter = iter->next) { colocation = (pcmk__colocation_t *) iter->data; *nodes = pcmk__native_merge_weights(colocation->primary, rsc->id, *nodes, colocation->node_attribute, colocation->score / (float) INFINITY, 0); } for (iter = rsc->parent->rsc_cons_lhs; iter != NULL; iter = iter->next) { colocation = (pcmk__colocation_t *) iter->data; if (!pcmk__colocation_has_influence(colocation, rsc)) { continue; } *nodes = pcmk__native_merge_weights(colocation->dependent, rsc->id, *nodes, colocation->node_attribute, colocation->score / (float) INFINITY, pe_weights_positive); } } /*! * \internal * \brief Compare clone or bundle instances based on colocation scores * * Determine the relative order in which two clone or bundle instances should be * assigned to nodes, considering the scores of colocation constraints directly * or indirectly involving them. * * \param[in] instance1 First instance to compare * \param[in] instance2 Second instance to compare * * \return A negative number if \p instance1 should be assigned first, * a positive number if \p instance2 should be assigned first, * or 0 if assignment order doesn't matter */ static int cmp_instance_by_colocation(const pe_resource_t *instance1, const pe_resource_t *instance2) { int rc = 0; pe_node_t *node1 = NULL; pe_node_t *node2 = NULL; pe_node_t *current_node1 = pe__current_node(instance1); pe_node_t *current_node2 = pe__current_node(instance2); GList *list1 = NULL; GList *list2 = NULL; GList *iter = NULL; GList *iter2 = NULL; GHashTable *colocated_scores1 = NULL; GHashTable *colocated_scores2 = NULL; CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL) && (instance2 != NULL) && (instance2->parent != NULL) && (current_node1 != NULL) && (current_node2 != NULL)); // Create node tables initialized with each node colocated_scores1 = new_node_table(current_node1); colocated_scores2 = new_node_table(current_node2); // Apply parental colocations apply_parent_colocations(instance1, &colocated_scores1); apply_parent_colocations(instance2, &colocated_scores2); // Find original nodes again, with scores updated for colocations node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id); node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id); // Compare nodes by updated scores if (node1->weight < node2->weight) { if (node1->weight < 0) { crm_trace("%s (%d on %s) > %s (%d on %s)", instance1->id, node1->weight, node1->details->uname, instance2->id, node2->weight, node2->details->uname); rc = -1; } else { crm_trace("%s (%d on %s) < %s (%d on %s)", instance1->id, node1->weight, node1->details->uname, instance2->id, node2->weight, node2->details->uname); rc = 1; } goto out; } else if (node1->weight > node2->weight) { crm_trace("%s (%d on %s) > %s (%d on %s)", instance1->id, node1->weight, node1->details->uname, instance2->id, node2->weight, node2->details->uname); rc = -1; goto out; } /* All location scores */ list1 = pcmk__sort_nodes(g_hash_table_get_values(colocated_scores1), current_node1, instance1->cluster); list2 = pcmk__sort_nodes(g_hash_table_get_values(colocated_scores2), current_node2, instance1->cluster); for (iter = list1, iter2 = list2; (iter != NULL) && (iter2 != NULL); iter = iter->next, iter2 = iter2->next) { node1 = (pe_node_t *) iter->data; node2 = (pe_node_t *) iter2->data; if (node1 == NULL) { crm_trace("%s < %s: colocated score NULL", instance1->id, instance2->id); rc = 1; break; } else if (node2 == NULL) { crm_trace("%s > %s: colocated score NULL", instance1->id, instance2->id); rc = -1; break; } if (node1->weight < node2->weight) { crm_trace("%s < %s: colocated score", instance1->id, instance2->id); rc = 1; break; } else if (node1->weight > node2->weight) { crm_trace("%s > %s: colocated score", instance1->id, instance2->id); rc = -1; break; } } out: g_hash_table_destroy(colocated_scores1); g_hash_table_destroy(colocated_scores2); g_list_free(list1); g_list_free(list2); return rc; } /*! * \internal * \brief Check whether a resource or any of its children are failed * * \param[in] rsc Resource to check * * \return true if \p rsc or any of its children are failed, otherwise false */ static bool did_fail(const pe_resource_t * rsc) { if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { return true; } for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { if (did_fail((pe_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether a node is allowed to run a resource * * \param[in] rsc Resource to check * \param[in,out] node Node to check (will be set NULL if not allowed) * * \return true if *node is either NULL or allowed for \p rsc, otherwise false */ static bool node_is_allowed(const pe_resource_t *rsc, pe_node_t **node) { if (*node != NULL) { pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes, (*node)->details->id); if ((allowed == NULL) || (allowed->weight < 0)) { pe_rsc_trace(rsc, "%s: current location (%s) is unavailable", rsc->id, (*node)->details->uname); return false; } } return true; } +/*! + * \internal + * \brief Compare two clone or bundle instances' instance numbers + * + * \param[in] a First instance to compare + * \param[in] b Second instance to compare + * + * \return A negative number if \p a's instance number is lower, + * a positive number if \p b's instance number is lower, + * or 0 if their instance numbers are the same + */ +gint +pcmk__cmp_instance_number(gconstpointer a, gconstpointer b) +{ + const pe_resource_t *instance1 = (const pe_resource_t *) a; + const pe_resource_t *instance2 = (const pe_resource_t *) b; + char *div1 = NULL; + char *div2 = NULL; + + CRM_ASSERT((instance1 != NULL) && (instance2 != NULL)); + + // Clone numbers are after a colon, bundle numbers after a dash + div1 = strrchr(instance1->id, ':'); + if (div1 == NULL) { + div1 = strrchr(instance1->id, '-'); + } + div2 = strrchr(instance2->id, ':'); + if (div2 == NULL) { + div2 = strrchr(instance2->id, '-'); + } + CRM_ASSERT((div1 != NULL) && (div2 != NULL)); + + return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10)); +} + /*! * \internal * \brief Compare clone or bundle instances according to assignment order * * Compare two clone or bundle instances according to the order they should be * assigned to nodes, preferring (in order): * * - Active instance that is less multiply active * - Instance that is not active on a disallowed node * - Instance with higher configured priority * - Active instance whose current node can run resources * - Active instance whose parent is allowed on current node * - Active instance whose current node has fewer other instances * - Active instance * - Failed instance * - Instance whose colocations result in higher score on current node * - Instance with lower ID in lexicographic order * * \param[in] a First instance to compare * \param[in] b Second instance to compare * * \return A negative number if \p a should be assigned first, * a positive number if \p b should be assigned first, * or 0 if assignment order doesn't matter */ gint pcmk__cmp_instance(gconstpointer a, gconstpointer b) { int rc = 0; pe_node_t *node1 = NULL; pe_node_t *node2 = NULL; unsigned int nnodes1 = 0; unsigned int nnodes2 = 0; bool can1 = true; bool can2 = true; const pe_resource_t *instance1 = (const pe_resource_t *) a; const pe_resource_t *instance2 = (const pe_resource_t *) b; CRM_ASSERT((instance1 != NULL) && (instance2 != NULL)); node1 = pe__find_active_on(instance1, &nnodes1, NULL); node2 = pe__find_active_on(instance2, &nnodes2, NULL); /* If both instances are running and at least one is multiply * active, prefer instance that's running on fewer nodes. */ if ((nnodes1 > 0) && (nnodes2 > 0)) { if (nnodes1 < nnodes2) { crm_trace("Assign %s (active on %d) before %s (active on %d): " "less multiply active", instance1->id, nnodes1, instance2->id, nnodes2); return -1; } else if (nnodes1 > nnodes2) { crm_trace("Assign %s (active on %d) after %s (active on %d): " "more multiply active", instance1->id, nnodes1, instance2->id, nnodes2); return 1; } } /* An instance that is either inactive or active on an allowed node is * preferred over an instance that is active on a no-longer-allowed node. */ can1 = node_is_allowed(instance1, &node1); can2 = node_is_allowed(instance2, &node2); if (can1 && !can2) { crm_trace("Assign %s before %s: not active on a disallowed node", instance1->id, instance2->id); return -1; } else if (!can1 && can2) { crm_trace("Assign %s after %s: active on a disallowed node", instance1->id, instance2->id); return 1; } // Prefer instance with higher configured priority if (instance1->priority > instance2->priority) { crm_trace("Assign %s before %s: priority (%d > %d)", instance1->id, instance2->id, instance1->priority, instance2->priority); return -1; } else if (instance1->priority < instance2->priority) { crm_trace("Assign %s after %s: priority (%d < %d)", instance1->id, instance2->id, instance1->priority, instance2->priority); return 1; } // Prefer active instance if ((node1 == NULL) && (node2 == NULL)) { crm_trace("No assignment preference for %s vs. %s: inactive", instance1->id, instance2->id); return 0; } else if (node1 == NULL) { crm_trace("Assign %s after %s: active", instance1->id, instance2->id); return 1; } else if (node2 == NULL) { crm_trace("Assign %s before %s: active", instance1->id, instance2->id); return -1; } // Prefer instance whose current node can run resources can1 = pcmk__node_available(node1); can2 = pcmk__node_available(node2); if (can1 && !can2) { crm_trace("Assign %s before %s: current node can run resources", instance1->id, instance2->id); return -1; } else if (!can1 && can2) { crm_trace("Assign %s after %s: current node can't run resources", instance1->id, instance2->id); return 1; } // Prefer instance whose parent is allowed to run on instance's current node node1 = pcmk__top_allowed_node(instance1, node1); node2 = pcmk__top_allowed_node(instance2, node2); if ((node1 == NULL) && (node2 == NULL)) { crm_trace("No assignment preference for %s vs. %s: " "parent not allowed on either instance's current node", instance1->id, instance2->id); return 0; } else if (node1 == NULL) { crm_trace("Assign %s after %s: parent not allowed on current node", instance1->id, instance2->id); return 1; } else if (node2 == NULL) { crm_trace("Assign %s before %s: parent allowed on current node", instance1->id, instance2->id); return -1; } // Prefer instance whose current node is running fewer other instances if (node1->count < node2->count) { crm_trace("Assign %s before %s: fewer active instances on current node", instance1->id, instance2->id); return -1; } else if (node1->count > node2->count) { crm_trace("Assign %s after %s: more active instances on current node", instance1->id, instance2->id); return 1; } // Prefer failed instance can1 = did_fail(instance1); can2 = did_fail(instance2); if (!can1 && can2) { crm_trace("Assign %s before %s: failed", instance1->id, instance2->id); return -1; } else if (can1 && !can2) { crm_trace("Assign %s after %s: not failed", instance1->id, instance2->id); return 1; } // Prefer instance with higher cumulative colocation score on current node rc = cmp_instance_by_colocation(instance1, instance2); if (rc != 0) { return rc; } // Prefer instance with lower ID in lexicographic order rc = strcmp(instance1->id, instance2->id); crm_trace("Assign %s %s %s: default", instance1->id, ((rc < 0)? "before" : "after"), instance2->id); return rc; }