diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h index 68d60fc7db..9076030921 100644 --- a/include/pcmki/pcmki_sched_utils.h +++ b/include/pcmki/pcmki_sched_utils.h @@ -1,60 +1,59 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PENGINE_AUTILS__H # define PENGINE_AUTILS__H #include // bool #include // GList, GHashTable, gboolean, guint #include // lrmd_event_data_t #include // cib_t #include #include #include #include #include /* Constraint helper functions */ pcmk__colocation_t *invert_constraint(pcmk__colocation_t *constraint); pe__location_t *copy_constraint(pe__location_t *constraint); GHashTable *pcmk__copy_node_table(GHashTable *nodes); GList *pcmk__copy_node_list(const GList *list, bool reset); GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node, pe_working_set_t *data_set); bool pcmk__node_available(const pe_node_t *node); bool pcmk__any_node_available(GHashTable *nodes); pe_resource_t *find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set); pe_resource_t *find_compatible_child_by_node(pe_resource_t * local_child, pe_node_t * local_node, pe_resource_t * rsc, enum rsc_role_e filter, gboolean current); gboolean is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current); enum pe_action_flags summary_action_flags(pe_action_t * action, GList *children, pe_node_t * node); enum action_tasks clone_child_action(pe_action_t * action); int copies_per_node(pe_resource_t * rsc); -extern int compare_capacity(const pe_node_t * node1, const pe_node_t * node2); extern void calculate_utilization(GHashTable * current_utilization, GHashTable * utilization, gboolean plus); extern void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_working_set_t * data_set); xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event, const char *caller_version, int target_rc, const char *node, const char *origin, int level); # define LOAD_STOPPED "load_stopped" #endif diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h index db65da7314..8a4bd056cb 100644 --- a/lib/pacemaker/libpacemaker_private.h +++ b/lib/pacemaker/libpacemaker_private.h @@ -1,309 +1,316 @@ /* * Copyright 2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__LIBPACEMAKER_PRIVATE__H # define PCMK__LIBPACEMAKER_PRIVATE__H /* This header is for the sole use of libpacemaker, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. */ #include // pe_action_t, pe_node_t, pe_working_set_t // Actions G_GNUC_INTERNAL void pcmk__update_action_for_orderings(pe_action_t *action, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__log_action(const char *pre_text, pe_action_t *action, bool details); G_GNUC_INTERNAL pe_action_t *pcmk__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task, bool optional, bool runnable); G_GNUC_INTERNAL pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name, guint interval_ms, pe_node_t *node); G_GNUC_INTERNAL pe_action_t *pcmk__new_shutdown_action(pe_node_t *node, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__action_locks_rsc_to_node(const pe_action_t *action); G_GNUC_INTERNAL void pcmk__deduplicate_action_inputs(pe_action_t *action); G_GNUC_INTERNAL void pcmk__output_actions(pe_working_set_t *data_set); // Producing transition graphs (pcmk_graph_producer.c) G_GNUC_INTERNAL bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action, pe_action_wrapper_t *input); G_GNUC_INTERNAL void pcmk__add_action_to_graph(pe_action_t *action, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__create_graph(pe_working_set_t *data_set); // Fencing (pcmk_sched_fencing.c) G_GNUC_INTERNAL void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__fence_guest(pe_node_t *node, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__node_unfenced(pe_node_t *node); G_GNUC_INTERNAL bool pcmk__is_unfence_device(const pe_resource_t *rsc, const pe_working_set_t *data_set); // Injected scheduler inputs (pcmk_sched_injections.c) void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib, pcmk_injections_t *injections); // Constraints of any type (pcmk_sched_constraints.c) G_GNUC_INTERNAL pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id); G_GNUC_INTERNAL xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id, pe_resource_t **rsc, pe_tag_t **tag); G_GNUC_INTERNAL bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr, bool convert_rsc, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__create_internal_constraints(pe_working_set_t *data_set); // Location constraints G_GNUC_INTERNAL void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc, int node_weight, const char *discover_mode, pe_node_t *foo_node, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_locations(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_location(pe__location_t *constraint, pe_resource_t *rsc); // Colocation constraints enum pcmk__coloc_affects { pcmk__coloc_affects_nothing = 0, pcmk__coloc_affects_location, pcmk__coloc_affects_role, }; G_GNUC_INTERNAL enum pcmk__coloc_affects pcmk__colocation_affects(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, bool preview); G_GNUC_INTERNAL void pcmk__apply_coloc_to_weights(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint); G_GNUC_INTERNAL void pcmk__apply_coloc_to_priority(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint); G_GNUC_INTERNAL void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__new_colocation(const char *id, const char *node_attr, int score, pe_resource_t *dependent, pe_resource_t *primary, const char *dependent_role, const char *primary_role, bool influence, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__block_colocated_starts(pe_action_t *action, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *lh_rsc, char *lh_task, pe_action_t *lh_action, pe_resource_t *rh_rsc, char *rh_task, pe_action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__disable_invalid_orderings(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_stops_before_shutdown(pe_node_t *node, pe_action_t *shutdown_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_orderings(pe_working_set_t *data_set); /*! * \internal * \brief Create a new ordering between two resource actions * * \param[in] lh_rsc Resource for 'first' action * \param[in] rh_rsc Resource for 'then' action * \param[in] lh_task Action key for 'first' action * \param[in] rh_task Action key for 'then' action * \param[in] flags Bitmask of enum pe_ordering flags * \param[in] data_set Cluster working set to add ordering to */ #define pcmk__order_resource_actions(lh_rsc, lh_task, rh_rsc, rh_task, \ flags, data_set) \ pcmk__new_ordering((lh_rsc), pcmk__op_key((lh_rsc)->id, (lh_task), 0), \ NULL, \ (rh_rsc), pcmk__op_key((rh_rsc)->id, (rh_task), 0), \ NULL, (flags), (data_set)) #define pcmk__order_starts(rsc1, rsc2, type, data_set) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \ (rsc2), CRMD_ACTION_START, (type), (data_set)) #define pcmk__order_stops(rsc1, rsc2, type, data_set) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \ (rsc2), CRMD_ACTION_STOP, (type), (data_set)) G_GNUC_INTERNAL void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(pe_node_t *node); G_GNUC_INTERNAL void pcmk__order_remote_connection_actions(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node); G_GNUC_INTERNAL pe_node_t *pcmk__connection_host_for_action(pe_action_t *action); G_GNUC_INTERNAL void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action); // Groups (pcmk_sched_group.c) G_GNUC_INTERNAL GList *pcmk__group_colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *colocated_rscs); // Bundles (pcmk_sched_bundle.c) G_GNUC_INTERNAL void pcmk__output_bundle_actions(pe_resource_t *rsc); // Injections (pcmk_injections.c) G_GNUC_INTERNAL xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid); G_GNUC_INTERNAL xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node, bool up); G_GNUC_INTERNAL xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *lrm_name, const char *rclass, const char *rtype, const char *rprovider); G_GNUC_INTERNAL void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *task, guint interval_ms, int rc); G_GNUC_INTERNAL xmlNode *pcmk__inject_action_result(xmlNode *cib_resource, lrmd_event_data_t *op, int target_rc); // Functions applying to more than one variant (pcmk_sched_resource.c) G_GNUC_INTERNAL GList *pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *colocated_rscs); G_GNUC_INTERNAL void pcmk__output_resource_actions(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force); G_GNUC_INTERNAL bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force); G_GNUC_INTERNAL void pcmk__unassign_resource(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set, pe_resource_t **failed); // Functions related to probes (pcmk_sched_probes.c) G_GNUC_INTERNAL void pcmk__order_probes(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__schedule_probes(pe_working_set_t *data_set); +// Functions related to node utilization (pcmk_sched_utilization.c) + +G_GNUC_INTERNAL +int pcmk__compare_node_capacities(const pe_node_t *node1, + const pe_node_t *node2); + + #endif // PCMK__LIBPACEMAKER_PRIVATE__H diff --git a/lib/pacemaker/pcmk_sched_nodes.c b/lib/pacemaker/pcmk_sched_nodes.c index a73d031070..b13afd848b 100644 --- a/lib/pacemaker/pcmk_sched_nodes.c +++ b/lib/pacemaker/pcmk_sched_nodes.c @@ -1,255 +1,255 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // lrmd_event_data_t #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Check whether a node is available to run resources * * \param[in] node Node to check * * \return true if node is online and not shutting down, unclean, or in standby * or maintenance mode, otherwise false */ bool pcmk__node_available(const pe_node_t *node) { // @TODO Should we add (node->weight >= 0)? return (node != NULL) && (node->details != NULL) && node->details->online && !node->details->shutdown && !node->details->unclean && !node->details->standby && !node->details->maintenance; } /*! * \internal * \brief Copy a hash table of node objects * * \param[in] nodes Hash table to copy * * \return New copy of nodes (or NULL if nodes is NULL) */ GHashTable * pcmk__copy_node_table(GHashTable *nodes) { GHashTable *new_table = NULL; GHashTableIter iter; pe_node_t *node = NULL; if (nodes == NULL) { return NULL; } new_table = pcmk__strkey_table(NULL, free); g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { pe_node_t *new_node = pe__copy_node(node); g_hash_table_insert(new_table, (gpointer) new_node->details->id, new_node); } return new_table; } /*! * \internal * \brief Copy a list of node objects * * \param[in] list List to copy * \param[in] reset Set copies' scores to 0 * * \return New list of shallow copies of nodes in original list */ GList * pcmk__copy_node_list(const GList *list, bool reset) { GList *result = NULL; for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *new_node = NULL; pe_node_t *this_node = (pe_node_t *) gIter->data; new_node = pe__copy_node(this_node); if (reset) { new_node->weight = 0; } result = g_list_prepend(result, new_node); } return result; } struct node_weight_s { pe_node_t *active; pe_working_set_t *data_set; }; /*! * \internal * \brief Compare two nodes for allocation desirability * * Given two nodes, check which one is more preferred by allocation criteria * such as node weight and utilization. * * \param[in] a First node to compare * \param[in] b Second node to compare * \param[in] data Sort data (as struct node_weight_s *) * * \return -1 if \p a is preferred, +1 if \p b is preferred, or 0 if they are * equally preferred */ static gint compare_nodes(gconstpointer a, gconstpointer b, gpointer data) { const pe_node_t *node1 = (const pe_node_t *) a; const pe_node_t *node2 = (const pe_node_t *) b; struct node_weight_s *nw = data; int node1_weight = 0; int node2_weight = 0; int result = 0; if (a == NULL) { return 1; } if (b == NULL) { return -1; } // Compare node weights node1_weight = pcmk__node_available(node1)? node1->weight : -INFINITY; node2_weight = pcmk__node_available(node2)? node2->weight : -INFINITY; if (node1_weight > node2_weight) { crm_trace("%s (%d) > %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return -1; } if (node1_weight < node2_weight) { crm_trace("%s (%d) < %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return 1; } crm_trace("%s (%d) == %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); // If appropriate, compare node utilization if (pcmk__str_eq(nw->data_set->placement_strategy, "minimal", pcmk__str_casei)) { goto equal; } if (pcmk__str_eq(nw->data_set->placement_strategy, "balanced", pcmk__str_casei)) { - result = compare_capacity(node1, node2); + result = pcmk__compare_node_capacities(node1, node2); if (result < 0) { crm_trace("%s > %s : capacity (%d)", node1->details->uname, node2->details->uname, result); return -1; } else if (result > 0) { crm_trace("%s < %s : capacity (%d)", node1->details->uname, node2->details->uname, result); return 1; } } // Compare number of allocated resources if (node1->details->num_resources < node2->details->num_resources) { crm_trace("%s (%d) > %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if (node1->details->num_resources > node2->details->num_resources) { crm_trace("%s (%d) < %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } // Check whether one node is already running desired resource if (nw->active != NULL) { if (nw->active->details == node1->details) { crm_trace("%s (%d) > %s (%d) : active", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if (nw->active->details == node2->details) { crm_trace("%s (%d) < %s (%d) : active", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } } // If all else is equal, prefer node with lowest-sorting name equal: crm_trace("%s = %s", node1->details->uname, node2->details->uname); return strcmp(node1->details->uname, node2->details->uname); } /*! * \internal * \brief Sort a list of nodes by allocation desirability * * \param[in] nodes Node list to sort * \param[in] active_node If not NULL, node currently running resource * \param[in] data_set Cluster working set * * \return New head of sorted list */ GList * pcmk__sort_nodes(GList *nodes, pe_node_t *active_node, pe_working_set_t *data_set) { struct node_weight_s nw = { active_node, data_set }; return g_list_sort_with_data(nodes, compare_nodes, &nw); } /*! * \internal * \brief Check whether any node is available to run resources * * \param[in] nodes Nodes to check * * \return true if any node in \p nodes is available to run resources, * otherwise false */ bool pcmk__any_node_available(GHashTable *nodes) { GHashTableIter iter; pe_node_t *node = NULL; if (nodes == NULL) { return false; } g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if ((node->weight >= 0) && pcmk__node_available(node)) { return true; } } return false; } diff --git a/lib/pacemaker/pcmk_sched_utilization.c b/lib/pacemaker/pcmk_sched_utilization.c index 2f311e9ad8..6230649873 100644 --- a/lib/pacemaker/pcmk_sched_utilization.c +++ b/lib/pacemaker/pcmk_sched_utilization.c @@ -1,397 +1,419 @@ /* * Copyright 2014-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include "libpacemaker_private.h" static void group_add_unallocated_utilization(GHashTable * all_utilization, pe_resource_t * rsc, GList *all_rscs); -struct compare_data { - const pe_node_t *node1; - const pe_node_t *node2; - int result; -}; - /*! * \internal * \brief Get integer utilization from a string * * \param[in] s String representation of a node utilization value * * \return Integer equivalent of \p s * \todo It would make sense to restrict utilization values to nonnegative * integers, but the documentation just says "integers" and we didn't * restrict them initially, so for backward compatibility, allow any * integer. */ static int utilization_value(const char *s) { int value = 0; if ((s != NULL) && (pcmk__scan_min_int(s, &value, INT_MIN) == EINVAL)) { pe_warn("Using 0 for utilization instead of invalid value '%s'", value); value = 0; } return value; } -static void -do_compare_capacity1(gpointer key, gpointer value, gpointer user_data) -{ - int node1_capacity = 0; - int node2_capacity = 0; - struct compare_data *data = user_data; - node1_capacity = utilization_value(value); - node2_capacity = utilization_value(g_hash_table_lookup(data->node2->details->utilization, key)); +/* + * Functions for comparing node capacities + */ - if (node1_capacity > node2_capacity) { - data->result--; - } else if (node1_capacity < node2_capacity) { - data->result++; - } -} +struct compare_data { + const pe_node_t *node1; + const pe_node_t *node2; + bool node2_only; + int result; +}; +/*! + * \internal + * \brief Compare a single utilization attribute for two nodes + * + * Compare one utilization attribute for two nodes, incrementing the result if + * the first node has greater capacity, and decrementing it if the second node + * has greater capacity. + * + * \param[in] key Utilization attribute name to compare + * \param[in] value Utilization attribute value to compare + * \param[in] user_data Comparison data (as struct compare_data*) + */ static void -do_compare_capacity2(gpointer key, gpointer value, gpointer user_data) +compare_utilization_value(gpointer key, gpointer value, gpointer user_data) { int node1_capacity = 0; int node2_capacity = 0; struct compare_data *data = user_data; + const char *node2_value = NULL; - if (g_hash_table_lookup_extended(data->node1->details->utilization, key, NULL, NULL)) { - return; + if (data->node2_only) { + if (g_hash_table_lookup(data->node1->details->utilization, key)) { + return; // We've already compared this attribute + } + } else { + node1_capacity = utilization_value((const char *) value); } - node1_capacity = 0; - node2_capacity = utilization_value(value); + node2_value = g_hash_table_lookup(data->node2->details->utilization, key); + node2_capacity = utilization_value(node2_value); if (node1_capacity > node2_capacity) { data->result--; } else if (node1_capacity < node2_capacity) { data->result++; } } -/* rc < 0 if 'node1' has more capacity remaining - * rc > 0 if 'node1' has less capacity remaining +/*! + * \internal + * \brief Compare utilization capacities of two nodes + * + * \param[in] node1 First node to compare + * \param[in] node2 Second node to compare + * + * \return Negative integer if node1 has more free capacity, + * 0 if the capacities are equal, or a positive integer + * if node2 has more free capacity */ int -compare_capacity(const pe_node_t * node1, const pe_node_t * node2) +pcmk__compare_node_capacities(const pe_node_t *node1, const pe_node_t *node2) { - struct compare_data data; - - data.node1 = node1; - data.node2 = node2; - data.result = 0; - - g_hash_table_foreach(node1->details->utilization, do_compare_capacity1, &data); - g_hash_table_foreach(node2->details->utilization, do_compare_capacity2, &data); + struct compare_data data = { + .node1 = node1, + .node2 = node2, + .node2_only = false, + .result = 0, + }; + + // Compare utilization values that node1 and maybe node2 have + g_hash_table_foreach(node1->details->utilization, compare_utilization_value, + &data); + + // Compare utilization values that only node2 has + data.node2_only = true; + g_hash_table_foreach(node2->details->utilization, compare_utilization_value, + &data); return data.result; } + struct calculate_data { GHashTable *current_utilization; gboolean plus; }; static void do_calculate_utilization(gpointer key, gpointer value, gpointer user_data) { const char *current = NULL; char *result = NULL; struct calculate_data *data = user_data; current = g_hash_table_lookup(data->current_utilization, key); if (data->plus) { result = pcmk__itoa(utilization_value(current) + utilization_value(value)); g_hash_table_replace(data->current_utilization, strdup(key), result); } else if (current) { result = pcmk__itoa(utilization_value(current) - utilization_value(value)); g_hash_table_replace(data->current_utilization, strdup(key), result); } } /* Specify 'plus' to FALSE when allocating * Otherwise to TRUE when deallocating */ void calculate_utilization(GHashTable * current_utilization, GHashTable * utilization, gboolean plus) { struct calculate_data data; data.current_utilization = current_utilization; data.plus = plus; g_hash_table_foreach(utilization, do_calculate_utilization, &data); } struct capacity_data { pe_node_t *node; const char *rsc_id; gboolean is_enough; }; static void check_capacity(gpointer key, gpointer value, gpointer user_data) { int required = 0; int remaining = 0; struct capacity_data *data = user_data; required = utilization_value(value); remaining = utilization_value(g_hash_table_lookup(data->node->details->utilization, key)); if (required > remaining) { CRM_ASSERT(data->rsc_id); CRM_ASSERT(data->node); crm_debug("Node %s does not have enough %s for %s: required=%d remaining=%d", data->node->details->uname, (char *)key, data->rsc_id, required, remaining); data->is_enough = FALSE; } } static gboolean have_enough_capacity(pe_node_t * node, const char * rsc_id, GHashTable * utilization) { struct capacity_data data; data.node = node; data.rsc_id = rsc_id; data.is_enough = TRUE; g_hash_table_foreach(utilization, check_capacity, &data); return data.is_enough; } static void native_add_unallocated_utilization(GHashTable * all_utilization, pe_resource_t * rsc) { if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } calculate_utilization(all_utilization, rsc->utilization, TRUE); } static void add_unallocated_utilization(GHashTable * all_utilization, pe_resource_t * rsc, GList *all_rscs, pe_resource_t * orig_rsc) { if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } if (rsc->variant == pe_native) { pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization", orig_rsc->id, rsc->id); native_add_unallocated_utilization(all_utilization, rsc); } else if (rsc->variant == pe_group) { pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization", orig_rsc->id, rsc->id); group_add_unallocated_utilization(all_utilization, rsc, all_rscs); } else if (pe_rsc_is_clone(rsc)) { GList *gIter1 = NULL; gboolean existing = FALSE; /* Check if there's any child already existing in the list */ gIter1 = rsc->children; for (; gIter1 != NULL; gIter1 = gIter1->next) { pe_resource_t *child = (pe_resource_t *) gIter1->data; GList *gIter2 = NULL; if (g_list_find(all_rscs, child)) { existing = TRUE; } else { /* Check if there's any child of another cloned group already existing in the list */ gIter2 = child->children; for (; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *grandchild = (pe_resource_t *) gIter2->data; if (g_list_find(all_rscs, grandchild)) { pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization", orig_rsc->id, child->id); add_unallocated_utilization(all_utilization, child, all_rscs, orig_rsc); existing = TRUE; break; } } } } // rsc->children is always non-NULL but this makes static analysis happy if (!existing && (rsc->children != NULL)) { pe_resource_t *first_child = (pe_resource_t *) rsc->children->data; pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization", orig_rsc->id, ID(first_child->xml)); add_unallocated_utilization(all_utilization, first_child, all_rscs, orig_rsc); } } } static GHashTable * sum_unallocated_utilization(pe_resource_t * rsc, GList *colocated_rscs) { GList *gIter = NULL; GList *all_rscs = NULL; GHashTable *all_utilization = pcmk__strkey_table(free, free); all_rscs = g_list_copy(colocated_rscs); if (g_list_find(all_rscs, rsc) == FALSE) { all_rscs = g_list_append(all_rscs, rsc); } for (gIter = all_rscs; gIter != NULL; gIter = gIter->next) { pe_resource_t *listed_rsc = (pe_resource_t *) gIter->data; if (!pcmk_is_set(listed_rsc->flags, pe_rsc_provisional)) { continue; } pe_rsc_trace(rsc, "%s: Processing unallocated colocated %s", rsc->id, listed_rsc->id); add_unallocated_utilization(all_utilization, listed_rsc, all_rscs, rsc); } g_list_free(all_rscs); return all_utilization; } void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_working_set_t * data_set) { CRM_CHECK(rsc && prefer && data_set, return); if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) { GHashTableIter iter; GList *colocated_rscs = NULL; gboolean any_capable = FALSE; pe_node_t *node = NULL; colocated_rscs = rsc->cmds->colocated_resources(rsc, NULL, NULL); if (colocated_rscs) { GHashTable *unallocated_utilization = NULL; char *rscs_id = crm_strdup_printf("%s and its colocated resources", rsc->id); pe_node_t *most_capable_node = NULL; unallocated_utilization = sum_unallocated_utilization(rsc, colocated_rscs); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (!pcmk__node_available(node) || (node->weight < 0)) { continue; } if (have_enough_capacity(node, rscs_id, unallocated_utilization)) { any_capable = TRUE; } if (most_capable_node == NULL || - compare_capacity(node, most_capable_node) < 0) { + pcmk__compare_node_capacities(node, most_capable_node) < 0) { /* < 0 means 'node' is more capable */ most_capable_node = node; } } if (any_capable) { g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (!pcmk__node_available(node) || (node->weight < 0)) { continue; } if (have_enough_capacity(node, rscs_id, unallocated_utilization) == FALSE) { pe_rsc_debug(rsc, "Resource %s and its colocated resources" " cannot be allocated to node %s: not enough capacity", rsc->id, node->details->uname); resource_location(rsc, node, -INFINITY, "__limit_utilization__", data_set); } } } else if (*prefer == NULL) { *prefer = most_capable_node; } if (unallocated_utilization) { g_hash_table_destroy(unallocated_utilization); } g_list_free(colocated_rscs); free(rscs_id); } if (any_capable == FALSE) { g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (!pcmk__node_available(node) || (node->weight < 0)) { continue; } if (have_enough_capacity(node, rsc->id, rsc->utilization) == FALSE) { pe_rsc_debug(rsc, "Resource %s cannot be allocated to node %s:" " not enough capacity", rsc->id, node->details->uname); resource_location(rsc, node, -INFINITY, "__limit_utilization__", data_set); } } } pe__show_node_weights(true, rsc, "Post-utilization", rsc->allowed_nodes, data_set); } } #define VARIANT_GROUP 1 #include static void group_add_unallocated_utilization(GHashTable * all_utilization, pe_resource_t * rsc, GList *all_rscs) { group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); if (group_data->colocated || pe_rsc_is_clone(rsc->parent)) { GList *gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (pcmk_is_set(child_rsc->flags, pe_rsc_provisional) && g_list_find(all_rscs, child_rsc) == FALSE) { native_add_unallocated_utilization(all_utilization, child_rsc); } } } else { if (group_data->first_child && pcmk_is_set(group_data->first_child->flags, pe_rsc_provisional) && g_list_find(all_rscs, group_data->first_child) == FALSE) { native_add_unallocated_utilization(all_utilization, group_data->first_child); } } }