diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h index fb6b8306e8..7e789a8245 100644 --- a/include/pcmki/pcmki_sched_utils.h +++ b/include/pcmki/pcmki_sched_utils.h @@ -1,80 +1,79 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PENGINE_AUTILS__H # define PENGINE_AUTILS__H #include // bool #include // GList, GHashTable, gboolean, guint #include // lrmd_event_data_t #include // cib_t #include #include #include #include #include /* Constraint helper functions */ pcmk__colocation_t *invert_constraint(pcmk__colocation_t *constraint); pe__location_t *copy_constraint(pe__location_t *constraint); GHashTable *pcmk__copy_node_table(GHashTable *nodes); GList *pcmk__copy_node_list(const GList *list, bool reset); GList *sort_nodes_by_weight(GList *nodes, pe_node_t *active_node, pe_working_set_t *data_set); extern gboolean can_run_resources(const pe_node_t * node); extern void log_action(unsigned int log_level, const char *pre_text, pe_action_t * action, gboolean details); gboolean can_run_any(GHashTable * nodes); pe_resource_t *find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set); pe_resource_t *find_compatible_child_by_node(pe_resource_t * local_child, pe_node_t * local_node, pe_resource_t * rsc, enum rsc_role_e filter, gboolean current); gboolean is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current); -bool assign_node(pe_resource_t * rsc, pe_node_t * node, gboolean force); enum pe_action_flags summary_action_flags(pe_action_t * action, GList *children, pe_node_t * node); enum action_tasks clone_child_action(pe_action_t * action); int copies_per_node(pe_resource_t * rsc); extern int compare_capacity(const pe_node_t * node1, const pe_node_t * node2); extern void calculate_utilization(GHashTable * current_utilization, GHashTable * utilization, gboolean plus); extern void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_working_set_t * data_set); pe_action_t *create_pseudo_resource_op(pe_resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set); pe_action_t *pe_cancel_op(pe_resource_t *rsc, const char *name, guint interval_ms, pe_node_t *node, pe_working_set_t *data_set); pe_action_t *sched_shutdown_op(pe_node_t *node, pe_working_set_t *data_set); xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event, const char *caller_version, int target_rc, const char *node, const char *origin, int level); # define LOAD_STOPPED "load_stopped" void modify_configuration(pe_working_set_t *data_set, cib_t *cib, pcmk_injections_t *injections); enum transition_status run_simulation(pe_working_set_t * data_set, cib_t *cib, GList *op_fail_list); pcmk__output_t *pcmk__new_logger(void); bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set, pe_resource_t **failed); #endif diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h index 3d04a3ebad..530684d2dc 100644 --- a/lib/pacemaker/libpacemaker_private.h +++ b/lib/pacemaker/libpacemaker_private.h @@ -1,207 +1,210 @@ /* * Copyright 2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__LIBPACEMAKER_PRIVATE__H # define PCMK__LIBPACEMAKER_PRIVATE__H /* This header is for the sole use of libpacemaker, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. */ #include // pe_action_t, pe_node_t, pe_working_set_t G_GNUC_INTERNAL bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action, pe_action_wrapper_t *input); G_GNUC_INTERNAL void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__fence_guest(pe_node_t *node, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__node_unfenced(pe_node_t *node); G_GNUC_INTERNAL bool pcmk__is_unfence_device(const pe_resource_t *rsc, const pe_working_set_t *data_set); G_GNUC_INTERNAL pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id); G_GNUC_INTERNAL xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id, pe_resource_t **rsc, pe_tag_t **tag); G_GNUC_INTERNAL bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr, bool convert_rsc, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__create_internal_constraints(pe_working_set_t *data_set); // Location constraints G_GNUC_INTERNAL void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc, int node_weight, const char *discover_mode, pe_node_t *foo_node, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_locations(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_location(pe__location_t *constraint, pe_resource_t *rsc); // Colocation constraints enum pcmk__coloc_affects { pcmk__coloc_affects_nothing = 0, pcmk__coloc_affects_location, pcmk__coloc_affects_role, }; G_GNUC_INTERNAL enum pcmk__coloc_affects pcmk__colocation_affects(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, bool preview); G_GNUC_INTERNAL void pcmk__apply_coloc_to_weights(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint); G_GNUC_INTERNAL void pcmk__apply_coloc_to_priority(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint); G_GNUC_INTERNAL void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__new_colocation(const char *id, const char *node_attr, int score, pe_resource_t *dependent, pe_resource_t *primary, const char *dependent_role, const char *primary_role, bool influence, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__block_colocated_starts(pe_action_t *action, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *lh_rsc, char *lh_task, pe_action_t *lh_action, pe_resource_t *rh_rsc, char *rh_task, pe_action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__disable_invalid_orderings(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_stops_before_shutdown(pe_node_t *node, pe_action_t *shutdown_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_orderings(pe_working_set_t *data_set); /*! * \internal * \brief Create a new ordering between two resource actions * * \param[in] lh_rsc Resource for 'first' action * \param[in] rh_rsc Resource for 'then' action * \param[in] lh_task Action key for 'first' action * \param[in] rh_task Action key for 'then' action * \param[in] flags Bitmask of enum pe_ordering flags * \param[in] data_set Cluster working set to add ordering to */ #define pcmk__order_resource_actions(lh_rsc, lh_task, rh_rsc, rh_task, \ flags, data_set) \ pcmk__new_ordering((lh_rsc), pcmk__op_key((lh_rsc)->id, (lh_task), 0), \ NULL, \ (rh_rsc), pcmk__op_key((rh_rsc)->id, (rh_task), 0), \ NULL, (flags), (data_set)) #define pcmk__order_starts(rsc1, rsc2, type, data_set) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \ (rsc2), CRMD_ACTION_START, (type), (data_set)) #define pcmk__order_stops(rsc1, rsc2, type, data_set) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \ (rsc2), CRMD_ACTION_STOP, (type), (data_set)) G_GNUC_INTERNAL void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_probes(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(pe_node_t *node); G_GNUC_INTERNAL void pcmk__order_remote_connection_actions(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node); G_GNUC_INTERNAL pe_node_t *pcmk__connection_host_for_action(pe_action_t *action); G_GNUC_INTERNAL void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action); // Groups (pcmk_sched_group.c) G_GNUC_INTERNAL GList *pcmk__group_colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *colocated_rscs); // Functions applying to more than one variant (pcmk_sched_resource.c) G_GNUC_INTERNAL GList *pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *colocated_rscs); G_GNUC_INTERNAL bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force); +G_GNUC_INTERNAL +bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force); + G_GNUC_INTERNAL void pcmk__unassign_resource(pe_resource_t *rsc); #endif // PCMK__LIBPACEMAKER_PRIVATE__H diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c index 1f3841a39f..bde841efc2 100644 --- a/lib/pacemaker/pcmk_sched_bundle.c +++ b/lib/pacemaker/pcmk_sched_bundle.c @@ -1,1091 +1,1091 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include "libpacemaker_private.h" #define PE__VARIANT_BUNDLE 1 #include static bool is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node) { for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (node->details == replica->node->details) { return TRUE; } } return FALSE; } gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, int max, int per_host_max, pe_working_set_t * data_set); static GList * get_container_list(pe_resource_t *rsc) { GList *containers = NULL; if (rsc->variant == pe_container) { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; containers = g_list_append(containers, replica->container); } } return containers; } static inline GList * get_containers_or_children(pe_resource_t *rsc) { return (rsc->variant == pe_container)? get_container_list(rsc) : rsc->children; } pe_node_t * pcmk__bundle_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { GList *containers = NULL; GList *nodes = NULL; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return NULL); get_bundle_variant_data(bundle_data, rsc); pe__set_resource_flags(rsc, pe_rsc_allocating); containers = get_container_list(rsc); pe__show_node_weights(!pcmk_is_set(data_set->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, data_set); nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); containers = g_list_sort_with_data(containers, sort_clone_instance, data_set); distribute_children(rsc, containers, nodes, bundle_data->nreplicas, bundle_data->nreplicas_per_host, data_set); g_list_free(nodes); g_list_free(containers); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; pe_node_t *container_host = NULL; CRM_ASSERT(replica); if (replica->ip) { pe_rsc_trace(rsc, "Allocating bundle %s IP %s", rsc->id, replica->ip->id); replica->ip->cmds->allocate(replica->ip, prefer, data_set); } container_host = replica->container->allocated_to; if (replica->remote && pe__is_guest_or_remote_node(container_host)) { /* We need 'nested' connection resources to be on the same * host because pacemaker-remoted only supports a single * active connection */ pcmk__new_colocation("child-remote-with-docker-remote", NULL, INFINITY, replica->remote, container_host->details->remote_rsc, NULL, NULL, true, data_set); } if (replica->remote) { pe_rsc_trace(rsc, "Allocating bundle %s connection %s", rsc->id, replica->remote->id); replica->remote->cmds->allocate(replica->remote, prefer, data_set); } // Explicitly allocate replicas' children before bundle child if (replica->child) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, replica->child->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (node->details != replica->node->details) { node->weight = -INFINITY; } else if (!pcmk__threshold_reached(replica->child, node, data_set, NULL)) { node->weight = INFINITY; } } pe__set_resource_flags(replica->child->parent, pe_rsc_allocating); pe_rsc_trace(rsc, "Allocating bundle %s replica child %s", rsc->id, replica->child->id); replica->child->cmds->allocate(replica->child, replica->node, data_set); pe__clear_resource_flags(replica->child->parent, pe_rsc_allocating); } } if (bundle_data->child) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, bundle_data->child->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (is_bundle_node(bundle_data, node)) { node->weight = 0; } else { node->weight = -INFINITY; } } pe_rsc_trace(rsc, "Allocating bundle %s child %s", rsc->id, bundle_data->child->id); bundle_data->child->cmds->allocate(bundle_data->child, prefer, data_set); } pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional); return NULL; } void pcmk__bundle_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set) { pe_action_t *action = NULL; GList *containers = NULL; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); containers = get_container_list(rsc); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip) { replica->ip->cmds->create_actions(replica->ip, data_set); } if (replica->container) { replica->container->cmds->create_actions(replica->container, data_set); } if (replica->remote) { replica->remote->cmds->create_actions(replica->remote, data_set); } } clone_create_pseudo_actions(rsc, containers, NULL, NULL, data_set); if (bundle_data->child) { bundle_data->child->cmds->create_actions(bundle_data->child, data_set); if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) { /* promote */ create_pseudo_resource_op(rsc, RSC_PROMOTE, TRUE, TRUE, data_set); action = create_pseudo_resource_op(rsc, RSC_PROMOTED, TRUE, TRUE, data_set); action->priority = INFINITY; /* demote */ create_pseudo_resource_op(rsc, RSC_DEMOTE, TRUE, TRUE, data_set); action = create_pseudo_resource_op(rsc, RSC_DEMOTED, TRUE, TRUE, data_set); action->priority = INFINITY; } } g_list_free(containers); } void pcmk__bundle_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); if (bundle_data->child) { pcmk__order_resource_actions(rsc, RSC_START, bundle_data->child, RSC_START, pe_order_implies_first_printed, data_set); pcmk__order_resource_actions(rsc, RSC_STOP, bundle_data->child, RSC_STOP, pe_order_implies_first_printed, data_set); if (bundle_data->child->children) { pcmk__order_resource_actions(bundle_data->child, RSC_STARTED, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); pcmk__order_resource_actions(bundle_data->child, RSC_STOPPED, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); } else { pcmk__order_resource_actions(bundle_data->child, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); pcmk__order_resource_actions(bundle_data->child, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); } } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); CRM_ASSERT(replica->container); replica->container->cmds->internal_constraints(replica->container, data_set); pcmk__order_starts(rsc, replica->container, pe_order_runnable_left|pe_order_implies_first_printed, data_set); if (replica->child) { pcmk__order_stops(rsc, replica->child, pe_order_implies_first_printed, data_set); } pcmk__order_stops(rsc, replica->container, pe_order_implies_first_printed, data_set); pcmk__order_resource_actions(replica->container, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); pcmk__order_resource_actions(replica->container, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); if (replica->ip) { replica->ip->cmds->internal_constraints(replica->ip, data_set); // Start IP then container pcmk__order_starts(replica->ip, replica->container, pe_order_runnable_left|pe_order_preserve, data_set); pcmk__order_stops(replica->container, replica->ip, pe_order_implies_first|pe_order_preserve, data_set); pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip, replica->container, NULL, NULL, true, data_set); } if (replica->remote) { /* This handles ordering and colocating remote relative to container * (via "resource-with-container"). Since IP is also ordered and * colocated relative to the container, we don't need to do anything * explicit here with IP. */ replica->remote->cmds->internal_constraints(replica->remote, data_set); } if (replica->child) { CRM_ASSERT(replica->remote); // "Start remote then child" is implicit in scheduler's remote logic } } if (bundle_data->child) { bundle_data->child->cmds->internal_constraints(bundle_data->child, data_set); if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) { promote_demote_constraints(rsc, data_set); /* child demoted before global demoted */ pcmk__order_resource_actions(bundle_data->child, RSC_DEMOTED, rsc, RSC_DEMOTED, pe_order_implies_then_printed, data_set); /* global demote before child demote */ pcmk__order_resource_actions(rsc, RSC_DEMOTE, bundle_data->child, RSC_DEMOTE, pe_order_implies_first_printed, data_set); /* child promoted before global promoted */ pcmk__order_resource_actions(bundle_data->child, RSC_PROMOTED, rsc, RSC_PROMOTED, pe_order_implies_then_printed, data_set); /* global promote before child promote */ pcmk__order_resource_actions(rsc, RSC_PROMOTE, bundle_data->child, RSC_PROMOTE, pe_order_implies_first_printed, data_set); } } } static pe_resource_t * compatible_replica_for_node(pe_resource_t *rsc_lh, pe_node_t *candidate, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(candidate != NULL, return NULL); get_bundle_variant_data(bundle_data, rsc); crm_trace("Looking for compatible child from %s for %s on %s", rsc_lh->id, rsc->id, candidate->details->uname); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (is_child_compatible(replica->container, candidate, filter, current)) { crm_trace("Pairing %s with %s on %s", rsc_lh->id, replica->container->id, candidate->details->uname); return replica->container; } } crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id); return NULL; } static pe_resource_t * compatible_replica(pe_resource_t *rsc_lh, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { GList *scratch = NULL; pe_resource_t *pair = NULL; pe_node_t *active_node_lh = NULL; active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current); if (active_node_lh) { return compatible_replica_for_node(rsc_lh, active_node_lh, rsc, filter, current); } scratch = g_hash_table_get_values(rsc_lh->allowed_nodes); scratch = sort_nodes_by_weight(scratch, NULL, data_set); for (GList *gIter = scratch; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = compatible_replica_for_node(rsc_lh, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? rsc->id : "none")); done: g_list_free(scratch); return pair; } void pcmk__bundle_rsc_colocation_lh(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { /* -- Never called -- * * Instead we add the colocation constraints to the child and call from there */ CRM_ASSERT(FALSE); } int copies_per_node(pe_resource_t * rsc) { /* Strictly speaking, there should be a 'copies_per_node' addition * to the resource function table and each case would be a * function. However that would be serious overkill to return an * int. In fact, it seems to me that both function tables * could/should be replaced by resources.{c,h} full of * rsc_{some_operation} functions containing a switch as below * which calls out to functions named {variant}_{some_operation} * as needed. */ switch(rsc->variant) { case pe_unknown: return 0; case pe_native: case pe_group: return 1; case pe_clone: { const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); if (max_clones_node == NULL) { return 1; } else { int max_i; pcmk__scan_min_int(max_clones_node, &max_i, 0); return max_i; } } case pe_container: { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); return data->nreplicas_per_host; } } return 0; } void pcmk__bundle_rsc_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { GList *allocated_primaries = NULL; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(constraint != NULL, return); CRM_CHECK(dependent != NULL, pe_err("dependent was NULL for %s", constraint->id); return); CRM_CHECK(primary != NULL, pe_err("primary was NULL for %s", constraint->id); return); CRM_ASSERT(dependent->variant == pe_native); if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if(constraint->dependent->variant > pe_group) { pe_resource_t *primary_replica = compatible_replica(dependent, primary, RSC_ROLE_UNKNOWN, FALSE, data_set); if (primary_replica) { pe_rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_replica->id); dependent->cmds->rsc_colocation_lh(dependent, primary_replica, constraint, data_set); } else if (constraint->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", dependent->id, primary->id); - assign_node(dependent, NULL, TRUE); + pcmk__assign_resource(dependent, NULL, true); } else { pe_rsc_debug(primary, "Cannot pair %s with instance of %s", dependent->id, primary->id); } return; } get_bundle_variant_data(bundle_data, primary); pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d", constraint->id, dependent->id, primary->id, constraint->score); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (constraint->score < INFINITY) { replica->container->cmds->rsc_colocation_rh(dependent, replica->container, constraint, data_set); } else { pe_node_t *chosen = replica->container->fns->location(replica->container, NULL, FALSE); if ((chosen == NULL) || is_set_recursive(replica->container, pe_rsc_block, TRUE)) { continue; } if ((constraint->primary_role >= RSC_ROLE_PROMOTED) && (replica->child == NULL)) { continue; } if ((constraint->primary_role >= RSC_ROLE_PROMOTED) && (replica->child->next_role < RSC_ROLE_PROMOTED)) { continue; } pe_rsc_trace(primary, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); allocated_primaries = g_list_prepend(allocated_primaries, chosen); } } if (constraint->score >= INFINITY) { node_list_exclude(dependent->allowed_nodes, allocated_primaries, FALSE); } g_list_free(allocated_primaries); } enum pe_action_flags pcmk__bundle_action_flags(pe_action_t *action, pe_node_t *node) { GList *containers = NULL; enum pe_action_flags flags = 0; pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, action->rsc); if(data->child) { enum action_tasks task = get_complex_task(data->child, action->task, TRUE); switch(task) { case no_action: case action_notify: case action_notified: case action_promote: case action_promoted: case action_demote: case action_demoted: return summary_action_flags(action, data->child->children, node); default: break; } } containers = get_container_list(action->rsc); flags = summary_action_flags(action, containers, node); g_list_free(containers); return flags; } pe_resource_t * find_compatible_child_by_node(pe_resource_t * local_child, pe_node_t * local_node, pe_resource_t * rsc, enum rsc_role_e filter, gboolean current) { GList *gIter = NULL; GList *children = NULL; if (local_node == NULL) { crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id); return NULL; } crm_trace("Looking for compatible child from %s for %s on %s", local_child->id, rsc->id, local_node->details->uname); children = get_containers_or_children(rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if(is_child_compatible(child_rsc, local_node, filter, current)) { crm_trace("Pairing %s with %s on %s", local_child->id, child_rsc->id, local_node->details->uname); return child_rsc; } } crm_trace("Can't pair %s with %s", local_child->id, rsc->id); if(children != rsc->children) { g_list_free(children); } return NULL; } static pe__bundle_replica_t * replica_for_container(pe_resource_t *rsc, pe_resource_t *container, pe_node_t *node) { if (rsc->variant == pe_container) { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (replica->child && (container == replica->container) && (node->details == replica->node->details)) { return replica; } } } return NULL; } static enum pe_graph_flags multi_update_interleave_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set) { GList *gIter = NULL; GList *children = NULL; gboolean current = FALSE; enum pe_graph_flags changed = pe_graph_none; /* Fix this - lazy */ if (pcmk__ends_with(first->uuid, "_stopped_0") || pcmk__ends_with(first->uuid, "_demoted_0")) { current = TRUE; } children = get_containers_or_children(then->rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *then_child = gIter->data; pe_resource_t *first_child = find_compatible_child(then_child, first->rsc, RSC_ROLE_UNKNOWN, current, data_set); if (first_child == NULL && current) { crm_trace("Ignore"); } else if (first_child == NULL) { crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid, then->uuid); /* Me no like this hack - but what else can we do? * * If there is no-one active or about to be active * on the same node as then_child, then they must * not be allowed to start */ if (type & (pe_order_runnable_left | pe_order_implies_then) /* Mandatory */ ) { pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id); - if(assign_node(then_child, NULL, TRUE)) { + if (pcmk__assign_resource(then_child, NULL, true)) { pe__set_graph_flags(changed, first, pe_graph_updated_then); } } } else { pe_action_t *first_action = NULL; pe_action_t *then_action = NULL; enum action_tasks task = clone_child_action(first); const char *first_task = task2text(task); pe__bundle_replica_t *first_replica = NULL; pe__bundle_replica_t *then_replica = NULL; first_replica = replica_for_container(first->rsc, first_child, node); if (strstr(first->task, "stop") && first_replica && first_replica->child) { /* Except for 'stopped' we should be looking at the * in-container resource, actions for the child will * happen later and are therefor more likely to align * with the user's intent. */ first_action = find_first_action(first_replica->child->actions, NULL, task2text(task), node); } else { first_action = find_first_action(first_child->actions, NULL, task2text(task), node); } then_replica = replica_for_container(then->rsc, then_child, node); if (strstr(then->task, "mote") && then_replica && then_replica->child) { /* Promote/demote actions will never be found for the * container resource, look in the child instead * * Alternatively treat: * 'XXXX then promote YYYY' as 'XXXX then start container for YYYY', and * 'demote XXXX then stop YYYY' as 'stop container for XXXX then stop YYYY' */ then_action = find_first_action(then_replica->child->actions, NULL, then->task, node); } else { then_action = find_first_action(then_child->actions, NULL, then->task, node); } if (first_action == NULL) { if (!pcmk_is_set(first_child->flags, pe_rsc_orphan) && !pcmk__str_any_of(first_task, RSC_STOP, RSC_DEMOTE, NULL)) { crm_err("Internal error: No action found for %s in %s (first)", first_task, first_child->id); } else { crm_trace("No action found for %s in %s%s (first)", first_task, first_child->id, pcmk_is_set(first_child->flags, pe_rsc_orphan)? " (ORPHAN)" : ""); } continue; } /* We're only interested if 'then' is neither stopping nor being demoted */ if (then_action == NULL) { if (!pcmk_is_set(then_child->flags, pe_rsc_orphan) && !pcmk__str_any_of(then->task, RSC_STOP, RSC_DEMOTE, NULL)) { crm_err("Internal error: No action found for %s in %s (then)", then->task, then_child->id); } else { crm_trace("No action found for %s in %s%s (then)", then->task, then_child->id, pcmk_is_set(then_child->flags, pe_rsc_orphan)? " (ORPHAN)" : ""); } continue; } if (order_actions(first_action, then_action, type)) { crm_debug("Created constraint for %s (%d) -> %s (%d) %.6x", first_action->uuid, pcmk_is_set(first_action->flags, pe_action_optional), then_action->uuid, pcmk_is_set(then_action->flags, pe_action_optional), type); pe__set_graph_flags(changed, first, pe_graph_updated_first|pe_graph_updated_then); } if(first_action && then_action) { changed |= then_child->cmds->update_actions(first_action, then_action, node, first_child->cmds->action_flags(first_action, node), filter, type, data_set); } else { crm_err("Nothing found either for %s (%p) or %s (%p) %s", first_child->id, first_action, then_child->id, then_action, task2text(task)); } } } if(children != then->rsc->children) { g_list_free(children); } return changed; } static bool can_interleave_actions(pe_action_t *first, pe_action_t *then) { bool interleave = FALSE; pe_resource_t *rsc = NULL; const char *interleave_s = NULL; if(first->rsc == NULL || then->rsc == NULL) { crm_trace("Not interleaving %s with %s (both must be resources)", first->uuid, then->uuid); return FALSE; } else if(first->rsc == then->rsc) { crm_trace("Not interleaving %s with %s (must belong to different resources)", first->uuid, then->uuid); return FALSE; } else if(first->rsc->variant < pe_clone || then->rsc->variant < pe_clone) { crm_trace("Not interleaving %s with %s (both sides must be clones or bundles)", first->uuid, then->uuid); return FALSE; } if (pcmk__ends_with(then->uuid, "_stop_0") || pcmk__ends_with(then->uuid, "_demote_0")) { rsc = first->rsc; } else { rsc = then->rsc; } interleave_s = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE); interleave = crm_is_true(interleave_s); crm_trace("Interleave %s -> %s: %s (based on %s)", first->uuid, then->uuid, interleave ? "yes" : "no", rsc->id); return interleave; } enum pe_graph_flags pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set) { enum pe_graph_flags changed = pe_graph_none; crm_trace("%s -> %s", first->uuid, then->uuid); if(can_interleave_actions(first, then)) { changed = multi_update_interleave_actions(first, then, node, flags, filter, type, data_set); } else if(then->rsc) { GList *gIter = NULL; GList *children = NULL; // Handle the 'primitive' ordering case changed |= native_update_actions(first, then, node, flags, filter, type, data_set); // Now any children (or containers in the case of a bundle) children = get_containers_or_children(then->rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *then_child = (pe_resource_t *) gIter->data; enum pe_graph_flags then_child_changed = pe_graph_none; pe_action_t *then_child_action = find_first_action(then_child->actions, NULL, then->task, node); if (then_child_action) { enum pe_action_flags then_child_flags = then_child->cmds->action_flags(then_child_action, node); if (pcmk_is_set(then_child_flags, pe_action_runnable)) { then_child_changed |= then_child->cmds->update_actions(first, then_child_action, node, flags, filter, type, data_set); } changed |= then_child_changed; if (then_child_changed & pe_graph_updated_then) { for (GList *lpc = then_child_action->actions_after; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *next = (pe_action_wrapper_t *) lpc->data; update_action(next->action, data_set); } } } } if(children != then->rsc->children) { g_list_free(children); } } return changed; } void pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); pcmk__apply_location(constraint, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (replica->container) { replica->container->cmds->rsc_location(replica->container, constraint); } if (replica->ip) { replica->ip->cmds->rsc_location(replica->ip, constraint); } } if (bundle_data->child && ((constraint->role_filter == RSC_ROLE_UNPROMOTED) || (constraint->role_filter == RSC_ROLE_PROMOTED))) { bundle_data->child->cmds->rsc_location(bundle_data->child, constraint); bundle_data->child->rsc_location = g_list_prepend(bundle_data->child->rsc_location, constraint); } } void pcmk__bundle_expand(pe_resource_t *rsc, pe_working_set_t * data_set) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); if (bundle_data->child) { bundle_data->child->cmds->expand(bundle_data->child, data_set); } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->remote && replica->container && pe__bundle_needs_remote_name(replica->remote, data_set)) { /* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that * run pacemaker-remoted inside, without needing a separate IP for * the container. This is done by configuring the inner remote's * connection host as the magic string "#uname", then * replacing it with the underlying host when needed. */ xmlNode *nvpair = get_xpath_object("//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']", replica->remote->xml, LOG_ERR); const char *calculated_addr = NULL; // Replace the value in replica->remote->xml (if appropriate) calculated_addr = pe__add_bundle_remote_name(replica->remote, data_set, nvpair, "value"); if (calculated_addr) { /* Since this is for the bundle as a resource, and not any * particular action, replace the value in the default * parameters (not evaluated for node). action2xml() will grab * it from there to replace it in node-evaluated parameters. */ GHashTable *params = pe_rsc_params(replica->remote, NULL, data_set); crm_trace("Set address for bundle connection %s to bundle host %s", replica->remote->id, calculated_addr); g_hash_table_replace(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR), strdup(calculated_addr)); } else { /* The only way to get here is if the remote connection is * neither currently running nor scheduled to run. That means we * won't be doing any operations that require addr (only start * requires it; we additionally use it to compare digests when * unpacking status, promote, and migrate_from history, but * that's already happened by this point). */ crm_info("Unable to determine address for bundle %s remote connection", rsc->id); } } if (replica->ip) { replica->ip->cmds->expand(replica->ip, data_set); } if (replica->container) { replica->container->cmds->expand(replica->container, data_set); } if (replica->remote) { replica->remote->cmds->expand(replica->remote, data_set); } } } gboolean pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t * data_set) { bool any_created = FALSE; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return FALSE); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip) { any_created |= replica->ip->cmds->create_probe(replica->ip, node, complete, force, data_set); } if (replica->child && (node->details == replica->node->details)) { any_created |= replica->child->cmds->create_probe(replica->child, node, complete, force, data_set); } if (replica->container) { bool created = replica->container->cmds->create_probe(replica->container, node, complete, force, data_set); if(created) { any_created = TRUE; /* If we're limited to one replica per host (due to * the lack of an IP range probably), then we don't * want any of our peer containers starting until * we've established that no other copies are already * running. * * Partly this is to ensure that nreplicas_per_host is * observed, but also to ensure that the containers * don't fail to start because the necessary port * mappings (which won't include an IP for uniqueness) * are already taken */ for (GList *tIter = bundle_data->replicas; tIter && (bundle_data->nreplicas_per_host == 1); tIter = tIter->next) { pe__bundle_replica_t *other = tIter->data; if ((other != replica) && (other != NULL) && (other->container != NULL)) { pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, RSC_STATUS, 0), NULL, other->container, pcmk__op_key(other->container->id, RSC_START, 0), NULL, pe_order_optional|pe_order_same_node, data_set); } } } } if (replica->container && replica->remote && replica->remote->cmds->create_probe(replica->remote, node, complete, force, data_set)) { /* Do not probe the remote resource until we know where the * container is running. This is required for REMOTE_CONTAINER_HACK * to correctly probe remote resources. */ char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS, 0); pe_action_t *probe = find_first_action(replica->remote->actions, probe_uuid, NULL, node); free(probe_uuid); if (probe) { any_created = TRUE; crm_trace("Ordering %s probe on %s", replica->remote->id, node->details->uname); pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, RSC_START, 0), NULL, replica->remote, NULL, probe, pe_order_probe, data_set); } } } return any_created; } void pcmk__bundle_append_meta(pe_resource_t *rsc, xmlNode *xml) { } void pcmk__bundle_log_actions(pe_resource_t *rsc, pe_working_set_t *data_set) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip) { LogActions(replica->ip, data_set); } if (replica->container) { LogActions(replica->container, data_set); } if (replica->remote) { LogActions(replica->remote, data_set); } if (replica->child) { LogActions(replica->child, data_set); } } } diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c index 2e7da549da..7916117a47 100644 --- a/lib/pacemaker/pcmk_sched_clone.c +++ b/lib/pacemaker/pcmk_sched_clone.c @@ -1,1558 +1,1534 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include "libpacemaker_private.h" #define VARIANT_CLONE 1 #include gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all); static gint sort_rsc_id(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; long num1, num2; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* * Sort clone instances numerically by instance number, so instance :10 * comes after :9. */ num1 = strtol(strrchr(resource1->id, ':') + 1, NULL, 10); num2 = strtol(strrchr(resource2->id, ':') + 1, NULL, 10); if (num1 < num2) { return -1; } else if (num1 > num2) { return 1; } return 0; } static pe_node_t * parent_node_instance(const pe_resource_t * rsc, pe_node_t * node) { pe_node_t *ret = NULL; if (node != NULL && rsc->parent) { ret = pe_hash_table_lookup(rsc->parent->allowed_nodes, node->details->id); } else if(node != NULL) { ret = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); } return ret; } static gboolean did_fail(const pe_resource_t * rsc) { GList *gIter = rsc->children; if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (did_fail(child_rsc)) { return TRUE; } } return FALSE; } /*! * \internal * \brief Compare instances based on colocation scores. * * Determines the relative order in which \c rsc1 and \c rsc2 should be * allocated. If one resource compares less than the other, then it * should be allocated first. * * \param[in] rsc1 The first instance to compare. * \param[in] rsc2 The second instance to compare. * \param[in] data_set Cluster working set. * * \return -1 if `rsc1 < rsc2`, * 0 if `rsc1 == rsc2`, or * 1 if `rsc1 > rsc2` */ static int order_instance_by_colocation(const pe_resource_t *rsc1, const pe_resource_t *rsc2, pe_working_set_t *data_set) { int rc = 0; pe_node_t *n = NULL; pe_node_t *node1 = NULL; pe_node_t *node2 = NULL; pe_node_t *current_node1 = pe__current_node(rsc1); pe_node_t *current_node2 = pe__current_node(rsc2); GList *list1 = NULL; GList *list2 = NULL; GHashTable *hash1 = pcmk__strkey_table(NULL, free); GHashTable *hash2 = pcmk__strkey_table(NULL, free); /* Clone instances must have parents */ CRM_ASSERT(rsc1->parent != NULL); CRM_ASSERT(rsc2->parent != NULL); n = pe__copy_node(current_node1); g_hash_table_insert(hash1, (gpointer) n->details->id, n); n = pe__copy_node(current_node2); g_hash_table_insert(hash2, (gpointer) n->details->id, n); /* Apply rsc1's parental colocations */ for (GList *gIter = rsc1->parent->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; crm_trace("Applying %s to %s", constraint->id, rsc1->id); hash1 = pcmk__native_merge_weights(constraint->primary, rsc1->id, hash1, constraint->node_attribute, constraint->score / (float) INFINITY, 0); } for (GList *gIter = rsc1->parent->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(constraint, rsc1)) { continue; } crm_trace("Applying %s to %s", constraint->id, rsc1->id); hash1 = pcmk__native_merge_weights(constraint->dependent, rsc1->id, hash1, constraint->node_attribute, constraint->score / (float) INFINITY, pe_weights_positive); } /* Apply rsc2's parental colocations */ for (GList *gIter = rsc2->parent->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; crm_trace("Applying %s to %s", constraint->id, rsc2->id); hash2 = pcmk__native_merge_weights(constraint->primary, rsc2->id, hash2, constraint->node_attribute, constraint->score / (float) INFINITY, 0); } for (GList *gIter = rsc2->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(constraint, rsc2)) { continue; } crm_trace("Applying %s to %s", constraint->id, rsc2->id); hash2 = pcmk__native_merge_weights(constraint->dependent, rsc2->id, hash2, constraint->node_attribute, constraint->score / (float) INFINITY, pe_weights_positive); } /* Current location score */ node1 = g_hash_table_lookup(hash1, current_node1->details->id); node2 = g_hash_table_lookup(hash2, current_node2->details->id); if (node1->weight < node2->weight) { if (node1->weight < 0) { crm_trace("%s > %s: current score: %d %d", rsc1->id, rsc2->id, node1->weight, node2->weight); rc = -1; goto out; } else { crm_trace("%s < %s: current score: %d %d", rsc1->id, rsc2->id, node1->weight, node2->weight); rc = 1; goto out; } } else if (node1->weight > node2->weight) { crm_trace("%s > %s: current score: %d %d", rsc1->id, rsc2->id, node1->weight, node2->weight); rc = -1; goto out; } /* All location scores */ list1 = g_hash_table_get_values(hash1); list2 = g_hash_table_get_values(hash2); list1 = sort_nodes_by_weight(list1, current_node1, data_set); list2 = sort_nodes_by_weight(list2, current_node2, data_set); for (GList *gIter1 = list1, *gIter2 = list2; (gIter1 != NULL) && (gIter2 != NULL); gIter1 = gIter1->next, gIter2 = gIter2->next) { node1 = (pe_node_t *) gIter1->data; node2 = (pe_node_t *) gIter2->data; if (node1 == NULL) { crm_trace("%s < %s: colocated score NULL", rsc1->id, rsc2->id); rc = 1; break; } else if (node2 == NULL) { crm_trace("%s > %s: colocated score NULL", rsc1->id, rsc2->id); rc = -1; break; } if (node1->weight < node2->weight) { crm_trace("%s < %s: colocated score", rsc1->id, rsc2->id); rc = 1; break; } else if (node1->weight > node2->weight) { crm_trace("%s > %s: colocated score", rsc1->id, rsc2->id); rc = -1; break; } } out: g_hash_table_destroy(hash1); g_hash_table_destroy(hash2); g_list_free(list1); g_list_free(list2); return rc; } gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) { int rc = 0; pe_node_t *node1 = NULL; pe_node_t *node2 = NULL; pe_node_t *current_node1 = NULL; pe_node_t *current_node2 = NULL; unsigned int nnodes1 = 0; unsigned int nnodes2 = 0; gboolean can1 = TRUE; gboolean can2 = TRUE; const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* allocation order: * - active instances * - instances running on nodes with the least copies * - active instances on nodes that can't support them or are to be fenced * - failed instances * - inactive instances */ current_node1 = pe__find_active_on(resource1, &nnodes1, NULL); current_node2 = pe__find_active_on(resource2, &nnodes2, NULL); /* If both instances are running and at least one is multiply * active, give precedence to the one that's running on fewer nodes. */ if ((nnodes1 > 0) && (nnodes2 > 0)) { if (nnodes1 < nnodes2) { crm_trace("%s < %s: running_on", resource1->id, resource2->id); return -1; } else if (nnodes1 > nnodes2) { crm_trace("%s > %s: running_on", resource1->id, resource2->id); return 1; } } /* Instance whose current location is available sorts first */ node1 = current_node1; node2 = current_node2; if (node1 != NULL) { pe_node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id); if (match == NULL || match->weight < 0) { crm_trace("%s: current location is unavailable", resource1->id); node1 = NULL; can1 = FALSE; } } if (node2 != NULL) { pe_node_t *match = pe_hash_table_lookup(resource2->allowed_nodes, node2->details->id); if (match == NULL || match->weight < 0) { crm_trace("%s: current location is unavailable", resource2->id); node2 = NULL; can2 = FALSE; } } if (can1 && !can2) { crm_trace("%s < %s: availability of current location", resource1->id, resource2->id); return -1; } else if (!can1 && can2) { crm_trace("%s > %s: availability of current location", resource1->id, resource2->id); return 1; } /* Higher-priority instance sorts first */ if (resource1->priority > resource2->priority) { crm_trace("%s < %s: priority", resource1->id, resource2->id); return -1; } else if (resource1->priority < resource2->priority) { crm_trace("%s > %s: priority", resource1->id, resource2->id); return 1; } /* Active instance sorts first */ if (node1 == NULL && node2 == NULL) { crm_trace("%s == %s: not active", resource1->id, resource2->id); return 0; } else if (node1 == NULL) { crm_trace("%s > %s: active", resource1->id, resource2->id); return 1; } else if (node2 == NULL) { crm_trace("%s < %s: active", resource1->id, resource2->id); return -1; } /* Instance whose current node can run resources sorts first */ can1 = can_run_resources(node1); can2 = can_run_resources(node2); if (can1 && !can2) { crm_trace("%s < %s: can", resource1->id, resource2->id); return -1; } else if (!can1 && can2) { crm_trace("%s > %s: can", resource1->id, resource2->id); return 1; } /* Is the parent allowed to run on the instance's current node? * Instance with parent allowed sorts first. */ node1 = parent_node_instance(resource1, node1); node2 = parent_node_instance(resource2, node2); if (node1 == NULL && node2 == NULL) { crm_trace("%s == %s: not allowed", resource1->id, resource2->id); return 0; } else if (node1 == NULL) { crm_trace("%s > %s: not allowed", resource1->id, resource2->id); return 1; } else if (node2 == NULL) { crm_trace("%s < %s: not allowed", resource1->id, resource2->id); return -1; } /* Does one node have more instances allocated? * Instance whose current node has fewer instances sorts first. */ if (node1->count < node2->count) { crm_trace("%s < %s: count", resource1->id, resource2->id); return -1; } else if (node1->count > node2->count) { crm_trace("%s > %s: count", resource1->id, resource2->id); return 1; } /* Failed instance sorts first */ can1 = did_fail(resource1); can2 = did_fail(resource2); if (can1 && !can2) { crm_trace("%s > %s: failed", resource1->id, resource2->id); return 1; } else if (!can1 && can2) { crm_trace("%s < %s: failed", resource1->id, resource2->id); return -1; } rc = order_instance_by_colocation(resource1, resource2, data_set); if (rc != 0) { return rc; } /* Default to lexicographic order by ID */ rc = strcmp(resource1->id, resource2->id); crm_trace("%s %c %s: default", resource1->id, rc < 0 ? '<' : '>', resource2->id); return rc; } static pe_node_t * can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit) { pe_node_t *local_node = NULL; if (node == NULL && rsc->allowed_nodes) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) { can_run_instance(rsc, local_node, limit); } return NULL; } if (!node) { /* make clang analyzer happy */ goto bail; } else if (can_run_resources(node) == FALSE) { goto bail; } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { goto bail; } local_node = parent_node_instance(rsc, node); if (local_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname); goto bail; } else if (local_node->weight < 0) { common_update_score(rsc, node->details->id, local_node->weight); pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.", rsc->id, node->details->uname); } else if (local_node->count < limit) { pe_rsc_trace(rsc, "%s can run on %s (already running %d)", rsc->id, node->details->uname, local_node->count); return local_node; } else { pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)", rsc->id, node->details->uname, local_node->count, limit); } bail: if (node) { common_update_score(rsc, node->details->id, -INFINITY); } return NULL; } static pe_node_t * allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc, int limit, pe_working_set_t *data_set) { pe_node_t *chosen = NULL; GHashTable *backup = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)", rsc->id, (prefer? prefer->details->uname: "none"), (all_coloc? "all" : "some")); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return rsc->fns->location(rsc, NULL, FALSE); } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } /* Only include positive colocation preferences of dependent resources * if not every node will get a copy of the clone */ append_parent_colocation(rsc->parent, rsc, all_coloc); if (prefer) { pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (local_prefer == NULL || local_prefer->weight < 0) { pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id, prefer->details->uname); return NULL; } } can_run_instance(rsc, NULL, limit); backup = pcmk__copy_node_table(rsc->allowed_nodes); pe_rsc_trace(rsc, "Allocating instance %s", rsc->id); chosen = rsc->cmds->allocate(rsc, prefer, data_set); if (chosen && prefer && (chosen->details != prefer->details)) { crm_info("Not pre-allocating %s to %s because %s is better", rsc->id, prefer->details->uname, chosen->details->uname); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = backup; pcmk__unassign_resource(rsc); chosen = NULL; backup = NULL; } if (chosen) { pe_node_t *local_node = parent_node_instance(rsc, chosen); if (local_node) { local_node->count++; } else if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { /* what to do? we can't enforce per-node limits in this case */ pcmk__config_err("%s not found in %s (list of %d)", chosen->details->id, rsc->parent->id, g_hash_table_size(rsc->parent->allowed_nodes)); } } if(backup) { g_hash_table_destroy(backup); } return chosen; } static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all) { GList *gIter = NULL; gIter = rsc->rsc_cons; for (; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; if (all || cons->score < 0 || cons->score == INFINITY) { child->rsc_cons = g_list_prepend(child->rsc_cons, cons); } } gIter = rsc->rsc_cons_lhs; for (; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(cons, child)) { continue; } if (all || cons->score < 0) { child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons); } } } void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, int max, int per_host_max, pe_working_set_t * data_set); void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, int max, int per_host_max, pe_working_set_t * data_set) { int loop_max = 0; int allocated = 0; int available_nodes = 0; bool all_coloc = false; /* count now tracks the number of clones currently allocated */ for(GList *nIter = nodes; nIter != NULL; nIter = nIter->next) { pe_node_t *node = nIter->data; node->count = 0; if (can_run_resources(node)) { available_nodes++; } } all_coloc = (max < available_nodes) ? true : false; if(available_nodes) { loop_max = max / available_nodes; } if (loop_max < 1) { loop_max = 1; } pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)", max, rsc->id, available_nodes, per_host_max, loop_max); /* Pre-allocate as many instances as we can to their current location */ for (GList *gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; pe_node_t *child_node = NULL; pe_node_t *local_node = NULL; if ((child->running_on == NULL) || !pcmk_is_set(child->flags, pe_rsc_provisional) || pcmk_is_set(child->flags, pe_rsc_failed)) { continue; } child_node = pe__current_node(child); local_node = parent_node_instance(child, child_node); pe_rsc_trace(rsc, "Checking pre-allocation of %s to %s (%d remaining of %d)", child->id, child_node->details->uname, max - allocated, max); if (!can_run_resources(child_node) || (child_node->weight < 0)) { pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s", child_node->details->uname, child->id); continue; } if ((local_node != NULL) && (local_node->count >= loop_max)) { pe_rsc_trace(rsc, "Not pre-allocating because %s already allocated " "optimal instances", child_node->details->uname); continue; } if (allocate_instance(child, child_node, all_coloc, per_host_max, data_set)) { pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id, child_node->details->uname); allocated++; } } pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max); for (GList *gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (child->running_on != NULL) { pe_node_t *child_node = pe__current_node(child); pe_node_t *local_node = parent_node_instance(child, child_node); if (local_node == NULL) { crm_err("%s is running on %s which isn't allowed", child->id, child_node->details->uname); } } if (!pcmk_is_set(child->flags, pe_rsc_provisional)) { } else if (allocated >= max) { pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max); resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set); } else { if (allocate_instance(child, NULL, all_coloc, per_host_max, data_set)) { allocated++; } } } pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d", allocated, rsc->id, max); } pe_node_t * pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { GList *nodes = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return NULL; } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__add_promotion_scores(rsc); } pe__set_resource_flags(rsc, pe_rsc_allocating); /* this information is used by sort_clone_instance() when deciding in which * order to allocate clone instances */ for (GList *gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; pe_rsc_trace(rsc, "%s: Allocating %s first", rsc->id, constraint->primary->id); constraint->primary->cmds->allocate(constraint->primary, prefer, data_set); } for (GList *gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (!pcmk__colocation_has_influence(constraint, NULL)) { continue; } rsc->allowed_nodes = constraint->dependent->cmds->merge_weights( constraint->dependent, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY, (pe_weights_rollback | pe_weights_positive)); } pe__show_node_weights(!pcmk_is_set(data_set->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, data_set); nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set); distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, data_set); g_list_free(nodes); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__set_instance_roles(rsc, data_set); } pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating); pe_rsc_trace(rsc, "Done allocating %s", rsc->id); return NULL; } static void clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting, gboolean * active) { GList *gIter = NULL; if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; clone_update_pseudo_status(child, stopping, starting, active); } return; } CRM_ASSERT(active != NULL); CRM_ASSERT(starting != NULL); CRM_ASSERT(stopping != NULL); if (rsc->running_on) { *active = TRUE; } gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (*starting && *stopping) { return; } else if (pcmk_is_set(action->flags, pe_action_optional)) { pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid); continue; } else if (!pcmk_any_flags_set(action->flags, pe_action_pseudo|pe_action_runnable)) { pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid); continue; } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) { pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid); *stopping = TRUE; } else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) { if (!pcmk_is_set(action->flags, pe_action_runnable)) { pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d", action->uuid, pcmk_is_set(action->flags, pe_action_runnable), pcmk_is_set(action->flags, pe_action_pseudo)); } else { pe_rsc_trace(rsc, "Starting due to: %s", action->uuid); pe_rsc_trace(rsc, "%s run=%d, pseudo=%d", action->uuid, pcmk_is_set(action->flags, pe_action_runnable), pcmk_is_set(action->flags, pe_action_pseudo)); *starting = TRUE; } } } } static pe_action_t * find_rsc_action(pe_resource_t *rsc, const char *task) { pe_action_t *match = NULL; GList *actions = pe__resource_actions(rsc, NULL, task, FALSE); for (GList *item = actions; item != NULL; item = item->next) { pe_action_t *op = (pe_action_t *) item->data; if (!pcmk_is_set(op->flags, pe_action_optional)) { if (match != NULL) { // More than one match, don't return any match = NULL; break; } match = op; } } g_list_free(actions); return match; } static void child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *stop = NULL; pe_action_t *start = NULL; pe_action_t *last_stop = NULL; pe_action_t *last_start = NULL; GList *gIter = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (clone_data->ordered == FALSE) { return; } /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, sort_rsc_id); for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; stop = find_rsc_action(child, RSC_STOP); if (stop) { if (last_stop) { /* child/child relative stop */ order_actions(stop, last_stop, pe_order_optional); } last_stop = stop; } start = find_rsc_action(child, RSC_START); if (start) { if (last_start) { /* child/child relative start */ order_actions(last_start, start, pe_order_optional); } last_start = start; } } } void clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify,data_set); child_ordering_constraints(rsc, data_set); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { create_promotable_actions(rsc, data_set); } } void clone_create_pseudo_actions( pe_resource_t * rsc, GList *children, notify_data_t **start_notify, notify_data_t **stop_notify, pe_working_set_t * data_set) { gboolean child_active = FALSE; gboolean child_starting = FALSE; gboolean child_stopping = FALSE; gboolean allow_dependent_migrations = TRUE; pe_action_t *stop = NULL; pe_action_t *stopped = NULL; pe_action_t *start = NULL; pe_action_t *started = NULL; pe_rsc_trace(rsc, "Creating actions for %s", rsc->id); for (GList *gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean starting = FALSE; gboolean stopping = FALSE; child_rsc->cmds->create_actions(child_rsc, data_set); clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active); if (stopping && starting) { allow_dependent_migrations = FALSE; } child_stopping |= stopping; child_starting |= starting; } /* start */ start = create_pseudo_resource_op(rsc, RSC_START, !child_starting, TRUE, data_set); started = create_pseudo_resource_op(rsc, RSC_STARTED, !child_starting, FALSE, data_set); started->priority = INFINITY; if (child_active || child_starting) { pe__set_action_flags(started, pe_action_runnable); } if (start_notify != NULL && *start_notify == NULL) { *start_notify = create_notification_boundaries(rsc, RSC_START, start, started, data_set); } /* stop */ stop = create_pseudo_resource_op(rsc, RSC_STOP, !child_stopping, TRUE, data_set); stopped = create_pseudo_resource_op(rsc, RSC_STOPPED, !child_stopping, TRUE, data_set); stopped->priority = INFINITY; if (allow_dependent_migrations) { pe__set_action_flags(stop, pe_action_migrate_runnable); } if (stop_notify != NULL && *stop_notify == NULL) { *stop_notify = create_notification_boundaries(rsc, RSC_STOP, stop, stopped, data_set); if (start_notify && *start_notify && *stop_notify) { order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional); } } } void clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set) { pe_resource_t *last_rsc = NULL; GList *gIter; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id); pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional, data_set); pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left, data_set); } if (clone_data->ordered) { /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, sort_rsc_id); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->internal_constraints(child_rsc, data_set); pcmk__order_starts(rsc, child_rsc, pe_order_runnable_left|pe_order_implies_first_printed, data_set); pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { pcmk__order_starts(last_rsc, child_rsc, pe_order_optional, data_set); } pcmk__order_stops(rsc, child_rsc, pe_order_implies_first_printed, data_set); pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { pcmk__order_stops(child_rsc, last_rsc, pe_order_optional, data_set); } last_rsc = child_rsc; } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { promotable_constraints(rsc, data_set); } } -bool -assign_node(pe_resource_t * rsc, pe_node_t * node, gboolean force) -{ - bool changed = FALSE; - - if (rsc->children) { - - for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { - pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; - - changed |= assign_node(child_rsc, node, force); - } - - return changed; - } - - if (rsc->allocated_to != NULL) { - changed = true; - } - - pcmk__assign_primitive(rsc, node, force); - return changed; -} - gboolean is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current) { pe_node_t *node = NULL; enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); CRM_CHECK(child_rsc && local_node, return FALSE); if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { /* We only want instances that haven't failed */ node = child_rsc->fns->location(child_rsc, NULL, current); } if (filter != RSC_ROLE_UNKNOWN && next_role != filter) { crm_trace("Filtered %s", child_rsc->id); return FALSE; } if (node && (node->details == local_node->details)) { return TRUE; } else if (node) { crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname, local_node->details->uname); } else { crm_trace("%s - not allocated %d", child_rsc->id, current); } return FALSE; } pe_resource_t * find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { pe_resource_t *pair = NULL; GList *gIter = NULL; GList *scratch = NULL; pe_node_t *local_node = NULL; local_node = local_child->fns->location(local_child, NULL, current); if (local_node) { return find_compatible_child_by_node(local_child, local_node, rsc, filter, current); } scratch = g_hash_table_get_values(local_child->allowed_nodes); scratch = sort_nodes_by_weight(scratch, NULL, data_set); gIter = scratch; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = find_compatible_child_by_node(local_child, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id); done: g_list_free(scratch); return pair; } void clone_rsc_colocation_lh(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { /* -- Never called -- * * Instead we add the colocation constraints to the child and call from there */ CRM_ASSERT(FALSE); } void clone_rsc_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, pe_working_set_t *data_set) { GList *gIter = NULL; gboolean do_interleave = FALSE; const char *interleave_s = NULL; CRM_CHECK(constraint != NULL, return); CRM_CHECK(dependent != NULL, pe_err("dependent was NULL for %s", constraint->id); return); CRM_CHECK(primary != NULL, pe_err("primary was NULL for %s", constraint->id); return); CRM_CHECK(dependent->variant == pe_native, return); pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d", constraint->id, dependent->id, primary->id, constraint->score); if (pcmk_is_set(primary->flags, pe_rsc_promotable)) { if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (constraint->primary_role == RSC_ROLE_UNKNOWN) { pe_rsc_trace(primary, "Handling %s as a clone colocation", constraint->id); } else { promotable_colocation_rh(dependent, primary, constraint, data_set); return; } } /* only the LHS side needs to be labeled as interleave */ interleave_s = g_hash_table_lookup(constraint->dependent->meta, XML_RSC_ATTR_INTERLEAVE); if (crm_is_true(interleave_s) && (constraint->dependent->variant > pe_group)) { /* @TODO Do we actually care about multiple primary copies sharing a * dependent copy anymore? */ if (copies_per_node(constraint->dependent) != copies_per_node(constraint->primary)) { pcmk__config_err("Cannot interleave %s and %s because they do not " "support the same number of instances per node", constraint->dependent->id, constraint->primary->id); } else { do_interleave = TRUE; } } if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (do_interleave) { pe_resource_t *primary_instance = NULL; primary_instance = find_compatible_child(dependent, primary, RSC_ROLE_UNKNOWN, FALSE, data_set); if (primary_instance != NULL) { pe_rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_instance->id); dependent->cmds->rsc_colocation_lh(dependent, primary_instance, constraint, data_set); } else if (constraint->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", dependent->id, primary->id); - assign_node(dependent, NULL, TRUE); + pcmk__assign_resource(dependent, NULL, true); } else { pe_rsc_debug(primary, "Cannot pair %s with instance of %s", dependent->id, primary->id); } return; } else if (constraint->score >= INFINITY) { GList *affected_nodes = NULL; gIter = primary->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { pe_rsc_trace(primary, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); affected_nodes = g_list_prepend(affected_nodes, chosen); } } node_list_exclude(dependent->allowed_nodes, affected_nodes, FALSE); g_list_free(affected_nodes); return; } gIter = primary->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_rh(dependent, child_rsc, constraint, data_set); } } enum action_tasks clone_child_action(pe_action_t * action) { enum action_tasks result = no_action; pe_resource_t *child = (pe_resource_t *) action->rsc->children->data; if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) { /* Find the action we're notifying about instead */ int stop = 0; char *key = action->uuid; int lpc = strlen(key); for (; lpc > 0; lpc--) { if (key[lpc] == '_' && stop == 0) { stop = lpc; } else if (key[lpc] == '_') { char *task_mutable = NULL; lpc++; task_mutable = strdup(key + lpc); task_mutable[stop - lpc] = 0; crm_trace("Extracted action '%s' from '%s'", task_mutable, key); result = get_complex_task(child, task_mutable, TRUE); free(task_mutable); break; } } } else { result = get_complex_task(child, action->task, TRUE); } return result; } #define pe__clear_action_summary_flags(flags, action, flag) do { \ flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Action summary", action->rsc->id, \ flags, flag, #flag); \ } while (0) enum pe_action_flags summary_action_flags(pe_action_t * action, GList *children, pe_node_t * node) { GList *gIter = NULL; gboolean any_runnable = FALSE; gboolean check_runnable = TRUE; enum action_tasks task = clone_child_action(action); enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); const char *task_s = task2text(task); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_action_t *child_action = NULL; pe_resource_t *child = (pe_resource_t *) gIter->data; child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node); pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id, node ? node->details->uname : "none", child_action?child_action->uuid:"NA"); if (child_action) { enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); if (pcmk_is_set(flags, pe_action_optional) && !pcmk_is_set(child_flags, pe_action_optional)) { pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid, child_action->uuid); pe__clear_action_summary_flags(flags, action, pe_action_optional); pe__clear_action_flags(action, pe_action_optional); } if (pcmk_is_set(child_flags, pe_action_runnable)) { any_runnable = TRUE; } } } if (check_runnable && any_runnable == FALSE) { pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid); pe__clear_action_summary_flags(flags, action, pe_action_runnable); if (node == NULL) { pe__clear_action_flags(action, pe_action_runnable); } } return flags; } enum pe_action_flags clone_action_flags(pe_action_t * action, pe_node_t * node) { return summary_action_flags(action, action->rsc->children, node); } void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { GList *gIter = rsc->children; pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); pcmk__apply_location(constraint, rsc); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_location(child_rsc, constraint); } } void clone_expand(pe_resource_t * rsc, pe_working_set_t * data_set) { GList *gIter = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); g_list_foreach(rsc->actions, (GFunc) rsc->cmds->action_flags, NULL); if (clone_data->start_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->start_notify); pcmk__create_notification_keys(rsc, clone_data->start_notify, data_set); create_notifications(rsc, clone_data->start_notify, data_set); } if (clone_data->stop_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->stop_notify); pcmk__create_notification_keys(rsc, clone_data->stop_notify, data_set); create_notifications(rsc, clone_data->stop_notify, data_set); } if (clone_data->promote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->promote_notify); pcmk__create_notification_keys(rsc, clone_data->promote_notify, data_set); create_notifications(rsc, clone_data->promote_notify, data_set); } if (clone_data->demote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->demote_notify); pcmk__create_notification_keys(rsc, clone_data->demote_notify, data_set); create_notifications(rsc, clone_data->demote_notify, data_set); } /* Now that the notifcations have been created we can expand the children */ gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } native_expand(rsc, data_set); /* The notifications are in the graph now, we can destroy the notify_data */ free_notification_data(clone_data->demote_notify); clone_data->demote_notify = NULL; free_notification_data(clone_data->stop_notify); clone_data->stop_notify = NULL; free_notification_data(clone_data->start_notify); clone_data->start_notify = NULL; free_notification_data(clone_data->promote_notify); clone_data->promote_notify = NULL; } // Check whether a resource or any of its children is known on node static bool rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node) { if (rsc->children) { for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; if (rsc_known_on(child, node)) { return TRUE; } } } else if (rsc->known_on) { GHashTableIter iter; pe_node_t *known_node = NULL; g_hash_table_iter_init(&iter, rsc->known_on); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) { if (node->details == known_node->details) { return TRUE; } } } return FALSE; } // Look for an instance of clone that is known on node static pe_resource_t * find_instance_on(const pe_resource_t *clone, const pe_node_t *node) { for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (rsc_known_on(child, node)) { return child; } } return NULL; } // For unique clones, probe each instance separately static gboolean probe_unique_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set) { gboolean any_created = FALSE; for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; any_created |= child->cmds->create_probe(child, node, complete, force, data_set); } return any_created; } // For anonymous clones, only a single instance needs to be probed static gboolean probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set) { // First, check if we probed an instance on this node last time pe_resource_t *child = find_instance_on(rsc, node); // Otherwise, check if we plan to start an instance on this node if (child == NULL) { for (GList *child_iter = rsc->children; child_iter && !child; child_iter = child_iter->next) { pe_node_t *local_node = NULL; pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data; if (child_rsc) { /* make clang analyzer happy */ local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); if (local_node && (local_node->details == node->details)) { child = child_rsc; } } } } // Otherwise, use the first clone instance if (child == NULL) { child = rsc->children->data; } CRM_ASSERT(child); return child->cmds->create_probe(child, node, complete, force, data_set); } gboolean clone_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete, gboolean force, pe_working_set_t * data_set) { gboolean any_created = FALSE; CRM_ASSERT(rsc); rsc->children = g_list_sort(rsc->children, sort_rsc_id); if (rsc->children == NULL) { pe_warn("Clone %s has no children", rsc->id); return FALSE; } if (rsc->exclusive_discover) { pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on * * remove the node from allowed_nodes so that the * notification contains only nodes that we might ever run * on */ g_hash_table_remove(rsc->allowed_nodes, node->details->id); /* Bit of a shortcut - might as well take it */ return FALSE; } } if (pcmk_is_set(rsc->flags, pe_rsc_unique)) { any_created = probe_unique_clone(rsc, node, complete, force, data_set); } else { any_created = probe_anonymous_clone(rsc, node, complete, force, data_set); } return any_created; } void clone_append_meta(pe_resource_t * rsc, xmlNode * xml) { char *name = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); name = crm_meta_name(XML_RSC_ATTR_UNIQUE); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique)); free(name); name = crm_meta_name(XML_RSC_ATTR_NOTIFY); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify)); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX); crm_xml_add_int(xml, name, clone_data->clone_max); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX); crm_xml_add_int(xml, name, clone_data->clone_node_max); free(name); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX); crm_xml_add_int(xml, name, clone_data->promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX); crm_xml_add_int(xml, name, clone_data->promoted_node_max); free(name); /* @COMPAT Maintain backward compatibility with resource agents that * expect the old names (deprecated since 2.0.0). */ name = crm_meta_name(PCMK_XE_PROMOTED_MAX_LEGACY); crm_xml_add_int(xml, name, clone_data->promoted_max); free(name); name = crm_meta_name(PCMK_XE_PROMOTED_NODE_MAX_LEGACY); crm_xml_add_int(xml, name, clone_data->promoted_node_max); free(name); } } diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c index 42b142af39..b9c3ebb91d 100644 --- a/lib/pacemaker/pcmk_sched_resource.c +++ b/lib/pacemaker/pcmk_sched_resource.c @@ -1,209 +1,250 @@ /* * Copyright 2014-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include "libpacemaker_private.h" // Shared implementation of resource_alloc_functions_t:colocated_resources() GList * pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *colocated_rscs) { GList *gIter = NULL; if (orig_rsc == NULL) { orig_rsc = rsc; } if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) { return colocated_rscs; } pe_rsc_trace(orig_rsc, "%s is in colocation chain with %s", rsc->id, orig_rsc->id); colocated_rscs = g_list_append(colocated_rscs, rsc); // Follow colocations where this resource is the dependent resource for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; pe_resource_t *primary = constraint->primary; if (primary == orig_rsc) { continue; // Break colocation loop } if ((constraint->score == INFINITY) && (pcmk__colocation_affects(rsc, primary, constraint, true) == pcmk__coloc_affects_location)) { colocated_rscs = primary->cmds->colocated_resources(primary, orig_rsc, colocated_rscs); } } // Follow colocations where this resource is the primary resource for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; pe_resource_t *dependent = constraint->dependent; if (dependent == orig_rsc) { continue; // Break colocation loop } if (pe_rsc_is_clone(rsc) && !pe_rsc_is_clone(dependent)) { continue; // We can't be sure whether dependent will be colocated } if ((constraint->score == INFINITY) && (pcmk__colocation_affects(dependent, rsc, constraint, true) == pcmk__coloc_affects_location)) { colocated_rscs = dependent->cmds->colocated_resources(dependent, orig_rsc, colocated_rscs); } } return colocated_rscs; } /*! * \internal * \brief Assign a specified primitive resource to a node * * Assign a specified primitive resource to a specified node, if the node can * run the resource (or unconditionally, if \p force is true). Mark the resource * as no longer provisional. If the primitive can't be assigned (or \p chosen is * NULL), unassign any previous assignment for it, set its next role to stopped, * and update any existing actions scheduled for it. This is not done * recursively for children, so it should be called only for primitives. * * \param[in] rsc Resource to assign * \param[in] chosen Node to assign \p rsc to * \param[in] force If true, assign to \p chosen even if unavailable * * \return true if \p rsc could be assigned, otherwise false * * \note Assigning a resource to the NULL node using this function is different * from calling pcmk__unassign_resource(), in that it will also update any * actions created for the resource. */ bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force) { pcmk__output_t *out = rsc->cluster->priv; CRM_ASSERT(rsc->variant == pe_native); if (!force && (chosen != NULL)) { if ((chosen->weight < 0) // Allow the graph to assume that guest node connections will come up || (!can_run_resources(chosen) && !pe__is_guest_node(chosen))) { crm_debug("All nodes for resource %s are unavailable, unclean or " "shutting down (%s can%s run resources, with weight %d)", rsc->id, chosen->details->uname, (can_run_resources(chosen)? "" : "not"), chosen->weight); pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability"); chosen = NULL; } } pcmk__unassign_resource(rsc); pe__clear_resource_flags(rsc, pe_rsc_provisional); if (chosen == NULL) { crm_debug("Could not allocate a node for %s", rsc->id); pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate"); for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) { pe_action_t *op = (pe_action_t *) iter->data; crm_debug("Updating %s for allocation failure", op->uuid); if (pcmk__str_eq(op->task, RSC_STOP, pcmk__str_casei)) { pe__clear_action_flags(op, pe_action_optional); } else if (pcmk__str_eq(op->task, RSC_START, pcmk__str_casei)) { pe__clear_action_flags(op, pe_action_runnable); //pe__set_resource_flags(rsc, pe_rsc_block); } else { // Cancel recurring actions, unless for stopped state const char *interval_ms_s = NULL; const char *target_rc_s = NULL; char *rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING); interval_ms_s = g_hash_table_lookup(op->meta, XML_LRM_ATTR_INTERVAL_MS); target_rc_s = g_hash_table_lookup(op->meta, XML_ATTR_TE_TARGET_RC); if ((interval_ms_s != NULL) && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_none) && !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) { pe__clear_action_flags(op, pe_action_runnable); } free(rc_stopped); } } return false; } crm_debug("Assigning %s to %s", rsc->id, chosen->details->uname); rsc->allocated_to = pe__copy_node(chosen); chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, rsc); chosen->details->num_resources++; chosen->count++; calculate_utilization(chosen->details->utilization, rsc->utilization, FALSE); if (pcmk_is_set(rsc->cluster->flags, pe_flag_show_utilization)) { out->message(out, "resource-util", rsc, chosen, __func__); } return true; } +/*! + * \internal + * \brief Assign a specified resource (of any variant) to a node + * + * Assign a specified resource and its children (if any) to a specified node, if + * the node can run the resource (or unconditionally, if \p force is true). Mark + * the resources as no longer provisional. If the resources can't be assigned + * (or \p chosen is NULL), unassign any previous assignments, set next role to + * stopped, and update any existing actions scheduled for them. + * + * \param[in] rsc Resource to assign + * \param[in] chosen Node to assign \p rsc to + * \param[in] force If true, assign to \p chosen even if unavailable + * + * \return true if \p rsc could be assigned, otherwise false + * + * \note Assigning a resource to the NULL node using this function is different + * from calling pcmk__unassign_resource(), in that it will also update any + * actions created for the resource. + */ +bool +pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force) +{ + bool changed = false; + + if (rsc->children == NULL) { + if (rsc->allocated_to != NULL) { + changed = true; + } + pcmk__assign_primitive(rsc, node, force); + + } else { + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *child_rsc = (pe_resource_t *) iter->data; + + changed |= pcmk__assign_resource(child_rsc, node, force); + } + } + return changed; +} + /*! * \internal * \brief Remove any assignment of a specified resource to a node * * If a specified resource has been assigned to a node, remove that assignment * and mark the resource as provisional again. This is not done recursively for * children, so it should be called only for primitives. * * \param[in] rsc Resource to unassign */ void pcmk__unassign_resource(pe_resource_t *rsc) { pe_node_t *old = rsc->allocated_to; if (old == NULL) { return; } crm_info("Unassigning %s from %s", rsc->id, old->details->uname); pe__set_resource_flags(rsc, pe_rsc_provisional); rsc->allocated_to = NULL; /* We're going to free the pe_node_t, but its details member is shared and * will remain, so update that appropriately first. */ old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, rsc); old->details->num_resources--; calculate_utilization(old->details->utilization, rsc->utilization, TRUE); free(old); }