diff --git a/lib/pacemaker/Makefile.am b/lib/pacemaker/Makefile.am index d583284789..6f13c46185 100644 --- a/lib/pacemaker/Makefile.am +++ b/lib/pacemaker/Makefile.am @@ -1,68 +1,69 @@ # -# Copyright 2004-2022 the Pacemaker project contributors +# Copyright 2004-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/common.mk AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir) noinst_HEADERS = libpacemaker_private.h ## libraries lib_LTLIBRARIES = libpacemaker.la ## SOURCES libpacemaker_la_LDFLAGS = -version-info 6:0:5 libpacemaker_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpacemaker_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libpacemaker_la_LIBADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/common/libcrmcommon.la # -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version # Use += rather than backlashed continuation lines for parsing by bumplibs libpacemaker_la_SOURCES = libpacemaker_la_SOURCES += pcmk_acl.c libpacemaker_la_SOURCES += pcmk_cluster_queries.c libpacemaker_la_SOURCES += pcmk_fence.c libpacemaker_la_SOURCES += pcmk_graph_consumer.c libpacemaker_la_SOURCES += pcmk_graph_logging.c libpacemaker_la_SOURCES += pcmk_graph_producer.c libpacemaker_la_SOURCES += pcmk_injections.c libpacemaker_la_SOURCES += pcmk_output.c libpacemaker_la_SOURCES += pcmk_resource.c libpacemaker_la_SOURCES += pcmk_result_code.c libpacemaker_la_SOURCES += pcmk_rule.c libpacemaker_la_SOURCES += pcmk_sched_actions.c libpacemaker_la_SOURCES += pcmk_sched_bundle.c libpacemaker_la_SOURCES += pcmk_sched_clone.c libpacemaker_la_SOURCES += pcmk_sched_colocation.c libpacemaker_la_SOURCES += 
pcmk_sched_constraints.c libpacemaker_la_SOURCES += pcmk_sched_fencing.c libpacemaker_la_SOURCES += pcmk_sched_group.c +libpacemaker_la_SOURCES += pcmk_sched_instances.c libpacemaker_la_SOURCES += pcmk_sched_location.c libpacemaker_la_SOURCES += pcmk_sched_migration.c libpacemaker_la_SOURCES += pcmk_sched_nodes.c libpacemaker_la_SOURCES += pcmk_sched_ordering.c libpacemaker_la_SOURCES += pcmk_sched_primitive.c libpacemaker_la_SOURCES += pcmk_sched_probes.c libpacemaker_la_SOURCES += pcmk_sched_promotable.c libpacemaker_la_SOURCES += pcmk_sched_recurring.c libpacemaker_la_SOURCES += pcmk_sched_remote.c libpacemaker_la_SOURCES += pcmk_sched_resource.c libpacemaker_la_SOURCES += pcmk_sched_tickets.c libpacemaker_la_SOURCES += pcmk_sched_utilization.c libpacemaker_la_SOURCES += pcmk_scheduler.c libpacemaker_la_SOURCES += pcmk_simulate.c libpacemaker_la_SOURCES += pcmk_status.c diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h index 801cc92cf6..b47165c854 100644 --- a/lib/pacemaker/libpacemaker_private.h +++ b/lib/pacemaker/libpacemaker_private.h @@ -1,830 +1,837 @@ /* * Copyright 2021-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__LIBPACEMAKER_PRIVATE__H # define PCMK__LIBPACEMAKER_PRIVATE__H /* This header is for the sole use of libpacemaker, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. 
*/ #include // pe_action_t, pe_node_t, pe_working_set_t // Flags to modify the behavior of pcmk__add_colocated_node_scores() enum pcmk__coloc_select { // With no other flags, apply all "with this" colocations pcmk__coloc_select_default = 0, // Apply "this with" colocations instead of "with this" colocations pcmk__coloc_select_this_with = (1 << 0), // Apply only colocations with non-negative scores pcmk__coloc_select_nonnegative = (1 << 1), // Apply only colocations with at least one matching node pcmk__coloc_select_active = (1 << 2), }; // Flags the update_ordered_actions() method can return enum pcmk__updated { pcmk__updated_none = 0, // Nothing changed pcmk__updated_first = (1 << 0), // First action was updated pcmk__updated_then = (1 << 1), // Then action was updated }; #define pcmk__set_updated_flags(au_flags, action, flags_to_set) do { \ au_flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Action update", \ (action)->uuid, au_flags, \ (flags_to_set), #flags_to_set); \ } while (0) #define pcmk__clear_updated_flags(au_flags, action, flags_to_clear) do { \ au_flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Action update", \ (action)->uuid, au_flags, \ (flags_to_clear), #flags_to_clear); \ } while (0) // Resource allocation methods struct resource_alloc_functions_s { /*! * \internal * \brief Assign a resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * * \return Node that \p rsc is assigned to, if assigned entirely to one node */ pe_node_t *(*assign)(pe_resource_t *rsc, const pe_node_t *prefer); /*! * \internal * \brief Create all actions needed for a given resource * * \param[in,out] rsc Resource to create actions for */ void (*create_actions)(pe_resource_t *rsc); /*! 
* \internal * \brief Schedule any probes needed for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool (*create_probe)(pe_resource_t *rsc, pe_node_t *node); /*! * \internal * \brief Create implicit constraints needed for a resource * * \param[in,out] rsc Resource to create implicit constraints for */ void (*internal_constraints)(pe_resource_t *rsc); /*! * \internal * \brief Apply a colocation's score to node weights or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node weights (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void (*apply_coloc_score) (pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); /*! * \internal * \brief Create list of all resources in colocations with a given resource * * Given a resource, create a list of all resources involved in mandatory * colocations with it, whether directly or indirectly via chained colocations. * * \param[in] rsc Resource to add to colocated list * \param[in] orig_rsc Resource originally requested * \param[in,out] colocated_rscs Existing list * * \return List of given resource and all resources involved in colocations * * \note This function is recursive; top-level callers should pass NULL as * \p colocated_rscs and \p orig_rsc, and the desired resource as * \p rsc. The recursive calls will use other values. */ GList *(*colocated_resources)(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *colocated_rscs); /*! 
* \internal * \brief Apply a location constraint to a resource's allowed node scores * * \param[in,out] rsc Resource to apply constraint to * \param[in,out] location Location constraint to apply */ void (*apply_location)(pe_resource_t *rsc, pe__location_t *location); /*! * \internal * \brief Return action flags for a given resource action * * \param[in,out] action Action to get flags for * \param[in] node If not NULL, limit effects to this node * * \return Flags appropriate to \p action on \p node * \note For primitives, this will be the same as action->flags regardless * of node. For collective resources, the flags can differ due to * multiple instances possibly being involved. */ enum pe_action_flags (*action_flags)(pe_action_t *action, const pe_node_t *node); /*! * \internal * \brief Update two actions according to an ordering between them * * Given information about an ordering of two actions, update the actions' * flags (and runnable_before members if appropriate) as appropriate for the * ordering. In some cases, the ordering could be disabled as well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this * node (only used when interleaving instances) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates * (may include pe_action_optional to affect only * mandatory actions, and pe_action_runnable to * affect only runnable actions) * \param[in] type Group of enum pe_ordering flags to apply * \param[in,out] data_set Cluster working set * * \return Group of enum pcmk__updated flags indicating what was updated */ uint32_t (*update_ordered_actions)(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set); void (*output_actions)(pe_resource_t *rsc); /*! 
* \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void (*add_actions_to_graph)(pe_resource_t *rsc); /*! * \internal * \brief Add meta-attributes relevant to transition graph actions to XML * * If a given resource supports variant-specific meta-attributes that are * needed for transition graph actions, add them to a given XML element. * * \param[in] rsc Resource whose meta-attributes should be added * \param[in,out] xml Transition graph action attributes XML to add to */ void (*add_graph_meta)(const pe_resource_t *rsc, xmlNode *xml); /*! * \internal * \brief Add a resource's utilization to a table of utilization values * * This function is used when summing the utilization of a resource and all * resources colocated with it, to determine whether a node has sufficient * capacity. Given a resource and a table of utilization values, it will add * the resource's utilization to the existing values, if the resource has * not yet been allocated to a node. * * \param[in] rsc Resource with utilization to add * \param[in] orig_rsc Resource being allocated (for logging only) * \param[in] all_rscs List of all resources that will be summed * \param[in,out] utilization Table of utilization values to add to */ void (*add_utilization)(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization); /*! 
* \internal * \brief Apply a shutdown lock for a resource, if appropriate * * \param[in,out] rsc Resource to check for shutdown lock */ void (*shutdown_lock)(pe_resource_t *rsc); }; // Actions (pcmk_sched_actions.c) G_GNUC_INTERNAL void pcmk__update_action_for_orderings(pe_action_t *action, pe_working_set_t *data_set); G_GNUC_INTERNAL uint32_t pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__log_action(const char *pre_text, const pe_action_t *action, bool details); G_GNUC_INTERNAL pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name, guint interval_ms, const pe_node_t *node); G_GNUC_INTERNAL pe_action_t *pcmk__new_shutdown_action(pe_node_t *node); G_GNUC_INTERNAL bool pcmk__action_locks_rsc_to_node(const pe_action_t *action); G_GNUC_INTERNAL void pcmk__deduplicate_action_inputs(pe_action_t *action); G_GNUC_INTERNAL void pcmk__output_actions(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node, const xmlNode *xml_op); G_GNUC_INTERNAL void pcmk__handle_rsc_config_changes(pe_working_set_t *data_set); // Recurring actions (pcmk_sched_recurring.c) G_GNUC_INTERNAL void pcmk__create_recurring_actions(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id, const char *task, guint interval_ms, const pe_node_t *node, const char *reason); G_GNUC_INTERNAL void pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task, guint interval_ms, pe_node_t *node); G_GNUC_INTERNAL bool pcmk__action_is_recurring(const pe_action_t *action); // Producing transition graphs (pcmk_graph_producer.c) G_GNUC_INTERNAL bool pcmk__graph_has_loop(const pe_action_t *init_action, const pe_action_t *action, pe_action_wrapper_t *input); G_GNUC_INTERNAL void pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc); G_GNUC_INTERNAL void 
pcmk__create_graph(pe_working_set_t *data_set); // Fencing (pcmk_sched_fencing.c) G_GNUC_INTERNAL void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order); G_GNUC_INTERNAL void pcmk__fence_guest(pe_node_t *node); G_GNUC_INTERNAL bool pcmk__node_unfenced(const pe_node_t *node); G_GNUC_INTERNAL void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data); // Injected scheduler inputs (pcmk_sched_injections.c) void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib, const pcmk_injections_t *injections); // Constraints of any type (pcmk_sched_constraints.c) G_GNUC_INTERNAL pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id); G_GNUC_INTERNAL xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj, const pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__valid_resource_or_tag(const pe_working_set_t *data_set, const char *id, pe_resource_t **rsc, pe_tag_t **tag); G_GNUC_INTERNAL bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr, bool convert_rsc, const pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__create_internal_constraints(pe_working_set_t *data_set); // Location constraints G_GNUC_INTERNAL void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc, int node_weight, const char *discover_mode, pe_node_t *foo_node, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_locations(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_location(pe_resource_t *rsc, pe__location_t *constraint); // Colocation constraints (pcmk_sched_colocation.c) enum pcmk__coloc_affects { pcmk__coloc_affects_nothing = 0, pcmk__coloc_affects_location, pcmk__coloc_affects_role, }; G_GNUC_INTERNAL enum pcmk__coloc_affects 
pcmk__colocation_affects(const pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool preview); G_GNUC_INTERNAL void pcmk__apply_coloc_to_weights(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__apply_coloc_to_priority(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id, GHashTable **nodes, const char *attr, float factor, uint32_t flags); G_GNUC_INTERNAL void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__add_this_with(pe_resource_t *rsc, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__add_with_this(pe_resource_t *rsc, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__new_colocation(const char *id, const char *node_attr, int score, pe_resource_t *dependent, pe_resource_t *primary, const char *dependent_role, const char *primary_role, bool influence, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__block_colocation_dependents(pe_action_t *action, pe_working_set_t *data_set); /*! * \internal * \brief Check whether colocation's dependent preferences should be considered * * \param[in] colocation Colocation constraint * \param[in] rsc Primary instance (normally this will be * colocation->primary, which NULL will be treated as, * but for clones or bundles with multiple instances * this can be a particular instance) * * \return true if colocation influence should be effective, otherwise false */ static inline bool pcmk__colocation_has_influence(const pcmk__colocation_t *colocation, const pe_resource_t *rsc) { if (rsc == NULL) { rsc = colocation->primary; } /* A bundle replica colocates its remote connection with its container, * using a finite score so that the container can run on Pacemaker Remote * nodes. 
* * Moving a connection is lightweight and does not interrupt the service, * while moving a container is heavyweight and does interrupt the service, * so don't move a clean, active container based solely on the preferences * of its connection. * * This also avoids problematic scenarios where two containers want to * perpetually swap places. */ if (pcmk_is_set(colocation->dependent->flags, pe_rsc_allow_remote_remotes) && !pcmk_is_set(rsc->flags, pe_rsc_failed) && pcmk__list_of_1(rsc->running_on)) { return false; } /* The dependent in a colocation influences the primary's location * if the influence option is true or the primary is not yet active. */ return colocation->influence || (rsc->running_on == NULL); } // Ordering constraints (pcmk_sched_ordering.c) G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task, pe_action_t *first_action, pe_resource_t *then_rsc, char *then_task, pe_action_t *then_action, uint32_t flags, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__disable_invalid_orderings(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_stops_before_shutdown(pe_node_t *node, pe_action_t *shutdown_op); G_GNUC_INTERNAL void pcmk__apply_orderings(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_after_each(pe_action_t *after, GList *list); /*! 
* \internal * \brief Create a new ordering between two resource actions * * \param[in,out] first_rsc Resource for 'first' action * \param[in,out] first_task Action key for 'first' action * \param[in] then_rsc Resource for 'then' action * \param[in,out] then_task Action key for 'then' action * \param[in] flags Bitmask of enum pe_ordering flags */ #define pcmk__order_resource_actions(first_rsc, first_task, \ then_rsc, then_task, flags) \ pcmk__new_ordering((first_rsc), \ pcmk__op_key((first_rsc)->id, (first_task), 0), \ NULL, \ (then_rsc), \ pcmk__op_key((then_rsc)->id, (then_task), 0), \ NULL, (flags), (first_rsc)->cluster) #define pcmk__order_starts(rsc1, rsc2, flags) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \ (rsc2), CRMD_ACTION_START, (flags)) #define pcmk__order_stops(rsc1, rsc2, flags) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \ (rsc2), CRMD_ACTION_STOP, (flags)) // Ticket constraints (pcmk_sched_tickets.c) G_GNUC_INTERNAL void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set); // Promotable clone resources (pcmk_sched_promotable.c) G_GNUC_INTERNAL void pcmk__add_promotion_scores(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__require_promotion_tickets(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__set_instance_roles(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__create_promotable_actions(pe_resource_t *clone); G_GNUC_INTERNAL void pcmk__promotable_restart_ordering(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__order_promotable_instances(pe_resource_t *clone); G_GNUC_INTERNAL void pcmk__update_dependent_with_promotable(const pe_resource_t *primary, pe_resource_t *dependent, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__update_promotable_dependent_priority(const pe_resource_t *primary, pe_resource_t *dependent, const pcmk__colocation_t *colocation); // Pacemaker Remote nodes (pcmk_sched_remote.c) G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(const pe_node_t *node); G_GNUC_INTERNAL void 
pcmk__order_remote_connection_actions(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_corresponds_to_guest(const pe_resource_t *rsc, const pe_node_t *node); G_GNUC_INTERNAL pe_node_t *pcmk__connection_host_for_action(const pe_action_t *action); G_GNUC_INTERNAL void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params); G_GNUC_INTERNAL void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, const pe_action_t *action); // Primitives (pcmk_sched_primitive.c) G_GNUC_INTERNAL pe_node_t *pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer); G_GNUC_INTERNAL void pcmk__primitive_create_actions(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__primitive_internal_constraints(pe_resource_t *rsc); G_GNUC_INTERNAL enum pe_action_flags pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node); G_GNUC_INTERNAL void pcmk__primitive_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); G_GNUC_INTERNAL void pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional); G_GNUC_INTERNAL void pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml); G_GNUC_INTERNAL void pcmk__primitive_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization); G_GNUC_INTERNAL void pcmk__primitive_shutdown_lock(pe_resource_t *rsc); // Groups (pcmk_sched_group.c) G_GNUC_INTERNAL pe_node_t *pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer); G_GNUC_INTERNAL void pcmk__group_create_actions(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__group_internal_constraints(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__group_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); G_GNUC_INTERNAL void pcmk__group_apply_location(pe_resource_t *rsc, pe__location_t *location); G_GNUC_INTERNAL enum pe_action_flags 
pcmk__group_action_flags(pe_action_t *action, const pe_node_t *node); G_GNUC_INTERNAL uint32_t pcmk__group_update_ordered_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set); G_GNUC_INTERNAL GList *pcmk__group_colocated_resources(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *colocated_rscs); G_GNUC_INTERNAL void pcmk__group_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization); G_GNUC_INTERNAL void pcmk__group_shutdown_lock(pe_resource_t *rsc); // Clones (pcmk_sched_clone.c) G_GNUC_INTERNAL void pcmk__clone_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); // Bundles (pcmk_sched_bundle.c) G_GNUC_INTERNAL void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); G_GNUC_INTERNAL void pcmk__output_bundle_actions(pe_resource_t *rsc); +// Clone instances or bundle replica containers (pcmk_sched_instances.c) + +G_GNUC_INTERNAL +void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, + int max, int per_host_max, pe_working_set_t *data_set); + + // Injections (pcmk_injections.c) G_GNUC_INTERNAL xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid); G_GNUC_INTERNAL xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node, bool up); G_GNUC_INTERNAL xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *lrm_name, const char *rclass, const char *rtype, const char *rprovider); G_GNUC_INTERNAL void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *task, guint interval_ms, int rc); G_GNUC_INTERNAL xmlNode *pcmk__inject_action_result(xmlNode *cib_resource, lrmd_event_data_t 
*op, int target_rc); // Nodes (pcmk_sched_nodes.c) G_GNUC_INTERNAL bool pcmk__node_available(const pe_node_t *node, bool consider_score, bool consider_guest); G_GNUC_INTERNAL bool pcmk__any_node_available(GHashTable *nodes); G_GNUC_INTERNAL GHashTable *pcmk__copy_node_table(GHashTable *nodes); G_GNUC_INTERNAL GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node); G_GNUC_INTERNAL void pcmk__apply_node_health(pe_working_set_t *data_set); G_GNUC_INTERNAL pe_node_t *pcmk__top_allowed_node(const pe_resource_t *rsc, const pe_node_t *node); // Functions applying to more than one variant (pcmk_sched_resource.c) G_GNUC_INTERNAL void pcmk__set_allocation_methods(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node, const xmlNode *rsc_entry, bool active_on_node); G_GNUC_INTERNAL GList *pcmk__rscs_matching_id(const char *id, const pe_working_set_t *data_set); G_GNUC_INTERNAL GList *pcmk__colocated_resources(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *colocated_rscs); G_GNUC_INTERNAL void pcmk__noop_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml); G_GNUC_INTERNAL void pcmk__output_resource_actions(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen, bool force); G_GNUC_INTERNAL bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force); G_GNUC_INTERNAL void pcmk__unassign_resource(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__threshold_reached(pe_resource_t *rsc, const pe_node_t *node, pe_resource_t **failed); G_GNUC_INTERNAL void pcmk__sort_resources(pe_working_set_t *data_set); G_GNUC_INTERNAL gint pcmk__cmp_instance(gconstpointer a, gconstpointer b); G_GNUC_INTERNAL gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b); // Functions related to probes (pcmk_sched_probes.c) G_GNUC_INTERNAL bool pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node); G_GNUC_INTERNAL void 
pcmk__order_probes(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__probe_resource_list(GList *rscs, pe_node_t *node); G_GNUC_INTERNAL void pcmk__schedule_probes(pe_working_set_t *data_set); // Functions related to live migration (pcmk_sched_migration.c) void pcmk__create_migration_actions(pe_resource_t *rsc, const pe_node_t *current); void pcmk__abort_dangling_migration(void *data, void *user_data); bool pcmk__rsc_can_migrate(const pe_resource_t *rsc, const pe_node_t *current); void pcmk__order_migration_equivalents(pe__ordering_t *order); // Functions related to node utilization (pcmk_sched_utilization.c) G_GNUC_INTERNAL int pcmk__compare_node_capacities(const pe_node_t *node1, const pe_node_t *node2); G_GNUC_INTERNAL void pcmk__consume_node_capacity(GHashTable *current_utilization, const pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__release_node_capacity(GHashTable *current_utilization, const pe_resource_t *rsc); G_GNUC_INTERNAL const pe_node_t *pcmk__ban_insufficient_capacity(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__create_utilization_constraints(pe_resource_t *rsc, const GList *allowed_nodes); G_GNUC_INTERNAL void pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set); #endif // PCMK__LIBPACEMAKER_PRIVATE__H diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c index e5b9be6a78..d42770499a 100644 --- a/lib/pacemaker/pcmk_sched_bundle.c +++ b/lib/pacemaker/pcmk_sched_bundle.c @@ -1,1156 +1,1153 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include "libpacemaker_private.h" #define PE__VARIANT_BUNDLE 1 #include static bool is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node) { for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (node->details == replica->node->details) { return TRUE; } } return FALSE; } -void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, - int max, int per_host_max, pe_working_set_t * data_set); - static GList * get_container_list(const pe_resource_t *rsc) { GList *containers = NULL; if (rsc->variant == pe_container) { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; containers = g_list_append(containers, replica->container); } } return containers; } static inline GList * get_containers_or_children(const pe_resource_t *rsc) { return (rsc->variant == pe_container)? get_container_list(rsc) : rsc->children; } /*! 
 * \internal
 * \brief Assign a bundle resource to a node
 *
 * Replica containers are handed to distribute_children() first; then, for
 * each replica, its IP, its remote connection, and its child are assigned
 * through their own assign() methods, and finally the bundle's child is
 * assigned.
 *
 * \param[in,out] rsc     Resource to assign to a node
 * \param[in]     prefer  Node to prefer, if all else is equal
 *
 * \return Node that \p rsc is assigned to, if assigned entirely to one node
 *         (this implementation always returns NULL)
 */
pe_node_t *
pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer)
{
    GList *containers = NULL;
    GList *nodes = NULL;
    pe__bundle_variant_data_t *bundle_data = NULL;

    CRM_CHECK(rsc != NULL, return NULL);

    get_bundle_variant_data(bundle_data, rsc);

    pe__set_resource_flags(rsc, pe_rsc_allocating);
    containers = get_container_list(rsc);

    pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
                          rsc, __func__, rsc->allowed_nodes, rsc->cluster);

    /* Pass sorted nodes and sorted containers to distribute_children(), with
     * the bundle's nreplicas as "max" and nreplicas_per_host as
     * "per_host_max". Both lists are temporary and are freed right after.
     */
    nodes = g_hash_table_get_values(rsc->allowed_nodes);
    nodes = pcmk__sort_nodes(nodes, NULL);
    containers = g_list_sort(containers, pcmk__cmp_instance);
    distribute_children(rsc, containers, nodes, bundle_data->nreplicas,
                        bundle_data->nreplicas_per_host, rsc->cluster);
    g_list_free(nodes);
    g_list_free(containers);

    for (GList *gIter = bundle_data->replicas; gIter != NULL;
         gIter = gIter->next) {
        pe__bundle_replica_t *replica = gIter->data;
        pe_node_t *container_host = NULL;

        CRM_ASSERT(replica);
        if (replica->ip) {
            pe_rsc_trace(rsc, "Allocating bundle %s IP %s",
                         rsc->id, replica->ip->id);
            replica->ip->cmds->assign(replica->ip, prefer);
        }

        // Node that the replica's container is currently assigned to
        container_host = replica->container->allocated_to;
        if (replica->remote && pe__is_guest_or_remote_node(container_host)) {
            /* We need 'nested' connection resources to be on the same
             * host because pacemaker-remoted only supports a single
             * active connection
             */
            pcmk__new_colocation("child-remote-with-docker-remote", NULL,
                                 INFINITY, replica->remote,
                                 container_host->details->remote_rsc, NULL,
                                 NULL, true, rsc->cluster);
        }

        if (replica->remote) {
            pe_rsc_trace(rsc, "Allocating bundle %s connection %s",
                         rsc->id, replica->remote->id);
            replica->remote->cmds->assign(replica->remote, prefer);
        }

        // Explicitly allocate replicas' children before bundle child
        if (replica->child) {
            pe_node_t *node = NULL;
            GHashTableIter iter;

            /* Ban the replica's child from every node except the replica's
             * own node; that node gets a score of INFINITY unless
             * pcmk__threshold_reached() returns true for it, in which case
             * its existing score is left alone.
             */
            g_hash_table_iter_init(&iter, replica->child->allowed_nodes);
            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
                if (node->details != replica->node->details) {
                    node->weight = -INFINITY;
                } else if (!pcmk__threshold_reached(replica->child, node,
                                                    NULL)) {
                    node->weight = INFINITY;
                }
            }

            // The parent is flagged as allocating only while the child is assigned
            pe__set_resource_flags(replica->child->parent, pe_rsc_allocating);
            pe_rsc_trace(rsc, "Allocating bundle %s replica child %s",
                         rsc->id, replica->child->id);
            replica->child->cmds->assign(replica->child, replica->node);
            pe__clear_resource_flags(replica->child->parent,
                                     pe_rsc_allocating);
        }
    }

    if (bundle_data->child) {
        pe_node_t *node = NULL;
        GHashTableIter iter;

        /* Reset the bundle child's allowed node scores: 0 for nodes that
         * belong to one of this bundle's replicas, -INFINITY for all others.
         */
        g_hash_table_iter_init(&iter, bundle_data->child->allowed_nodes);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
            if (is_bundle_node(bundle_data, node)) {
                node->weight = 0;
            } else {
                node->weight = -INFINITY;
            }
        }
        pe_rsc_trace(rsc, "Allocating bundle %s child %s",
                     rsc->id, bundle_data->child->id);
        bundle_data->child->cmds->assign(bundle_data->child, prefer);
    }

    pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional);
    return NULL;
}


/*!
 * \internal
 * \brief Create all actions needed for a given bundle resource
 *
 * Calls the create_actions() method of each replica's IP, container, and
 * remote connection (whichever exist), creates the bundle's pseudo-actions
 * via clone_create_pseudo_actions(), then creates the bundle child's actions
 * plus promote/promoted/demote/demoted pseudo-actions if that child is
 * promotable.
 *
 * \param[in,out] rsc  Bundle resource to create actions for
 */
void
pcmk__bundle_create_actions(pe_resource_t *rsc)
{
    pe_action_t *action = NULL;
    GList *containers = NULL;
    pe__bundle_variant_data_t *bundle_data = NULL;

    CRM_CHECK(rsc != NULL, return);

    // Temporary list; freed at the end of this function
    containers = get_container_list(rsc);
    get_bundle_variant_data(bundle_data, rsc);
    for (GList *gIter = bundle_data->replicas; gIter != NULL;
         gIter = gIter->next) {
        pe__bundle_replica_t *replica = gIter->data;

        CRM_ASSERT(replica);
        if (replica->ip) {
            replica->ip->cmds->create_actions(replica->ip);
        }
        if (replica->container) {
            replica->container->cmds->create_actions(replica->container);
        }
        if (replica->remote) {
            replica->remote->cmds->create_actions(replica->remote);
        }
    }

    clone_create_pseudo_actions(rsc, containers, NULL, NULL);

    if (bundle_data->child) {
        bundle_data->child->cmds->create_actions(bundle_data->child);

        if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) {
            /* promote */
            pe__new_rsc_pseudo_action(rsc, RSC_PROMOTE, true, true);
            action = pe__new_rsc_pseudo_action(rsc, RSC_PROMOTED, true, true);
            action->priority = INFINITY;

            /* demote */
            pe__new_rsc_pseudo_action(rsc, RSC_DEMOTE, true, true);
            action = pe__new_rsc_pseudo_action(rsc, RSC_DEMOTED, true, true);
            action->priority = INFINITY;
        }
    }

    g_list_free(containers);
}

/*!
 * \internal
 * \brief Create implicit constraints needed for a bundle resource
 *
 * Orders the bundle's start/stop (and, for a promotable child,
 * promote/demote) actions relative to its child and to each replica's
 * container, orders and colocates each replica's IP with its container, and
 * calls the internal_constraints() method of every container, IP, remote
 * connection, and the bundle child.
 *
 * \param[in,out] rsc  Bundle resource to create implicit constraints for
 */
void
pcmk__bundle_internal_constraints(pe_resource_t *rsc)
{
    pe__bundle_variant_data_t *bundle_data = NULL;

    CRM_CHECK(rsc != NULL, return);

    get_bundle_variant_data(bundle_data, rsc);

    if (bundle_data->child) {
        // Bundle start/stop is ordered before the child's start/stop
        pcmk__order_resource_actions(rsc, RSC_START, bundle_data->child,
                                     RSC_START, pe_order_implies_first_printed);
        pcmk__order_resource_actions(rsc, RSC_STOP, bundle_data->child,
                                     RSC_STOP, pe_order_implies_first_printed);

        /* The bundle's started/stopped actions are ordered after the child's
         * started/stopped actions if the child has children of its own,
         * otherwise after the child's plain start/stop actions.
         */
        if (bundle_data->child->children) {
            pcmk__order_resource_actions(bundle_data->child, RSC_STARTED, rsc,
                                         RSC_STARTED,
                                         pe_order_implies_then_printed);
            pcmk__order_resource_actions(bundle_data->child, RSC_STOPPED, rsc,
                                         RSC_STOPPED,
                                         pe_order_implies_then_printed);
        } else {
            pcmk__order_resource_actions(bundle_data->child, RSC_START, rsc,
                                         RSC_STARTED,
                                         pe_order_implies_then_printed);
            pcmk__order_resource_actions(bundle_data->child, RSC_STOP, rsc,
                                         RSC_STOPPED,
                                         pe_order_implies_then_printed);
        }
    }

    for (GList *gIter = bundle_data->replicas; gIter != NULL;
         gIter = gIter->next) {
        pe__bundle_replica_t *replica = gIter->data;

        CRM_ASSERT(replica);
        CRM_ASSERT(replica->container);

        replica->container->cmds->internal_constraints(replica->container);

        // Bundle start, then container start
        pcmk__order_starts(rsc, replica->container,
                           pe_order_runnable_left|pe_order_implies_first_printed);

        // Bundle stop, then replica child stop (if any) and container stop
        if (replica->child) {
            pcmk__order_stops(rsc, replica->child,
                              pe_order_implies_first_printed);
        }
        pcmk__order_stops(rsc, replica->container,
                          pe_order_implies_first_printed);

        // Container start/stop, then bundle started/stopped
        pcmk__order_resource_actions(replica->container, RSC_START, rsc,
                                     RSC_STARTED,
                                     pe_order_implies_then_printed);
        pcmk__order_resource_actions(replica->container, RSC_STOP, rsc,
                                     RSC_STOPPED,
                                     pe_order_implies_then_printed);

        if (replica->ip) {
            replica->ip->cmds->internal_constraints(replica->ip);

            // Start IP then container
            pcmk__order_starts(replica->ip, replica->container,
                               pe_order_runnable_left|pe_order_preserve);
            // Stop container then IP
            pcmk__order_stops(replica->container, replica->ip,
                              pe_order_implies_first|pe_order_preserve);

            // IP is the dependent, container the primary, score INFINITY
            pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip,
                                 replica->container, NULL, NULL, true,
                                 rsc->cluster);
        }

        if (replica->remote) {
            /* This handles ordering and colocating remote relative to container
             * (via "resource-with-container"). Since IP is also ordered and
             * colocated relative to the container, we don't need to do anything
             * explicit here with IP.
             */
            replica->remote->cmds->internal_constraints(replica->remote);
        }

        if (replica->child) {
            // A replica with a child is required to have a remote connection
            CRM_ASSERT(replica->remote);

            // "Start remote then child" is implicit in scheduler's remote logic
        }

    }

    if (bundle_data->child) {
        bundle_data->child->cmds->internal_constraints(bundle_data->child);
        if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) {
            pcmk__promotable_restart_ordering(rsc);

            /* child demoted before global demoted */
            pcmk__order_resource_actions(bundle_data->child, RSC_DEMOTED, rsc,
                                         RSC_DEMOTED,
                                         pe_order_implies_then_printed);

            /* global demote before child demote */
            pcmk__order_resource_actions(rsc, RSC_DEMOTE, bundle_data->child,
                                         RSC_DEMOTE,
                                         pe_order_implies_first_printed);

            /* child promoted before global promoted */
            pcmk__order_resource_actions(bundle_data->child, RSC_PROMOTED, rsc,
                                         RSC_PROMOTED,
                                         pe_order_implies_then_printed);

            /* global promote before child promote */
            pcmk__order_resource_actions(rsc, RSC_PROMOTE, bundle_data->child,
                                         RSC_PROMOTE,
                                         pe_order_implies_first_printed);
        }
    }
}

static pe_resource_t *
compatible_replica_for_node(const pe_resource_t *rsc_lh,
                            const pe_node_t *candidate,
                            const pe_resource_t *rsc, enum rsc_role_e filter,
                            gboolean current)
{
    pe__bundle_variant_data_t
*bundle_data = NULL; CRM_CHECK(candidate != NULL, return NULL); get_bundle_variant_data(bundle_data, rsc); crm_trace("Looking for compatible child from %s for %s on %s", rsc_lh->id, rsc->id, pe__node_name(candidate)); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (is_child_compatible(replica->container, candidate, filter, current)) { crm_trace("Pairing %s with %s on %s", rsc_lh->id, replica->container->id, pe__node_name(candidate)); return replica->container; } } crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id); return NULL; } static pe_resource_t * compatible_replica(const pe_resource_t *rsc_lh, const pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { GList *scratch = NULL; pe_resource_t *pair = NULL; pe_node_t *active_node_lh = NULL; active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current); if (active_node_lh) { return compatible_replica_for_node(rsc_lh, active_node_lh, rsc, filter, current); } scratch = g_hash_table_get_values(rsc_lh->allowed_nodes); scratch = pcmk__sort_nodes(scratch, NULL); for (GList *gIter = scratch; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = compatible_replica_for_node(rsc_lh, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? rsc->id : "none")); done: g_list_free(scratch); return pair; } int copies_per_node(pe_resource_t * rsc) { /* Strictly speaking, there should be a 'copies_per_node' addition * to the resource function table and each case would be a * function. However that would be serious overkill to return an * int. In fact, it seems to me that both function tables * could/should be replaced by resources.{c,h} full of * rsc_{some_operation} functions containing a switch as below * which calls out to functions named {variant}_{some_operation} * as needed. 
*/ switch(rsc->variant) { case pe_unknown: return 0; case pe_native: case pe_group: return 1; case pe_clone: { const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); if (max_clones_node == NULL) { return 1; } else { int max_i; pcmk__scan_min_int(max_clones_node, &max_i, 0); return max_i; } } case pe_container: { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); return data->nreplicas_per_host; } } return 0; } /*! * \internal * \brief Apply a colocation's score to node weights or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node weights (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { GList *allocated_primaries = NULL; pe__bundle_variant_data_t *bundle_data = NULL; /* This should never be called for the bundle itself as a dependent. * Instead, we add its colocation constraints to its replicas and call the * apply_coloc_score() for the replicas as dependents. 
*/ CRM_ASSERT(!for_dependent); CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL), return); CRM_ASSERT(dependent->variant == pe_native); if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (colocation->dependent->variant > pe_group) { pe_resource_t *primary_replica = compatible_replica(dependent, primary, RSC_ROLE_UNKNOWN, FALSE, dependent->cluster); if (primary_replica) { pe_rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_replica->id); dependent->cmds->apply_coloc_score(dependent, primary_replica, colocation, true); } else if (colocation->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true); } else { pe_rsc_debug(primary, "Cannot pair %s with instance of %s", dependent->id, primary->id); } return; } get_bundle_variant_data(bundle_data, primary); pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d", colocation->id, dependent->id, primary->id, colocation->score); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (colocation->score < INFINITY) { replica->container->cmds->apply_coloc_score(dependent, replica->container, colocation, false); } else { pe_node_t *chosen = replica->container->fns->location(replica->container, NULL, FALSE); if ((chosen == NULL) || is_set_recursive(replica->container, pe_rsc_block, TRUE)) { continue; } if ((colocation->primary_role >= RSC_ROLE_PROMOTED) && (replica->child == NULL)) { continue; } if ((colocation->primary_role >= RSC_ROLE_PROMOTED) && (replica->child->next_role < RSC_ROLE_PROMOTED)) { continue; } pe_rsc_trace(primary, "Allowing %s: %s %d", colocation->id, pe__node_name(chosen), chosen->weight); allocated_primaries = g_list_prepend(allocated_primaries, chosen); } } if (colocation->score >= INFINITY) { 
node_list_exclude(dependent->allowed_nodes, allocated_primaries, FALSE); } g_list_free(allocated_primaries); } enum pe_action_flags pcmk__bundle_action_flags(pe_action_t *action, const pe_node_t *node) { GList *containers = NULL; enum pe_action_flags flags = 0; pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, action->rsc); if(data->child) { enum action_tasks task = get_complex_task(data->child, action->task, TRUE); switch(task) { case no_action: case action_notify: case action_notified: case action_promote: case action_promoted: case action_demote: case action_demoted: return summary_action_flags(action, data->child->children, node); default: break; } } containers = get_container_list(action->rsc); flags = summary_action_flags(action, containers, node); g_list_free(containers); return flags; } pe_resource_t * find_compatible_child_by_node(const pe_resource_t *local_child, const pe_node_t *local_node, const pe_resource_t *rsc, enum rsc_role_e filter, gboolean current) { GList *gIter = NULL; GList *children = NULL; if (local_node == NULL) { crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id); return NULL; } crm_trace("Looking for compatible child from %s for %s on %s", local_child->id, rsc->id, pe__node_name(local_node)); children = get_containers_or_children(rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if(is_child_compatible(child_rsc, local_node, filter, current)) { crm_trace("Pairing %s with %s on %s", local_child->id, child_rsc->id, pe__node_name(local_node)); return child_rsc; } } crm_trace("Can't pair %s with %s", local_child->id, rsc->id); if(children != rsc->children) { g_list_free(children); } return NULL; } static pe__bundle_replica_t * replica_for_container(const pe_resource_t *rsc, const pe_resource_t *container, const pe_node_t *node) { if (rsc->variant == pe_container) { const pe__bundle_variant_data_t *data = NULL; 
get_bundle_variant_data(data, rsc); for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (replica->child && (container == replica->container) && pe__same_node(node, replica->node)) { return replica; } } } return NULL; } static uint32_t multi_update_interleave_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t filter, uint32_t type, pe_working_set_t *data_set) { GList *gIter = NULL; GList *children = NULL; gboolean current = FALSE; uint32_t changed = pcmk__updated_none; /* Fix this - lazy */ if (pcmk__ends_with(first->uuid, "_stopped_0") || pcmk__ends_with(first->uuid, "_demoted_0")) { current = TRUE; } children = get_containers_or_children(then->rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *then_child = gIter->data; pe_resource_t *first_child = find_compatible_child(then_child, first->rsc, RSC_ROLE_UNKNOWN, current); if (first_child == NULL && current) { crm_trace("Ignore"); } else if (first_child == NULL) { crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid, then->uuid); /* Me no like this hack - but what else can we do? 
* * If there is no-one active or about to be active * on the same node as then_child, then they must * not be allowed to start */ if (pcmk_any_flags_set(type, pe_order_runnable_left|pe_order_implies_then) /* Mandatory */ ) { pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id); if (pcmk__assign_resource(then_child, NULL, true)) { pcmk__set_updated_flags(changed, first, pcmk__updated_then); } } } else { pe_action_t *first_action = NULL; pe_action_t *then_action = NULL; enum action_tasks task = clone_child_action(first); const char *first_task = task2text(task); pe__bundle_replica_t *first_replica = NULL; pe__bundle_replica_t *then_replica = NULL; first_replica = replica_for_container(first->rsc, first_child, node); if (strstr(first->task, "stop") && first_replica && first_replica->child) { /* Except for 'stopped' we should be looking at the * in-container resource, actions for the child will * happen later and are therefor more likely to align * with the user's intent. 
*/ first_action = find_first_action(first_replica->child->actions, NULL, task2text(task), node); } else { first_action = find_first_action(first_child->actions, NULL, task2text(task), node); } then_replica = replica_for_container(then->rsc, then_child, node); if (strstr(then->task, "mote") && then_replica && then_replica->child) { /* Promote/demote actions will never be found for the * container resource, look in the child instead * * Alternatively treat: * 'XXXX then promote YYYY' as 'XXXX then start container for YYYY', and * 'demote XXXX then stop YYYY' as 'stop container for XXXX then stop YYYY' */ then_action = find_first_action(then_replica->child->actions, NULL, then->task, node); } else { then_action = find_first_action(then_child->actions, NULL, then->task, node); } if (first_action == NULL) { if (!pcmk_is_set(first_child->flags, pe_rsc_orphan) && !pcmk__str_any_of(first_task, RSC_STOP, RSC_DEMOTE, NULL)) { crm_err("Internal error: No action found for %s in %s (first)", first_task, first_child->id); } else { crm_trace("No action found for %s in %s%s (first)", first_task, first_child->id, pcmk_is_set(first_child->flags, pe_rsc_orphan)? " (ORPHAN)" : ""); } continue; } /* We're only interested if 'then' is neither stopping nor being demoted */ if (then_action == NULL) { if (!pcmk_is_set(then_child->flags, pe_rsc_orphan) && !pcmk__str_any_of(then->task, RSC_STOP, RSC_DEMOTE, NULL)) { crm_err("Internal error: No action found for %s in %s (then)", then->task, then_child->id); } else { crm_trace("No action found for %s in %s%s (then)", then->task, then_child->id, pcmk_is_set(then_child->flags, pe_rsc_orphan)? 
" (ORPHAN)" : ""); } continue; } if (order_actions(first_action, then_action, type)) { crm_debug("Created constraint for %s (%d) -> %s (%d) %.6x", first_action->uuid, pcmk_is_set(first_action->flags, pe_action_optional), then_action->uuid, pcmk_is_set(then_action->flags, pe_action_optional), type); pcmk__set_updated_flags(changed, first, pcmk__updated_first|pcmk__updated_then); } if(first_action && then_action) { changed |= then_child->cmds->update_ordered_actions(first_action, then_action, node, first_child->cmds->action_flags(first_action, node), filter, type, data_set); } else { crm_err("Nothing found either for %s (%p) or %s (%p) %s", first_child->id, first_action, then_child->id, then_action, task2text(task)); } } } if(children != then->rsc->children) { g_list_free(children); } return changed; } static bool can_interleave_actions(pe_action_t *first, pe_action_t *then) { bool interleave = FALSE; pe_resource_t *rsc = NULL; const char *interleave_s = NULL; if(first->rsc == NULL || then->rsc == NULL) { crm_trace("Not interleaving %s with %s (both must be resources)", first->uuid, then->uuid); return FALSE; } else if(first->rsc == then->rsc) { crm_trace("Not interleaving %s with %s (must belong to different resources)", first->uuid, then->uuid); return FALSE; } else if(first->rsc->variant < pe_clone || then->rsc->variant < pe_clone) { crm_trace("Not interleaving %s with %s (both sides must be clones or bundles)", first->uuid, then->uuid); return FALSE; } if (pcmk__ends_with(then->uuid, "_stop_0") || pcmk__ends_with(then->uuid, "_demote_0")) { rsc = first->rsc; } else { rsc = then->rsc; } interleave_s = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE); interleave = crm_is_true(interleave_s); crm_trace("Interleave %s -> %s: %s (based on %s)", first->uuid, then->uuid, interleave ? "yes" : "no", rsc->id); return interleave; } /*! 
* \internal * \brief Update two actions according to an ordering between them * * Given information about an ordering of two actions, update the actions' * flags (and runnable_before members if appropriate) as appropriate for the * ordering. In some cases, the ordering could be disabled as well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this node * (only used when interleaving instances) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates (may * include pe_action_optional to affect only mandatory * actions, and pe_action_runnable to affect only * runnable actions) * \param[in] type Group of enum pe_ordering flags to apply * \param[in,out] data_set Cluster working set * * \return Group of enum pcmk__updated flags indicating what was updated */ uint32_t pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set) { uint32_t changed = pcmk__updated_none; crm_trace("%s -> %s", first->uuid, then->uuid); if(can_interleave_actions(first, then)) { changed = multi_update_interleave_actions(first, then, node, filter, type, data_set); } else if(then->rsc) { GList *gIter = NULL; GList *children = NULL; // Handle the 'primitive' ordering case changed |= pcmk__update_ordered_actions(first, then, node, flags, filter, type, data_set); // Now any children (or containers in the case of a bundle) children = get_containers_or_children(then->rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *then_child = (pe_resource_t *) gIter->data; uint32_t then_child_changed = pcmk__updated_none; pe_action_t *then_child_action = find_first_action(then_child->actions, NULL, then->task, node); if (then_child_action) { uint32_t then_child_flags = 
then_child->cmds->action_flags(then_child_action, node); if (pcmk_is_set(then_child_flags, pe_action_runnable)) { then_child_changed |= then_child->cmds->update_ordered_actions(first, then_child_action, node, flags, filter, type, data_set); } changed |= then_child_changed; if (pcmk_is_set(then_child_changed, pcmk__updated_then)) { for (GList *lpc = then_child_action->actions_after; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *next = (pe_action_wrapper_t *) lpc->data; pcmk__update_action_for_orderings(next->action, data_set); } } } } if(children != then->rsc->children) { g_list_free(children); } } return changed; } void pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); pcmk__apply_location(rsc, constraint); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (replica->container) { replica->container->cmds->apply_location(replica->container, constraint); } if (replica->ip) { replica->ip->cmds->apply_location(replica->ip, constraint); } } if (bundle_data->child && ((constraint->role_filter == RSC_ROLE_UNPROMOTED) || (constraint->role_filter == RSC_ROLE_PROMOTED))) { bundle_data->child->cmds->apply_location(bundle_data->child, constraint); bundle_data->child->rsc_location = g_list_prepend(bundle_data->child->rsc_location, constraint); } } /*! 
* \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__bundle_expand(pe_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); if (bundle_data->child) { bundle_data->child->cmds->add_actions_to_graph(bundle_data->child); } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->remote && replica->container && pe__bundle_needs_remote_name(replica->remote)) { /* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that * run pacemaker-remoted inside, without needing a separate IP for * the container. This is done by configuring the inner remote's * connection host as the magic string "#uname", then * replacing it with the underlying host when needed. */ xmlNode *nvpair = get_xpath_object("//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']", replica->remote->xml, LOG_ERR); const char *calculated_addr = NULL; // Replace the value in replica->remote->xml (if appropriate) calculated_addr = pe__add_bundle_remote_name(replica->remote, rsc->cluster, nvpair, "value"); if (calculated_addr) { /* Since this is for the bundle as a resource, and not any * particular action, replace the value in the default * parameters (not evaluated for node). create_graph_action() * will grab it from there to replace it in node-evaluated * parameters. */ GHashTable *params = pe_rsc_params(replica->remote, NULL, rsc->cluster); g_hash_table_replace(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR), strdup(calculated_addr)); } else { /* The only way to get here is if the remote connection is * neither currently running nor scheduled to run. 
That means we * won't be doing any operations that require addr (only start * requires it; we additionally use it to compare digests when * unpacking status, promote, and migrate_from history, but * that's already happened by this point). */ crm_info("Unable to determine address for bundle %s remote connection", rsc->id); } } if (replica->ip) { replica->ip->cmds->add_actions_to_graph(replica->ip); } if (replica->container) { replica->container->cmds->add_actions_to_graph(replica->container); } if (replica->remote) { replica->remote->cmds->add_actions_to_graph(replica->remote); } } } /*! * \internal * * \brief Schedule any probes needed for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node) { bool any_created = false; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return false); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if ((replica->ip != NULL) && replica->ip->cmds->create_probe(replica->ip, node)) { any_created = true; } if ((replica->child != NULL) && (node->details == replica->node->details) && replica->child->cmds->create_probe(replica->child, node)) { any_created = true; } if ((replica->container != NULL) && replica->container->cmds->create_probe(replica->container, node)) { any_created = true; /* If we're limited to one replica per host (due to * the lack of an IP range probably), then we don't * want any of our peer containers starting until * we've established that no other copies are already * running. 
* * Partly this is to ensure that nreplicas_per_host is * observed, but also to ensure that the containers * don't fail to start because the necessary port * mappings (which won't include an IP for uniqueness) * are already taken */ for (GList *tIter = bundle_data->replicas; tIter && (bundle_data->nreplicas_per_host == 1); tIter = tIter->next) { pe__bundle_replica_t *other = tIter->data; if ((other != replica) && (other != NULL) && (other->container != NULL)) { pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, RSC_STATUS, 0), NULL, other->container, pcmk__op_key(other->container->id, RSC_START, 0), NULL, pe_order_optional|pe_order_same_node, rsc->cluster); } } } if ((replica->container != NULL) && (replica->remote != NULL) && replica->remote->cmds->create_probe(replica->remote, node)) { /* Do not probe the remote resource until we know where the * container is running. This is required for REMOTE_CONTAINER_HACK * to correctly probe remote resources. */ char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS, 0); pe_action_t *probe = find_first_action(replica->remote->actions, probe_uuid, NULL, node); free(probe_uuid); if (probe != NULL) { any_created = true; crm_trace("Ordering %s probe on %s", replica->remote->id, pe__node_name(node)); pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, RSC_START, 0), NULL, replica->remote, NULL, probe, pe_order_probe, rsc->cluster); } } } return any_created; } void pcmk__output_bundle_actions(pe_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip != NULL) { replica->ip->cmds->output_actions(replica->ip); } if (replica->container != NULL) { replica->container->cmds->output_actions(replica->container); } if (replica->remote != 
NULL) { replica->remote->cmds->output_actions(replica->remote); } if (replica->child != NULL) { replica->child->cmds->output_actions(replica->child); } } } // Bundle implementation of resource_alloc_functions_t:add_utilization() void pcmk__bundle_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { pe__bundle_variant_data_t *bundle_data = NULL; pe__bundle_replica_t *replica = NULL; if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } get_bundle_variant_data(bundle_data, rsc); if (bundle_data->replicas == NULL) { return; } /* All bundle replicas are identical, so using the utilization of the first * is sufficient for any. Only the implicit container resource can have * utilization values. */ replica = (pe__bundle_replica_t *) bundle_data->replicas->data; if (replica->container != NULL) { replica->container->cmds->add_utilization(replica->container, orig_rsc, all_rscs, utilization); } } // Bundle implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__bundle_shutdown_lock(pe_resource_t *rsc) { return; // Bundles currently don't support shutdown locks } diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c index a887f7d6d6..828420513e 100644 --- a/lib/pacemaker/pcmk_sched_clone.c +++ b/lib/pacemaker/pcmk_sched_clone.c @@ -1,1195 +1,649 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include "libpacemaker_private.h" #define VARIANT_CLONE 1 #include -static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all); - -static pe_node_t * -can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit) -{ - pe_node_t *local_node = NULL; - - if (node == NULL && rsc->allowed_nodes) { - GHashTableIter iter; - g_hash_table_iter_init(&iter, rsc->allowed_nodes); - while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) { - can_run_instance(rsc, local_node, limit); - } - return NULL; - } - - if (!node) { - /* make clang analyzer happy */ - goto bail; - - } else if (!pcmk__node_available(node, false, false)) { - goto bail; - - } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { - goto bail; - } - - local_node = pcmk__top_allowed_node(rsc, node); - - if (local_node == NULL) { - crm_warn("%s cannot run on %s: node not allowed", - rsc->id, pe__node_name(node)); - goto bail; - - } else if (local_node->weight < 0) { - common_update_score(rsc, node->details->id, local_node->weight); - pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.", - rsc->id, pe__node_name(node)); - - } else if (local_node->count < limit) { - pe_rsc_trace(rsc, "%s can run on %s (already running %d)", - rsc->id, pe__node_name(node), local_node->count); - return local_node; - - } else { - pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)", - rsc->id, pe__node_name(node), local_node->count, limit); - } - - bail: - if (node) { - common_update_score(rsc, node->details->id, -INFINITY); - } - return NULL; -} - -static pe_node_t * -allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc, - int limit, pe_working_set_t *data_set) -{ - pe_node_t *chosen = NULL; - GHashTable *backup = NULL; - - CRM_ASSERT(rsc); - pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)", - rsc->id, (prefer? prefer->details->uname: "none"), - (all_coloc? 
"all" : "some")); - - if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { - return rsc->fns->location(rsc, NULL, FALSE); - - } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { - pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); - return NULL; - } - - /* Only include positive colocation preferences of dependent resources - * if not every node will get a copy of the clone - */ - append_parent_colocation(rsc->parent, rsc, all_coloc); - - if (prefer) { - pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); - - if (local_prefer == NULL || local_prefer->weight < 0) { - pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id, - pe__node_name(prefer)); - return NULL; - } - } - - can_run_instance(rsc, NULL, limit); - - backup = pcmk__copy_node_table(rsc->allowed_nodes); - pe_rsc_trace(rsc, "Allocating instance %s", rsc->id); - chosen = rsc->cmds->assign(rsc, prefer); - if (chosen && prefer && (chosen->details != prefer->details)) { - crm_info("Not pre-allocating %s to %s because %s is better", - rsc->id, pe__node_name(prefer), pe__node_name(chosen)); - g_hash_table_destroy(rsc->allowed_nodes); - rsc->allowed_nodes = backup; - pcmk__unassign_resource(rsc); - chosen = NULL; - backup = NULL; - } - if (chosen) { - pe_node_t *local_node = pcmk__top_allowed_node(rsc, chosen); - - if (local_node) { - local_node->count++; - - } else if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { - /* what to do? 
we can't enforce per-node limits in this case */ - pcmk__config_err("%s not found in %s (list of %d)", - chosen->details->id, rsc->parent->id, - g_hash_table_size(rsc->parent->allowed_nodes)); - } - } - - if(backup) { - g_hash_table_destroy(backup); - } - return chosen; -} - -static void -append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all) -{ - - GList *gIter = NULL; - - gIter = rsc->rsc_cons; - for (; gIter != NULL; gIter = gIter->next) { - pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; - - if (all || cons->score < 0 || cons->score == INFINITY) { - pcmk__add_this_with(child, cons); - } - } - - gIter = rsc->rsc_cons_lhs; - for (; gIter != NULL; gIter = gIter->next) { - pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; - - if (!pcmk__colocation_has_influence(cons, child)) { - continue; - } - if (all || cons->score < 0) { - pcmk__add_with_this(child, cons); - } - } -} - - -void -distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, - int max, int per_host_max, pe_working_set_t * data_set); - -void -distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, - int max, int per_host_max, pe_working_set_t * data_set) -{ - int loop_max = 0; - int allocated = 0; - int available_nodes = 0; - bool all_coloc = false; - - /* count now tracks the number of clones currently allocated */ - for(GList *nIter = nodes; nIter != NULL; nIter = nIter->next) { - pe_node_t *node = nIter->data; - - node->count = 0; - if (pcmk__node_available(node, false, false)) { - available_nodes++; - } - } - - all_coloc = (max < available_nodes) ? 
true : false; - - if(available_nodes) { - loop_max = max / available_nodes; - } - if (loop_max < 1) { - loop_max = 1; - } - - pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)", - max, rsc->id, available_nodes, per_host_max, loop_max); - - /* Pre-allocate as many instances as we can to their current location */ - for (GList *gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) { - pe_resource_t *child = (pe_resource_t *) gIter->data; - pe_node_t *child_node = NULL; - pe_node_t *local_node = NULL; - - if ((child->running_on == NULL) - || !pcmk_is_set(child->flags, pe_rsc_provisional) - || pcmk_is_set(child->flags, pe_rsc_failed)) { - - continue; - } - - child_node = pe__current_node(child); - local_node = pcmk__top_allowed_node(child, child_node); - - pe_rsc_trace(rsc, - "Checking pre-allocation of %s to %s (%d remaining of %d)", - child->id, pe__node_name(child_node), max - allocated, - max); - - if (!pcmk__node_available(child_node, true, false)) { - pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s", - pe__node_name(child_node), child->id); - continue; - } - - if ((local_node != NULL) && (local_node->count >= loop_max)) { - pe_rsc_trace(rsc, - "Not pre-allocating because %s already allocated " - "optimal instances", pe__node_name(child_node)); - continue; - } - - if (allocate_instance(child, child_node, all_coloc, per_host_max, - data_set)) { - pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id, - pe__node_name(child_node)); - allocated++; - } - } - - pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max); - - for (GList *gIter = children; gIter != NULL; gIter = gIter->next) { - pe_resource_t *child = (pe_resource_t *) gIter->data; - - if (child->running_on != NULL) { - pe_node_t *child_node = pe__current_node(child); - pe_node_t *local_node = pcmk__top_allowed_node(child, child_node); - - if (local_node == NULL) { - crm_err("%s is running on %s which 
isn't allowed", - child->id, pe__node_name(child_node)); - } - } - - if (!pcmk_is_set(child->flags, pe_rsc_provisional)) { - } else if (allocated >= max) { - pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max); - resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set); - } else { - if (allocate_instance(child, NULL, all_coloc, per_host_max, - data_set)) { - allocated++; - } - } - } - - pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d", - allocated, rsc->id, max); -} - /*! * \internal * \brief Assign a clone resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * * \return Node that \p rsc is assigned to, if assigned entirely to one node */ pe_node_t * pcmk__clone_allocate(pe_resource_t *rsc, const pe_node_t *prefer) { GList *nodes = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return NULL; } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__add_promotion_scores(rsc); } pe__set_resource_flags(rsc, pe_rsc_allocating); /* This information is used by pcmk__cmp_instance() when deciding the order * in which to assign clone instances to nodes. 
*/ for (GList *gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; pe_rsc_trace(rsc, "%s: Allocating %s first", rsc->id, constraint->primary->id); constraint->primary->cmds->assign(constraint->primary, prefer); } for (GList *gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (pcmk__colocation_has_influence(constraint, NULL)) { pe_resource_t *dependent = constraint->dependent; const char *attr = constraint->node_attribute; const float factor = constraint->score / (float) INFINITY; const uint32_t flags = pcmk__coloc_select_active |pcmk__coloc_select_nonnegative; pcmk__add_colocated_node_scores(dependent, rsc->id, &rsc->allowed_nodes, attr, factor, flags); } } pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, rsc->cluster); nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = pcmk__sort_nodes(nodes, NULL); rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance); distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, rsc->cluster); g_list_free(nodes); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__set_instance_roles(rsc); } pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating); pe_rsc_trace(rsc, "Done allocating %s", rsc->id); return NULL; } -static void -clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting, - gboolean * active) -{ - GList *gIter = NULL; - - if (rsc->children) { - - gIter = rsc->children; - for (; gIter != NULL; gIter = gIter->next) { - pe_resource_t *child = (pe_resource_t *) gIter->data; - - clone_update_pseudo_status(child, stopping, starting, active); - } - - return; - } - - CRM_ASSERT(active != NULL); - CRM_ASSERT(starting != NULL); - CRM_ASSERT(stopping != NULL); - - if (rsc->running_on) { - *active = TRUE; - 
} - - gIter = rsc->actions; - for (; gIter != NULL; gIter = gIter->next) { - pe_action_t *action = (pe_action_t *) gIter->data; - - if (*starting && *stopping) { - return; - - } else if (pcmk_is_set(action->flags, pe_action_optional)) { - pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid); - continue; - - } else if (!pcmk_any_flags_set(action->flags, - pe_action_pseudo|pe_action_runnable)) { - pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid); - continue; - - } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) { - pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid); - *stopping = TRUE; - - } else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) { - if (!pcmk_is_set(action->flags, pe_action_runnable)) { - pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d", - action->uuid, - pcmk_is_set(action->flags, pe_action_runnable), - pcmk_is_set(action->flags, pe_action_pseudo)); - } else { - pe_rsc_trace(rsc, "Starting due to: %s", action->uuid); - pe_rsc_trace(rsc, "%s run=%d, pseudo=%d", - action->uuid, - pcmk_is_set(action->flags, pe_action_runnable), - pcmk_is_set(action->flags, pe_action_pseudo)); - *starting = TRUE; - } - } - } -} - static pe_action_t * find_rsc_action(pe_resource_t *rsc, const char *task) { pe_action_t *match = NULL; GList *actions = pe__resource_actions(rsc, NULL, task, FALSE); for (GList *item = actions; item != NULL; item = item->next) { pe_action_t *op = (pe_action_t *) item->data; if (!pcmk_is_set(op->flags, pe_action_optional)) { if (match != NULL) { // More than one match, don't return any match = NULL; break; } match = op; } } g_list_free(actions); return match; } static void child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *stop = NULL; pe_action_t *start = NULL; pe_action_t *last_stop = NULL; pe_action_t *last_start = NULL; GList *gIter = NULL; if (!pe__clone_is_ordered(rsc)) { return; } /* we have to maintain a consistent sorted child list when 
building order constraints */ rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; stop = find_rsc_action(child, RSC_STOP); if (stop) { if (last_stop) { /* child/child relative stop */ order_actions(stop, last_stop, pe_order_optional); } last_stop = stop; } start = find_rsc_action(child, RSC_START); if (start) { if (last_start) { /* child/child relative start */ order_actions(last_start, start, pe_order_optional); } last_start = start; } } } void clone_create_actions(pe_resource_t *rsc) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_debug(rsc, "Creating actions for clone %s", rsc->id); clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify); child_ordering_constraints(rsc, rsc->cluster); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__create_promotable_actions(rsc); } } -void -clone_create_pseudo_actions(pe_resource_t *rsc, GList *children, - notify_data_t **start_notify, - notify_data_t **stop_notify) -{ - gboolean child_active = FALSE; - gboolean child_starting = FALSE; - gboolean child_stopping = FALSE; - gboolean allow_dependent_migrations = TRUE; - - pe_action_t *stop = NULL; - pe_action_t *stopped = NULL; - - pe_action_t *start = NULL; - pe_action_t *started = NULL; - - pe_rsc_trace(rsc, "Creating actions for %s", rsc->id); - - for (GList *gIter = children; gIter != NULL; gIter = gIter->next) { - pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; - gboolean starting = FALSE; - gboolean stopping = FALSE; - - child_rsc->cmds->create_actions(child_rsc); - clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active); - if (stopping && starting) { - allow_dependent_migrations = FALSE; - } - - child_stopping |= stopping; - child_starting |= starting; - } - - /* start */ - start = pe__new_rsc_pseudo_action(rsc, 
RSC_START, !child_starting, true); - started = pe__new_rsc_pseudo_action(rsc, RSC_STARTED, !child_starting, - false); - started->priority = INFINITY; - - if (child_active || child_starting) { - pe__set_action_flags(started, pe_action_runnable); - } - - if (start_notify != NULL && *start_notify == NULL) { - *start_notify = pe__clone_notif_pseudo_ops(rsc, RSC_START, start, - started); - } - - /* stop */ - stop = pe__new_rsc_pseudo_action(rsc, RSC_STOP, !child_stopping, true); - stopped = pe__new_rsc_pseudo_action(rsc, RSC_STOPPED, !child_stopping, - true); - stopped->priority = INFINITY; - if (allow_dependent_migrations) { - pe__set_action_flags(stop, pe_action_migrate_runnable); - } - - if (stop_notify != NULL && *stop_notify == NULL) { - *stop_notify = pe__clone_notif_pseudo_ops(rsc, RSC_STOP, stop, stopped); - - if (start_notify && *start_notify && *stop_notify) { - order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional); - } - } -} - void clone_internal_constraints(pe_resource_t *rsc) { pe_resource_t *last_rsc = NULL; GList *gIter; bool ordered = pe__clone_is_ordered(rsc); pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id); pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional); pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left); pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional); pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left); } if (ordered) { /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; 
child_rsc->cmds->internal_constraints(child_rsc); pcmk__order_starts(rsc, child_rsc, pe_order_runnable_left|pe_order_implies_first_printed); pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed); if (ordered && (last_rsc != NULL)) { pcmk__order_starts(last_rsc, child_rsc, pe_order_optional); } pcmk__order_stops(rsc, child_rsc, pe_order_implies_first_printed); pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed); if (ordered && (last_rsc != NULL)) { pcmk__order_stops(child_rsc, last_rsc, pe_order_optional); } last_rsc = child_rsc; } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__order_promotable_instances(rsc); } } -gboolean -is_child_compatible(const pe_resource_t *child_rsc, const pe_node_t *local_node, - enum rsc_role_e filter, gboolean current) -{ - pe_node_t *node = NULL; - enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); - - CRM_CHECK(child_rsc && local_node, return FALSE); - if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { - /* We only want instances that haven't failed */ - node = child_rsc->fns->location(child_rsc, NULL, current); - } - - if (filter != RSC_ROLE_UNKNOWN && next_role != filter) { - crm_trace("Filtered %s", child_rsc->id); - return FALSE; - } - - if (node && (node->details == local_node->details)) { - return TRUE; - - } else if (node) { - crm_trace("%s - %s vs %s", child_rsc->id, pe__node_name(node), - pe__node_name(local_node)); - - } else { - crm_trace("%s - not allocated %d", child_rsc->id, current); - } - return FALSE; -} - -pe_resource_t * -find_compatible_child(const pe_resource_t *local_child, - const pe_resource_t *rsc, enum rsc_role_e filter, - gboolean current) -{ - pe_resource_t *pair = NULL; - GList *gIter = NULL; - GList *scratch = NULL; - pe_node_t *local_node = NULL; - - local_node = local_child->fns->location(local_child, NULL, current); - if (local_node) { - return 
find_compatible_child_by_node(local_child, local_node, rsc, filter, current); - } - - scratch = g_hash_table_get_values(local_child->allowed_nodes); - scratch = pcmk__sort_nodes(scratch, NULL); - - gIter = scratch; - for (; gIter != NULL; gIter = gIter->next) { - pe_node_t *node = (pe_node_t *) gIter->data; - - pair = find_compatible_child_by_node(local_child, node, rsc, filter, current); - if (pair) { - goto done; - } - } - - pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id); - done: - g_list_free(scratch); - return pair; -} - /*! * \internal * \brief Apply a colocation's score to node weights or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node weights (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void pcmk__clone_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { GList *gIter = NULL; gboolean do_interleave = FALSE; const char *interleave_s = NULL; /* This should never be called for the clone itself as a dependent. Instead, * we add its colocation constraints to its instances and call the * apply_coloc_score() for the instances as dependents. 
*/ CRM_ASSERT(!for_dependent); CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL), return); CRM_CHECK(dependent->variant == pe_native, return); pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d", colocation->id, dependent->id, primary->id, colocation->score); if (pcmk_is_set(primary->flags, pe_rsc_promotable)) { if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { // We haven't placed the primary yet, so we can't apply colocation pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (colocation->primary_role == RSC_ROLE_UNKNOWN) { // This isn't a role-specfic colocation, so handle normally pe_rsc_trace(primary, "Handling %s as a clone colocation", colocation->id); } else if (pcmk_is_set(dependent->flags, pe_rsc_provisional)) { // We're placing the dependent pcmk__update_dependent_with_promotable(primary, dependent, colocation); return; } else if (colocation->dependent_role == RSC_ROLE_PROMOTED) { // We're choosing roles for the dependent pcmk__update_promotable_dependent_priority(primary, dependent, colocation); return; } } // Only the dependent needs to be marked for interleave interleave_s = g_hash_table_lookup(colocation->dependent->meta, XML_RSC_ATTR_INTERLEAVE); if (crm_is_true(interleave_s) && (colocation->dependent->variant > pe_group)) { /* @TODO Do we actually care about multiple primary copies sharing a * dependent copy anymore? 
*/ if (copies_per_node(colocation->dependent) != copies_per_node(colocation->primary)) { pcmk__config_err("Cannot interleave %s and %s because they do not " "support the same number of instances per node", colocation->dependent->id, colocation->primary->id); } else { do_interleave = TRUE; } } if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (do_interleave) { pe_resource_t *primary_instance = NULL; primary_instance = find_compatible_child(dependent, primary, RSC_ROLE_UNKNOWN, FALSE); if (primary_instance != NULL) { pe_rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_instance->id); dependent->cmds->apply_coloc_score(dependent, primary_instance, colocation, true); } else if (colocation->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true); } else { pe_rsc_debug(primary, "Cannot pair %s with instance of %s", dependent->id, primary->id); } return; } else if (colocation->score >= INFINITY) { GList *affected_nodes = NULL; gIter = primary->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { pe_rsc_trace(primary, "Allowing %s: %s %d", colocation->id, pe__node_name(chosen), chosen->weight); affected_nodes = g_list_prepend(affected_nodes, chosen); } } node_list_exclude(dependent->allowed_nodes, affected_nodes, FALSE); g_list_free(affected_nodes); return; } gIter = primary->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->apply_coloc_score(dependent, child_rsc, colocation, false); } } -enum action_tasks -clone_child_action(pe_action_t * action) -{ - enum action_tasks result = no_action; - 
pe_resource_t *child = (pe_resource_t *) action->rsc->children->data; - - if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) { - - /* Find the action we're notifying about instead */ - - int stop = 0; - char *key = action->uuid; - int lpc = strlen(key); - - for (; lpc > 0; lpc--) { - if (key[lpc] == '_' && stop == 0) { - stop = lpc; - - } else if (key[lpc] == '_') { - char *task_mutable = NULL; - - lpc++; - task_mutable = strdup(key + lpc); - task_mutable[stop - lpc] = 0; - - crm_trace("Extracted action '%s' from '%s'", task_mutable, key); - result = get_complex_task(child, task_mutable, TRUE); - free(task_mutable); - break; - } - } - - } else { - result = get_complex_task(child, action->task, TRUE); - } - return result; -} - -#define pe__clear_action_summary_flags(flags, action, flag) do { \ - flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ - "Action summary", action->rsc->id, \ - flags, flag, #flag); \ - } while (0) - -enum pe_action_flags -summary_action_flags(pe_action_t *action, GList *children, - const pe_node_t *node) -{ - GList *gIter = NULL; - gboolean any_runnable = FALSE; - gboolean check_runnable = TRUE; - enum action_tasks task = clone_child_action(action); - enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); - const char *task_s = task2text(task); - - for (gIter = children; gIter != NULL; gIter = gIter->next) { - pe_action_t *child_action = NULL; - pe_resource_t *child = (pe_resource_t *) gIter->data; - - child_action = find_first_action(child->actions, NULL, task_s, child->children ? 
NULL : node); - pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id, - pe__node_name(node), child_action?child_action->uuid:"NA"); - if (child_action) { - enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); - - if (pcmk_is_set(flags, pe_action_optional) - && !pcmk_is_set(child_flags, pe_action_optional)) { - pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid, - child_action->uuid); - pe__clear_action_summary_flags(flags, action, pe_action_optional); - pe__clear_action_flags(action, pe_action_optional); - } - if (pcmk_is_set(child_flags, pe_action_runnable)) { - any_runnable = TRUE; - } - } - } - - if (check_runnable && any_runnable == FALSE) { - pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid); - pe__clear_action_summary_flags(flags, action, pe_action_runnable); - if (node == NULL) { - pe__clear_action_flags(action, pe_action_runnable); - } - } - - return flags; -} - enum pe_action_flags clone_action_flags(pe_action_t *action, const pe_node_t *node) { return summary_action_flags(action, action->rsc->children, node); } void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { GList *gIter = rsc->children; pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); pcmk__apply_location(rsc, constraint); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->apply_location(child_rsc, constraint); } } /*! 
* \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void clone_expand(pe_resource_t *rsc) { GList *gIter = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); g_list_foreach(rsc->actions, (GFunc) rsc->cmds->action_flags, NULL); pe__create_notifications(rsc, clone_data->start_notify); pe__create_notifications(rsc, clone_data->stop_notify); pe__create_notifications(rsc, clone_data->promote_notify); pe__create_notifications(rsc, clone_data->demote_notify); /* Now that the notifcations have been created we can expand the children */ gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->add_actions_to_graph(child_rsc); } pcmk__add_rsc_actions_to_graph(rsc); /* The notifications are in the graph now, we can destroy the notify_data */ pe__free_notification_data(clone_data->demote_notify); clone_data->demote_notify = NULL; pe__free_notification_data(clone_data->stop_notify); clone_data->stop_notify = NULL; pe__free_notification_data(clone_data->start_notify); clone_data->start_notify = NULL; pe__free_notification_data(clone_data->promote_notify); clone_data->promote_notify = NULL; } // Check whether a resource or any of its children is known on node static bool rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node) { if (rsc->children) { for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; if (rsc_known_on(child, node)) { return TRUE; } } } else if (rsc->known_on) { GHashTableIter iter; pe_node_t *known_node = NULL; g_hash_table_iter_init(&iter, rsc->known_on); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) { if (node->details == known_node->details) { return TRUE; } } } return FALSE; } // Look for an instance of clone that is known on node 
static pe_resource_t * find_instance_on(const pe_resource_t *clone, const pe_node_t *node) { for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (rsc_known_on(child, node)) { return child; } } return NULL; } // For anonymous clones, only a single instance needs to be probed static bool probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { // First, check if we probed an instance on this node last time pe_resource_t *child = find_instance_on(rsc, node); // Otherwise, check if we plan to start an instance on this node if (child == NULL) { for (GList *child_iter = rsc->children; child_iter && !child; child_iter = child_iter->next) { pe_node_t *local_node = NULL; pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data; if (child_rsc) { /* make clang analyzer happy */ local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); if (local_node && (local_node->details == node->details)) { child = child_rsc; } } } } // Otherwise, use the first clone instance if (child == NULL) { child = rsc->children->data; } CRM_ASSERT(child); return child->cmds->create_probe(child, node); } /*! 
* \internal * * \brief Schedule any probes needed for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool clone_create_probe(pe_resource_t *rsc, pe_node_t *node) { CRM_ASSERT(rsc); rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); if (rsc->children == NULL) { pe_warn("Clone %s has no children", rsc->id); return false; } if (rsc->exclusive_discover) { pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on * * remove the node from allowed_nodes so that the * notification contains only nodes that we might ever run * on */ g_hash_table_remove(rsc->allowed_nodes, node->details->id); /* Bit of a shortcut - might as well take it */ return false; } } if (pcmk_is_set(rsc->flags, pe_rsc_unique)) { return pcmk__probe_resource_list(rsc->children, node); } else { return probe_anonymous_clone(rsc, node, rsc->cluster); } } void clone_append_meta(const pe_resource_t *rsc, xmlNode *xml) { char *name = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); name = crm_meta_name(XML_RSC_ATTR_UNIQUE); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique)); free(name); name = crm_meta_name(XML_RSC_ATTR_NOTIFY); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify)); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX); crm_xml_add_int(xml, name, clone_data->clone_max); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX); crm_xml_add_int(xml, name, clone_data->clone_node_max); free(name); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { int promoted_max = pe__clone_promoted_max(rsc); int promoted_node_max = pe__clone_promoted_node_max(rsc); name = 
crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX); crm_xml_add_int(xml, name, promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX); crm_xml_add_int(xml, name, promoted_node_max); free(name); /* @COMPAT Maintain backward compatibility with resource agents that * expect the old names (deprecated since 2.0.0). */ name = crm_meta_name(PCMK_XA_PROMOTED_MAX_LEGACY); crm_xml_add_int(xml, name, promoted_max); free(name); name = crm_meta_name(PCMK_XA_PROMOTED_NODE_MAX_LEGACY); crm_xml_add_int(xml, name, promoted_node_max); free(name); } } // Clone implementation of resource_alloc_functions_t:add_utilization() void pcmk__clone_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { bool existing = false; pe_resource_t *child = NULL; if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } // Look for any child already existing in the list for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { child = (pe_resource_t *) iter->data; if (g_list_find(all_rscs, child)) { existing = true; // Keep checking remaining children } else { // If this is a clone of a group, look for group's members for (GList *member_iter = child->children; member_iter != NULL; member_iter = member_iter->next) { pe_resource_t *member = (pe_resource_t *) member_iter->data; if (g_list_find(all_rscs, member) != NULL) { // Add *child's* utilization, not group member's child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); existing = true; break; } } } } if (!existing && (rsc->children != NULL)) { // If nothing was found, still add first child's utilization child = (pe_resource_t *) rsc->children->data; child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); } } // Clone implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__clone_shutdown_lock(pe_resource_t *rsc) { return; // Clones currently don't support shutdown locks } diff --git 
a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c
new file mode 100644
index 0000000000..bf7304eaa9
--- /dev/null
+++ b/lib/pacemaker/pcmk_sched_instances.c
@@ -0,0 +1,726 @@
+/*
+ * Copyright 2004-2023 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+/* This file is intended for code usable with both clone instances and bundle
+ * replica containers.
+ */
+
+/* NOTE(review): the header names of the first three includes were missing from
+ * the copy of this patch under review; they are restored here from the
+ * project's other scheduler sources -- confirm against the repository.
+ */
+#include <crm_internal.h>
+#include <crm/msg_xml.h>
+#include <pacemaker-internal.h>
+#include "libpacemaker_private.h"
+
+static void append_parent_colocation(pe_resource_t *rsc, pe_resource_t *child,
+                                     gboolean all);
+
+/*!
+ * \internal
+ * \brief Check whether an instance can run on a node, banning it if not
+ *
+ * If \p node is NULL and the instance has a table of allowed nodes, each
+ * allowed node is checked in turn (banning the instance where it cannot run),
+ * and NULL is returned.
+ *
+ * \param[in,out] rsc    Instance to check
+ * \param[in]     node   Node to check (or NULL to check all allowed nodes)
+ * \param[in]     limit  Maximum number of instances allowed per node
+ *
+ * \return Node entry found by pcmk__top_allowed_node() for \p node if the
+ *         instance can run there, otherwise NULL (after giving the instance a
+ *         score of -INFINITY for \p node, if \p node is not NULL)
+ */
+static pe_node_t *
+can_run_instance(pe_resource_t *rsc, pe_node_t *node, int limit)
+{
+    pe_node_t *local_node = NULL;
+
+    if ((node == NULL) && (rsc->allowed_nodes != NULL)) {
+        GHashTableIter iter;
+
+        g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+        while (g_hash_table_iter_next(&iter, NULL, (void **) &local_node)) {
+            can_run_instance(rsc, local_node, limit);
+        }
+        return NULL;
+    }
+
+    if (node == NULL) {
+        /* make clang analyzer happy */
+        goto bail;
+
+    } else if (!pcmk__node_available(node, false, false)) {
+        goto bail;
+
+    } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
+        goto bail;
+    }
+
+    local_node = pcmk__top_allowed_node(rsc, node);
+
+    if (local_node == NULL) {
+        crm_warn("%s cannot run on %s: node not allowed",
+                 rsc->id, pe__node_name(node));
+        goto bail;
+
+    } else if (local_node->weight < 0) {
+        // Pass on the negative weight, then fall through to the update below
+        common_update_score(rsc, node->details->id, local_node->weight);
+        pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.",
+                     rsc->id, pe__node_name(node));
+
+    } else if (local_node->count < limit) {
+        pe_rsc_trace(rsc, "%s can run on %s (already running %d)",
+                     rsc->id, pe__node_name(node), local_node->count);
+        return local_node;
+
+    } else {
+        pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)",
+                     rsc->id, pe__node_name(node), local_node->count, limit);
+    }
+
+  bail:
+    if (node != NULL) {
+        common_update_score(rsc, node->details->id, -INFINITY);
+    }
+    return NULL;
+}
+
+/*!
+ * \internal
+ * \brief Assign a single instance to a node
+ *
+ * \param[in,out] rsc        Instance to assign
+ * \param[in]     prefer     If not NULL, node that the instance must end up
+ *                           on for the assignment to be kept
+ * \param[in]     all_coloc  Passed to append_parent_colocation() to select
+ *                           whether all of the parent's colocations apply
+ * \param[in]     limit      Maximum number of instances allowed per node
+ * \param[in]     data_set   Cluster working set (not used by this function)
+ *
+ * \return Node the instance is assigned to, or NULL if none (including when
+ *         \p prefer was given but a different node was chosen)
+ */
+static pe_node_t *
+allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc,
+                  int limit, pe_working_set_t *data_set)
+{
+    pe_node_t *chosen = NULL;
+    GHashTable *backup = NULL;
+
+    CRM_ASSERT(rsc);
+    pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)",
+                 rsc->id, (prefer? prefer->details->uname: "none"),
+                 (all_coloc? "all" : "some"));
+
+    if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+        // Not provisional, so return what location() reports without assigning
+        return rsc->fns->location(rsc, NULL, FALSE);
+
+    } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
+        pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
+        return NULL;
+    }
+
+    /* Only include positive colocation preferences of dependent resources
+     * if not every node will get a copy of the clone
+     */
+    append_parent_colocation(rsc->parent, rsc, all_coloc);
+
+    if (prefer) {
+        pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes,
+                                                      prefer->details->id);
+
+        if (local_prefer == NULL || local_prefer->weight < 0) {
+            pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id,
+                         pe__node_name(prefer));
+            return NULL;
+        }
+    }
+
+    // Give the instance a -INFINITY score on each allowed node it can't use
+    can_run_instance(rsc, NULL, limit);
+
+    // Keep a copy of the node table so a rejected assignment can be undone
+    backup = pcmk__copy_node_table(rsc->allowed_nodes);
+    pe_rsc_trace(rsc, "Allocating instance %s", rsc->id);
+    chosen = rsc->cmds->assign(rsc, prefer);
+    if (chosen && prefer && (chosen->details != prefer->details)) {
+        crm_info("Not pre-allocating %s to %s because %s is better",
+                 rsc->id, pe__node_name(prefer), pe__node_name(chosen));
+        g_hash_table_destroy(rsc->allowed_nodes);
+        rsc->allowed_nodes = backup;
+        pcmk__unassign_resource(rsc);
+        chosen = NULL;
+        backup = NULL;  // Now owned by rsc->allowed_nodes
+    }
+    if (chosen) {
+        pe_node_t *local_node = pcmk__top_allowed_node(rsc, chosen);
+
+        if (local_node) {
+            local_node->count++;
+
+        } else if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+            /* what to do? we can't enforce per-node limits in this case */
+            pcmk__config_err("%s not found in %s (list of %d)",
+                             chosen->details->id, rsc->parent->id,
+                             g_hash_table_size(rsc->parent->allowed_nodes));
+        }
+    }
+
+    if (backup) {
+        g_hash_table_destroy(backup);
+    }
+    return chosen;
+}
+
+/*!
+ * \internal
+ * \brief Apply a parent's colocation constraints to one of its instances
+ *
+ * Colocations in the parent's rsc_cons_lhs list are always skipped when
+ * pcmk__colocation_has_influence() is false for the instance.
+ *
+ * \param[in]     rsc    Parent whose colocation lists should be used
+ * \param[in,out] child  Instance to apply the colocations to
+ * \param[in]     all    If TRUE, use every colocation; otherwise use only
+ *                       those with a negative score, plus (for the rsc_cons
+ *                       list only) those with a score of INFINITY
+ */
+static void
+append_parent_colocation(pe_resource_t *rsc, pe_resource_t *child,
+                         gboolean all)
+{
+    for (GList *gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
+        pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data;
+
+        if (all || cons->score < 0 || cons->score == INFINITY) {
+            pcmk__add_this_with(child, cons);
+        }
+    }
+
+    for (GList *gIter = rsc->rsc_cons_lhs; gIter != NULL;
+         gIter = gIter->next) {
+        pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data;
+
+        if (!pcmk__colocation_has_influence(cons, child)) {
+            continue;
+        }
+        if (all || cons->score < 0) {
+            pcmk__add_with_this(child, cons);
+        }
+    }
+}
+
+/*!
+ * \internal
+ * \brief Assign the instances of a collective resource to nodes
+ *
+ * Running, provisional, non-failed instances are first offered their current
+ * node. Every remaining provisional instance is then either assigned wherever
+ * it fits or, once \p max instances have been assigned, given a -INFINITY
+ * location score via resource_location() with no specific node.
+ *
+ * \param[in]     rsc           Collective resource (used here only for logs)
+ * \param[in,out] children      List of instances (pe_resource_t *) to assign
+ * \param[in,out] nodes         List of nodes (pe_node_t *) whose instance
+ *                              counts are reset to 0 before assigning
+ * \param[in]     max           Maximum number of instances to assign in total
+ * \param[in]     per_host_max  Maximum number of instances per node
+ * \param[in,out] data_set      Cluster working set
+ */
+void
+distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
+                    int max, int per_host_max, pe_working_set_t * data_set)
+{
+    int loop_max = 0;
+    int allocated = 0;
+    int available_nodes = 0;
+    bool all_coloc = false;
+
+    /* count now tracks the number of clones currently allocated */
+    for (GList *nIter = nodes; nIter != NULL; nIter = nIter->next) {
+        pe_node_t *node = nIter->data;
+
+        node->count = 0;
+        if (pcmk__node_available(node, false, false)) {
+            available_nodes++;
+        }
+    }
+
+    all_coloc = (max < available_nodes);
+
+    // Aim for an even spread across available nodes, but at least one each
+    if (available_nodes > 0) {
+        loop_max = max / available_nodes;
+    }
+    if (loop_max < 1) {
+        loop_max = 1;
+    }
+
+    pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)",
+                 max, rsc->id, available_nodes, per_host_max, loop_max);
+
+    /* Pre-allocate as many instances as we can to their current location */
+    for (GList *gIter = children; (gIter != NULL) && (allocated < max);
+         gIter = gIter->next) {
+        pe_resource_t *child = (pe_resource_t *) gIter->data;
+        pe_node_t *child_node = NULL;
+        pe_node_t *local_node = NULL;
+
+        if ((child->running_on == NULL)
+            || !pcmk_is_set(child->flags, pe_rsc_provisional)
+            || pcmk_is_set(child->flags, pe_rsc_failed)) {
+
+            continue;
+        }
+
+        child_node = pe__current_node(child);
+        local_node = pcmk__top_allowed_node(child, child_node);
+
+        pe_rsc_trace(rsc,
+                     "Checking pre-allocation of %s to %s (%d remaining of %d)",
+                     child->id, pe__node_name(child_node), max - allocated,
+                     max);
+
+        if (!pcmk__node_available(child_node, true, false)) {
+            pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s",
+                         pe__node_name(child_node), child->id);
+            continue;
+        }
+
+        if ((local_node != NULL) && (local_node->count >= loop_max)) {
+            pe_rsc_trace(rsc,
+                         "Not pre-allocating because %s already allocated "
+                         "optimal instances", pe__node_name(child_node));
+            continue;
+        }
+
+        if (allocate_instance(child, child_node, all_coloc, per_host_max,
+                              data_set)) {
+            pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id,
+                         pe__node_name(child_node));
+            allocated++;
+        }
+    }
+
+    pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max);
+
+    for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
+        pe_resource_t *child = (pe_resource_t *) gIter->data;
+
+        if (child->running_on != NULL) {
+            pe_node_t *child_node = pe__current_node(child);
+            pe_node_t *local_node = pcmk__top_allowed_node(child, child_node);
+
+            if (local_node == NULL) {
+                crm_err("%s is running on %s which isn't allowed",
+                        child->id, pe__node_name(child_node));
+            }
+        }
+
+        if (!pcmk_is_set(child->flags, pe_rsc_provisional)) {
+            continue;
+        }
+
+        if (allocated >= max) {
+            pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max);
+            resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set);
+
+        } else if (allocate_instance(child, NULL, all_coloc, per_host_max,
+                                     data_set)) {
+            allocated++;
+        }
+    }
+
+    pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d",
+                 allocated, rsc->id, max);
+}
+
+/*!
+ * \internal
+ * \brief Update start/stop/active indicators from a resource's actions
+ *
+ * If the resource has children, they are examined (recursively) instead of
+ * the resource itself. The indicators are only ever set to TRUE here, never
+ * reset to FALSE, so results accumulate across calls.
+ *
+ * \param[in]     rsc       Resource to examine
+ * \param[in,out] stopping  Set to TRUE if a non-optional stop action is found
+ *                          that is runnable or a pseudo-action
+ * \param[in,out] starting  Set to TRUE if a non-optional, runnable start
+ *                          action is found
+ * \param[in,out] active    Set to TRUE if the resource is running anywhere
+ */
+static void
+clone_update_pseudo_status(pe_resource_t *rsc, gboolean *stopping,
+                           gboolean *starting, gboolean *active)
+{
+    if (rsc->children) {
+        for (GList *gIter = rsc->children; gIter != NULL;
+             gIter = gIter->next) {
+            pe_resource_t *child = (pe_resource_t *) gIter->data;
+
+            clone_update_pseudo_status(child, stopping, starting, active);
+        }
+        return;
+    }
+
+    CRM_ASSERT(active != NULL);
+    CRM_ASSERT(starting != NULL);
+    CRM_ASSERT(stopping != NULL);
+
+    if (rsc->running_on) {
+        *active = TRUE;
+    }
+
+    for (GList *gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
+        pe_action_t *action = (pe_action_t *) gIter->data;
+
+        if (*starting && *stopping) {
+            // Both indicators are already set, so nothing more can change
+            return;
+
+        } else if (pcmk_is_set(action->flags, pe_action_optional)) {
+            pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid);
+            continue;
+
+        } else if (!pcmk_any_flags_set(action->flags,
+                                       pe_action_pseudo|pe_action_runnable)) {
+            pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid);
+            continue;
+
+        } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) {
+            pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid);
+            *stopping = TRUE;
+
+        } else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) {
+            if (!pcmk_is_set(action->flags, pe_action_runnable)) {
+                pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d",
+                             action->uuid,
+                             pcmk_is_set(action->flags, pe_action_runnable),
+                             pcmk_is_set(action->flags, pe_action_pseudo));
+            } else {
+                pe_rsc_trace(rsc, "Starting due to: %s", action->uuid);
+                pe_rsc_trace(rsc, "%s run=%d, pseudo=%d",
+                             action->uuid,
+                             pcmk_is_set(action->flags, pe_action_runnable),
+                             pcmk_is_set(action->flags, pe_action_pseudo));
+                *starting = TRUE;
+            }
+        }
+    }
+}
+
+/*!
+ * \internal
+ * \brief Create actions for instances and pseudo-actions for their parent
+ *
+ * Each instance's create_actions() method is called, then start, started,
+ * stop, and stopped pseudo-actions are created for the parent based on
+ * whether any instance is starting or stopping.
+ *
+ * \param[in,out] rsc           Parent resource to create pseudo-actions for
+ * \param[in,out] children      List of instances (pe_resource_t *)
+ * \param[in,out] start_notify  If not NULL and pointing to NULL, set to the
+ *                              result of pe__clone_notif_pseudo_ops() for the
+ *                              start and started actions
+ * \param[in,out] stop_notify   If not NULL and pointing to NULL, set to the
+ *                              result of pe__clone_notif_pseudo_ops() for the
+ *                              stop and stopped actions
+ */
+void
+clone_create_pseudo_actions(pe_resource_t *rsc, GList *children,
+                            notify_data_t **start_notify,
+                            notify_data_t **stop_notify)
+{
+    gboolean child_active = FALSE;
+    gboolean child_starting = FALSE;
+    gboolean child_stopping = FALSE;
+    gboolean allow_dependent_migrations = TRUE;
+
+    pe_action_t *stop = NULL;
+    pe_action_t *stopped = NULL;
+
+    pe_action_t *start = NULL;
+    pe_action_t *started = NULL;
+
+    pe_rsc_trace(rsc, "Creating actions for %s", rsc->id);
+
+    for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
+        pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
+        gboolean starting = FALSE;
+        gboolean stopping = FALSE;
+
+        child_rsc->cmds->create_actions(child_rsc);
+        clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active);
+        if (stopping && starting) {
+            // Some instance both stops and starts
+            allow_dependent_migrations = FALSE;
+        }
+
+        child_stopping |= stopping;
+        child_starting |= starting;
+    }
+
+    /* start */
+    start = pe__new_rsc_pseudo_action(rsc, RSC_START, !child_starting, true);
+    started = pe__new_rsc_pseudo_action(rsc, RSC_STARTED, !child_starting,
+                                        false);
+    started->priority = INFINITY;
+
+    if (child_active || child_starting) {
+        pe__set_action_flags(started, pe_action_runnable);
+    }
+
+    if (start_notify != NULL && *start_notify == NULL) {
+        *start_notify = pe__clone_notif_pseudo_ops(rsc, RSC_START, start,
+                                                   started);
+    }
+
+    /* stop */
+    stop = pe__new_rsc_pseudo_action(rsc, RSC_STOP, !child_stopping, true);
+    stopped = pe__new_rsc_pseudo_action(rsc, RSC_STOPPED, !child_stopping,
+                                        true);
+    stopped->priority = INFINITY;
+    if (allow_dependent_migrations) {
+        pe__set_action_flags(stop, pe_action_migrate_runnable);
+    }
+
+    if (stop_notify != NULL && *stop_notify == NULL) {
+        *stop_notify = pe__clone_notif_pseudo_ops(rsc, RSC_STOP, stop, stopped);
+
+        if (start_notify && *start_notify && *stop_notify) {
+            // Order the stop notifications' post_done before the start ones' pre
+            order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional);
+        }
+    }
+}
+
+/*!
+ * \internal
+ * \brief Check whether an instance matches a node and (optionally) a role
+ *
+ * \param[in] child_rsc   Instance to check
+ * \param[in] local_node  Node to compare the instance's location against
+ * \param[in] filter      If not RSC_ROLE_UNKNOWN, role the instance must have
+ * \param[in] current     Passed to the instance's state() and location()
+ *                        methods (presumably selecting current rather than
+ *                        planned values -- confirm)
+ *
+ * \return TRUE if pe_rsc_block is not set per is_set_recursive(), the role
+ *         filter (if any) matches, and the instance's location is
+ *         \p local_node, otherwise FALSE (including for NULL arguments)
+ */
+gboolean
+is_child_compatible(const pe_resource_t *child_rsc, const pe_node_t *local_node,
+                    enum rsc_role_e filter, gboolean current)
+{
+    pe_node_t *node = NULL;
+    enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
+
+    // Validate the arguments before anything dereferences them
+    CRM_CHECK(child_rsc && local_node, return FALSE);
+
+    next_role = child_rsc->fns->state(child_rsc, current);
+    if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
+        /* We only want instances that haven't failed */
+        node = child_rsc->fns->location(child_rsc, NULL, current);
+    }
+
+    if (filter != RSC_ROLE_UNKNOWN && next_role != filter) {
+        crm_trace("Filtered %s", child_rsc->id);
+        return FALSE;
+    }
+
+    if (node && (node->details == local_node->details)) {
+        return TRUE;
+
+    } else if (node) {
+        crm_trace("%s - %s vs %s", child_rsc->id, pe__node_name(node),
+                  pe__node_name(local_node));
+
+    } else {
+        crm_trace("%s - not allocated %d", child_rsc->id, current);
+    }
+    return FALSE;
+}
+
+/*!
+ * \internal
+ * \brief Find an instance of a resource that is compatible with an instance
+ *
+ * If \p local_child has a location, only that node is tried. Otherwise, the
+ * nodes in its allowed table are tried in the order pcmk__sort_nodes() gives.
+ *
+ * \param[in] local_child  Instance to find a match for
+ * \param[in] rsc          Resource to search for a compatible instance
+ * \param[in] filter       Passed to find_compatible_child_by_node()
+ * \param[in] current      Passed to \p local_child's location() method and to
+ *                         find_compatible_child_by_node()
+ *
+ * \return First non-NULL result of find_compatible_child_by_node() for the
+ *         nodes tried, or NULL if there is none
+ */
+pe_resource_t *
+find_compatible_child(const pe_resource_t *local_child,
+                      const pe_resource_t *rsc, enum rsc_role_e filter,
+                      gboolean current)
+{
+    pe_resource_t *pair = NULL;
+    GList *scratch = NULL;
+    pe_node_t *local_node = NULL;
+
+    local_node = local_child->fns->location(local_child, NULL, current);
+    if (local_node) {
+        return find_compatible_child_by_node(local_child, local_node, rsc, filter, current);
+    }
+
+    scratch = g_hash_table_get_values(local_child->allowed_nodes);
+    scratch = pcmk__sort_nodes(scratch, NULL);
+
+    for (GList *gIter = scratch; gIter != NULL; gIter = gIter->next) {
+        pe_node_t *node = (pe_node_t *) gIter->data;
+
+        pair = find_compatible_child_by_node(local_child, node, rsc, filter, current);
+        if (pair) {
+            goto done;
+        }
+    }
+
+    pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id);
+  done:
+    g_list_free(scratch);
+    return pair;
+}
+
+/*!
+ * \internal
+ * \brief Get the task that a collective resource's action corresponds to
+ *
+ * For "notify" and "notified" actions, the task name is taken from the text
+ * between the last two underscores of the action's uuid; for anything else,
+ * the action's own task is used. The name is then mapped by
+ * get_complex_task() using the first child of the action's resource.
+ *
+ * \param[in] action  Action to check (its resource must have children)
+ *
+ * \return Result of get_complex_task(), or no_action if a notify action's
+ *         uuid has fewer than two underscores after its first character
+ */
+enum action_tasks
+clone_child_action(pe_action_t * action)
+{
+    enum action_tasks result = no_action;
+    pe_resource_t *child = (pe_resource_t *) action->rsc->children->data;
+
+    if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) {
+
+        /* Find the action we're notifying about instead */
+
+        int stop = 0;
+        char *key = action->uuid;
+        int lpc = strlen(key);
+
+        // Scan backward: the first '_' found ends the task, the second starts it
+        for (; lpc > 0; lpc--) {
+            if (key[lpc] == '_' && stop == 0) {
+                stop = lpc;
+
+            } else if (key[lpc] == '_') {
+                char *task_mutable = NULL;
+
+                lpc++;
+                task_mutable = strdup(key + lpc);
+                CRM_ASSERT(task_mutable != NULL); // Don't write through NULL
+                task_mutable[stop - lpc] = 0;
+
+                crm_trace("Extracted action '%s' from '%s'", task_mutable, key);
+                result = get_complex_task(child, task_mutable, TRUE);
+                free(task_mutable);
+                break;
+            }
+        }
+
+    } else {
+        result = get_complex_task(child, action->task, TRUE);
+    }
+    return result;
+}
+
+/* Clear \p flag from the lvalue \p flags using pcmk__clear_flags_as() */
+#define pe__clear_action_summary_flags(flags, action, flag) do {        \
+        flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,     \
+                                     "Action summary", action->rsc->id, \
+                                     flags, flag, #flag);               \
+    } while (0)
+
+/*!
+ * \internal
+ * \brief Summarize instances' action flags for a collective resource action
+ *
+ * The result starts as optional, runnable, and pseudo. The optional flag is
+ * cleared (in the result and on \p action itself) if any instance's
+ * corresponding action is not optional. The runnable flag is cleared in the
+ * result if no instance's corresponding action is runnable, and on \p action
+ * itself only if \p node is also NULL.
+ *
+ * \param[in,out] action    Collective resource action to summarize flags for
+ * \param[in]     children  List of instances (pe_resource_t *) to check
+ * \param[in]     node      Node that the instances' actions are looked up and
+ *                          evaluated on (may be NULL)
+ *
+ * \return Summarized flags for \p action
+ */
+enum pe_action_flags
+summary_action_flags(pe_action_t *action, GList *children,
+                     const pe_node_t *node)
+{
+    gboolean any_runnable = FALSE;
+    enum action_tasks task = clone_child_action(action);
+    enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo);
+    const char *task_s = task2text(task);
+
+    for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
+        pe_action_t *child_action = NULL;
+        pe_resource_t *child = (pe_resource_t *) gIter->data;
+
+        // Pass no node for instances that have children of their own
+        child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node);
+        pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id,
+                     pe__node_name(node), child_action?child_action->uuid:"NA");
+        if (child_action) {
+            enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node);
+
+            if (pcmk_is_set(flags, pe_action_optional)
+                && !pcmk_is_set(child_flags, pe_action_optional)) {
+                pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid,
+                             child_action->uuid);
+                pe__clear_action_summary_flags(flags, action, pe_action_optional);
+                pe__clear_action_flags(action, pe_action_optional);
+            }
+            if (pcmk_is_set(child_flags, pe_action_runnable)) {
+                any_runnable = TRUE;
+            }
+        }
+    }
+
+    if (!any_runnable) {
+        pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid);
+        pe__clear_action_summary_flags(flags, action, pe_action_runnable);
+        if (node == NULL) {
+            pe__clear_action_flags(action, pe_action_runnable);
+        }
+    }
+
+    return flags;
+}