Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/include/pcmki/pcmki_sched_allocate.h b/include/pcmki/pcmki_sched_allocate.h
index baee7008b4..5b5a4f8425 100644
--- a/include/pcmki/pcmki_sched_allocate.h
+++ b/include/pcmki/pcmki_sched_allocate.h
@@ -1,82 +1,81 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__PCMKI_PCMKI_SCHED_ALLOCATE__H
# define PCMK__PCMKI_PCMKI_SCHED_ALLOCATE__H
# include <glib.h>
# include <crm/common/xml.h>
# include <crm/pengine/status.h>
# include <crm/pengine/complex.h>
# include <crm/common/xml_internal.h>
# include <crm/pengine/internal.h>
# include <crm/common/xml.h>
# include <pcmki/pcmki_scheduler.h>
-void native_internal_constraints(pe_resource_t *rsc);
extern enum pe_action_flags native_action_flags(pe_action_t * action, pe_node_t * node);
void native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint);
extern void native_append_meta(pe_resource_t * rsc, xmlNode * xml);
void pcmk__primitive_add_utilization(pe_resource_t *rsc,
pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization);
void pcmk__primitive_shutdown_lock(pe_resource_t *rsc);
pe_node_t *pcmk__group_allocate(pe_resource_t *rsc, pe_node_t *prefer);
void group_create_actions(pe_resource_t *rsc);
void group_internal_constraints(pe_resource_t *rsc);
extern enum pe_action_flags group_action_flags(pe_action_t * action, pe_node_t * node);
void group_rsc_location(pe_resource_t *rsc, pe__location_t *constraint);
extern void group_append_meta(pe_resource_t * rsc, xmlNode * xml);
void pcmk__group_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
void pcmk__group_shutdown_lock(pe_resource_t *rsc);
pe_node_t *pcmk__bundle_allocate(pe_resource_t *rsc, pe_node_t *prefer);
void pcmk__bundle_create_actions(pe_resource_t *rsc);
bool pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node);
void pcmk__bundle_internal_constraints(pe_resource_t *rsc);
void pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint);
enum pe_action_flags pcmk__bundle_action_flags(pe_action_t *action,
pe_node_t *node);
void pcmk__bundle_expand(pe_resource_t *rsc);
void pcmk__bundle_append_meta(pe_resource_t *rsc, xmlNode *xml);
void pcmk__bundle_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
void pcmk__bundle_shutdown_lock(pe_resource_t *rsc);
pe_node_t *pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *prefer);
void clone_create_actions(pe_resource_t *rsc);
void clone_internal_constraints(pe_resource_t *rsc);
void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint);
extern enum pe_action_flags clone_action_flags(pe_action_t * action, pe_node_t * node);
void clone_expand(pe_resource_t *rsc);
bool clone_create_probe(pe_resource_t *rsc, pe_node_t *node);
extern void clone_append_meta(pe_resource_t * rsc, xmlNode * xml);
void pcmk__clone_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
void pcmk__clone_shutdown_lock(pe_resource_t *rsc);
void pcmk__add_promotion_scores(pe_resource_t *rsc);
uint32_t group_update_actions(pe_action_t *first, pe_action_t *then,
pe_node_t *node, uint32_t flags, uint32_t filter,
uint32_t type, pe_working_set_t *data_set);
uint32_t pcmk__multi_update_actions(pe_action_t *first,
pe_action_t *then,
pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set);
void pcmk__log_transition_summary(const char *filename);
void clone_create_pseudo_actions(pe_resource_t *rsc, GList *children,
notify_data_t **start_notify,
notify_data_t **stop_notify);
#endif
diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h
index 8134eb8475..5ab41cfa68 100644
--- a/lib/pacemaker/libpacemaker_private.h
+++ b/lib/pacemaker/libpacemaker_private.h
@@ -1,741 +1,750 @@
/*
* Copyright 2021-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__LIBPACEMAKER_PRIVATE__H
# define PCMK__LIBPACEMAKER_PRIVATE__H
/* This header is for the sole use of libpacemaker, so that functions can be
* declared with G_GNUC_INTERNAL for efficiency.
*/
#include <crm/pengine/pe_types.h> // pe_action_t, pe_node_t, pe_working_set_t
// Flags to modify the behavior of the add_colocated_node_scores() method
enum pcmk__coloc_select {
// With no other flags, apply all "with this" colocations
pcmk__coloc_select_default = 0,
// Apply "this with" colocations instead of "with this" colocations
pcmk__coloc_select_this_with = (1 << 0),
// Apply only colocations with non-negative scores
pcmk__coloc_select_nonnegative = (1 << 1),
// Apply only colocations with at least one matching node
pcmk__coloc_select_active = (1 << 2),
};
// Flags the update_ordered_actions() method can return
enum pcmk__updated {
pcmk__updated_none = 0, // Nothing changed
pcmk__updated_first = (1 << 0), // First action was updated
pcmk__updated_then = (1 << 1), // Then action was updated
};
#define pcmk__set_updated_flags(au_flags, action, flags_to_set) do { \
au_flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Action update", \
(action)->uuid, au_flags, \
(flags_to_set), #flags_to_set); \
} while (0)
#define pcmk__clear_updated_flags(au_flags, action, flags_to_clear) do { \
au_flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Action update", \
(action)->uuid, au_flags, \
(flags_to_clear), #flags_to_clear); \
} while (0)
// Resource allocation methods
struct resource_alloc_functions_s {
/*!
* \internal
* \brief Assign a resource to a node
*
* \param[in] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*/
pe_node_t *(*assign)(pe_resource_t *rsc, pe_node_t *prefer);
/*!
* \internal
* \brief Create all actions needed for a given resource
*
* \param[in,out] rsc Resource to create actions for
*/
void (*create_actions)(pe_resource_t *rsc);
/*!
* \internal
* \brief Schedule any probes needed for a resource on a node
*
* \param[in] rsc Resource to create probe for
* \param[in] node Node to create probe on
*
* \return true if any probe was created, otherwise false
*/
bool (*create_probe)(pe_resource_t *rsc, pe_node_t *node);
+ /*!
+ * \internal
+ * \brief Create implicit constraints needed for a resource
+ *
+ * \param[in,out] rsc Resource to create implicit constraints for
+ */
void (*internal_constraints)(pe_resource_t *rsc);
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node weights (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void (*apply_coloc_score) (pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent);
/*!
* \internal
* \brief Update nodes with scores of colocated resources' nodes
*
* Given a table of nodes and a resource, update the nodes' scores with the
* scores of the best nodes matching the attribute used for each of the
* resource's relevant colocations.
*
* \param[in,out] rsc Resource to check colocations for
* \param[in] log_id Resource ID to use in logs (if NULL, use rsc ID)
* \param[in,out] nodes Nodes to update
* \param[in] attr Colocation attribute (NULL to use default)
* \param[in] factor Incorporate scores multiplied by this factor
* \param[in] flags Bitmask of enum pcmk__coloc_select values
*
* \note The caller remains responsible for freeing \p *nodes.
*/
void (*add_colocated_node_scores)(pe_resource_t *rsc, const char *log_id,
GHashTable **nodes, const char *attr,
float factor,
enum pcmk__coloc_select flags);
/*!
* \internal
* \brief Create list of all resources in colocations with a given resource
*
* Given a resource, create a list of all resources involved in mandatory
* colocations with it, whether directly or indirectly via chained colocations.
*
* \param[in] rsc Resource to add to colocated list
* \param[in] orig_rsc Resource originally requested
* \param[in] colocated_rscs Existing list
*
* \return List of given resource and all resources involved in colocations
*
* \note This function is recursive; top-level callers should pass NULL as
* \p colocated_rscs and \p orig_rsc, and the desired resource as
* \p rsc. The recursive calls will use other values.
*/
GList *(*colocated_resources)(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs);
void (*rsc_location) (pe_resource_t *, pe__location_t *);
enum pe_action_flags (*action_flags) (pe_action_t *, pe_node_t *);
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions'
* flags (and runnable_before members if appropriate) as appropriate for the
* ordering. In some cases, the ordering could be disabled as well.
*
* \param[in] first 'First' action in an ordering
* \param[in] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t (*update_ordered_actions)(pe_action_t *first, pe_action_t *then,
pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set);
void (*output_actions)(pe_resource_t *rsc);
/*!
* \internal
* \brief Add a resource's actions to the transition graph
*
* \param[in] rsc Resource whose actions should be added
*/
void (*add_actions_to_graph)(pe_resource_t *rsc);
void (*append_meta) (pe_resource_t * rsc, xmlNode * xml);
/*!
* \internal
* \brief Add a resource's utilization to a table of utilization values
*
* This function is used when summing the utilization of a resource and all
* resources colocated with it, to determine whether a node has sufficient
* capacity. Given a resource and a table of utilization values, it will add
* the resource's utilization to the existing values, if the resource has
* not yet been allocated to a node.
*
* \param[in] rsc Resource with utilization to add
* \param[in] orig_rsc Resource being allocated (for logging only)
* \param[in] all_rscs List of all resources that will be summed
* \param[in] utilization Table of utilization values to add to
*/
void (*add_utilization)(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
/*!
* \internal
* \brief Apply a shutdown lock for a resource, if appropriate
*
* \param[in] rsc Resource to check for shutdown lock
*/
void (*shutdown_lock)(pe_resource_t *rsc);
};
// Actions (pcmk_sched_actions.c)
G_GNUC_INTERNAL
void pcmk__update_action_for_orderings(pe_action_t *action,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
uint32_t pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then,
pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__log_action(const char *pre_text, pe_action_t *action, bool details);
G_GNUC_INTERNAL
pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name,
guint interval_ms, const pe_node_t *node);
G_GNUC_INTERNAL
pe_action_t *pcmk__new_shutdown_action(pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__action_locks_rsc_to_node(const pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__deduplicate_action_inputs(pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__output_actions(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node,
xmlNode *xml_op);
G_GNUC_INTERNAL
void pcmk__handle_rsc_config_changes(pe_working_set_t *data_set);
// Recurring actions (pcmk_sched_recurring.c)
G_GNUC_INTERNAL
void pcmk__create_recurring_actions(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id,
const char *task, guint interval_ms,
const pe_node_t *node, const char *reason);
G_GNUC_INTERNAL
void pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task,
guint interval_ms, pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__action_is_recurring(const pe_action_t *action);
// Producing transition graphs (pcmk_graph_producer.c)
G_GNUC_INTERNAL
bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action,
pe_action_wrapper_t *input);
G_GNUC_INTERNAL
void pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__create_graph(pe_working_set_t *data_set);
// Fencing (pcmk_sched_fencing.c)
G_GNUC_INTERNAL
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *action, enum pe_ordering order);
G_GNUC_INTERNAL
void pcmk__fence_guest(pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__node_unfenced(pe_node_t *node);
// Injected scheduler inputs (pcmk_sched_injections.c)
void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib,
pcmk_injections_t *injections);
// Constraints of any type (pcmk_sched_constraints.c)
G_GNUC_INTERNAL
pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id);
G_GNUC_INTERNAL
xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id,
pe_resource_t **rsc, pe_tag_t **tag);
G_GNUC_INTERNAL
bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr,
bool convert_rsc, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__create_internal_constraints(pe_working_set_t *data_set);
// Location constraints
G_GNUC_INTERNAL
void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc,
int node_weight, const char *discover_mode,
pe_node_t *foo_node,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_locations(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_location(pe__location_t *constraint, pe_resource_t *rsc);
// Colocation constraints (pcmk_sched_colocation.c)
enum pcmk__coloc_affects {
pcmk__coloc_affects_nothing = 0,
pcmk__coloc_affects_location,
pcmk__coloc_affects_role,
};
G_GNUC_INTERNAL
enum pcmk__coloc_affects pcmk__colocation_affects(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint,
bool preview);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_weights(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint);
G_GNUC_INTERNAL
void pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
GHashTable **nodes, const char *attr,
float factor, uint32_t flags);
G_GNUC_INTERNAL
void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__new_colocation(const char *id, const char *node_attr, int score,
pe_resource_t *dependent, pe_resource_t *primary,
const char *dependent_role, const char *primary_role,
bool influence, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__block_colocated_starts(pe_action_t *action,
pe_working_set_t *data_set);
/*!
* \internal
* \brief Check whether colocation's dependent preferences should be considered
*
* \param[in] colocation Colocation constraint
* \param[in] rsc Primary instance (normally this will be
* colocation->primary, which NULL will be treated as,
* but for clones or bundles with multiple instances
* this can be a particular instance)
*
* \return true if colocation influence should be effective, otherwise false
*/
static inline bool
pcmk__colocation_has_influence(const pcmk__colocation_t *colocation,
const pe_resource_t *rsc)
{
if (rsc == NULL) {
rsc = colocation->primary;
}
/* A bundle replica colocates its remote connection with its container,
* using a finite score so that the container can run on Pacemaker Remote
* nodes.
*
* Moving a connection is lightweight and does not interrupt the service,
* while moving a container is heavyweight and does interrupt the service,
* so don't move a clean, active container based solely on the preferences
* of its connection.
*
* This also avoids problematic scenarios where two containers want to
* perpetually swap places.
*/
if (pcmk_is_set(colocation->dependent->flags, pe_rsc_allow_remote_remotes)
&& !pcmk_is_set(rsc->flags, pe_rsc_failed)
&& pcmk__list_of_1(rsc->running_on)) {
return false;
}
/* The dependent in a colocation influences the primary's location
* if the influence option is true or the primary is not yet active.
*/
return colocation->influence || (rsc->running_on == NULL);
}
// Ordering constraints (pcmk_sched_ordering.c)
G_GNUC_INTERNAL
void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task,
pe_action_t *first_action, pe_resource_t *then_rsc,
char *then_task, pe_action_t *then_action,
enum pe_ordering type, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__disable_invalid_orderings(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_stops_before_shutdown(pe_node_t *node,
pe_action_t *shutdown_op);
G_GNUC_INTERNAL
void pcmk__apply_orderings(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_after_each(pe_action_t *after, GList *list);
/*!
* \internal
* \brief Create a new ordering between two resource actions
*
* \param[in] first_rsc Resource for 'first' action
* \param[in] then_rsc Resource for 'then' action
* \param[in] first_task Action key for 'first' action
* \param[in] then_task Action key for 'then' action
* \param[in] flags Bitmask of enum pe_ordering flags
* \param[in] data_set Cluster working set to add ordering to
*/
#define pcmk__order_resource_actions(first_rsc, first_task, \
then_rsc, then_task, flags) \
pcmk__new_ordering((first_rsc), \
pcmk__op_key((first_rsc)->id, (first_task), 0), \
NULL, \
(then_rsc), \
pcmk__op_key((then_rsc)->id, (then_task), 0), \
NULL, (flags), (first_rsc)->cluster)
#define pcmk__order_starts(rsc1, rsc2, type) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \
(rsc2), CRMD_ACTION_START, (type))
#define pcmk__order_stops(rsc1, rsc2, type) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \
(rsc2), CRMD_ACTION_STOP, (type))
// Ticket constraints (pcmk_sched_tickets.c)
G_GNUC_INTERNAL
void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set);
// Promotable clone resources (pcmk_sched_promotable.c)
G_GNUC_INTERNAL
void pcmk__require_promotion_tickets(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__set_instance_roles(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__create_promotable_actions(pe_resource_t *clone);
G_GNUC_INTERNAL
void pcmk__promotable_restart_ordering(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__order_promotable_instances(pe_resource_t *clone);
G_GNUC_INTERNAL
void pcmk__update_dependent_with_promotable(pe_resource_t *primary,
pe_resource_t *dependent,
pcmk__colocation_t *colocation);
G_GNUC_INTERNAL
void pcmk__update_promotable_dependent_priority(pe_resource_t *primary,
pe_resource_t *dependent,
pcmk__colocation_t *colocation);
// Pacemaker Remote nodes (pcmk_sched_remote.c)
G_GNUC_INTERNAL
bool pcmk__is_failed_remote_node(pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_remote_connection_actions(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node);
G_GNUC_INTERNAL
pe_node_t *pcmk__connection_host_for_action(pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params);
G_GNUC_INTERNAL
void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action);
// Primitives (pcmk_sched_primitive.c)
G_GNUC_INTERNAL
pe_node_t *pcmk__primitive_assign(pe_resource_t *rsc, pe_node_t *prefer);
G_GNUC_INTERNAL
void pcmk__primitive_create_actions(pe_resource_t *rsc);
+G_GNUC_INTERNAL
+void pcmk__primitive_internal_constraints(pe_resource_t *rsc);
+
G_GNUC_INTERNAL
void pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent);
// Groups (pcmk_sched_group.c)
G_GNUC_INTERNAL
void pcmk__group_apply_coloc_score(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__group_add_colocated_node_scores(pe_resource_t *rsc,
const char *log_id,
GHashTable **nodes, const char *attr,
float factor, uint32_t flags);
G_GNUC_INTERNAL
GList *pcmk__group_colocated_resources(pe_resource_t *rsc,
pe_resource_t *orig_rsc,
GList *colocated_rscs);
// Clones (pcmk_sched_clone.c)
G_GNUC_INTERNAL
void pcmk__clone_apply_coloc_score(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent);
// Bundles (pcmk_sched_bundle.c)
G_GNUC_INTERNAL
void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__output_bundle_actions(pe_resource_t *rsc);
// Injections (pcmk_injections.c)
G_GNUC_INTERNAL
xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node,
bool up);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node,
const char *resource,
const char *lrm_name,
const char *rclass,
const char *rtype,
const char *rprovider);
G_GNUC_INTERNAL
void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node,
const char *resource, const char *task,
guint interval_ms, int rc);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_action_result(xmlNode *cib_resource,
lrmd_event_data_t *op, int target_rc);
// Nodes (pcmk_sched_nodes.c)
G_GNUC_INTERNAL
bool pcmk__node_available(const pe_node_t *node, bool consider_score,
bool consider_guest);
G_GNUC_INTERNAL
bool pcmk__any_node_available(GHashTable *nodes);
G_GNUC_INTERNAL
GHashTable *pcmk__copy_node_table(GHashTable *nodes);
G_GNUC_INTERNAL
GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node);
G_GNUC_INTERNAL
void pcmk__apply_node_health(pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe_node_t *pcmk__top_allowed_node(const pe_resource_t *rsc,
const pe_node_t *node);
// Functions applying to more than one variant (pcmk_sched_resource.c)
G_GNUC_INTERNAL
void pcmk__set_allocation_methods(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *rsc_entry, bool active_on_node);
G_GNUC_INTERNAL
GList *pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set);
G_GNUC_INTERNAL
GList *pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs);
G_GNUC_INTERNAL
void pcmk__output_resource_actions(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force);
G_GNUC_INTERNAL
bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force);
G_GNUC_INTERNAL
void pcmk__unassign_resource(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
pe_resource_t **failed);
G_GNUC_INTERNAL
void pcmk__sort_resources(pe_working_set_t *data_set);
G_GNUC_INTERNAL
gint pcmk__cmp_instance(gconstpointer a, gconstpointer b);
G_GNUC_INTERNAL
gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b);
// Functions related to probes (pcmk_sched_probes.c)
G_GNUC_INTERNAL
bool pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_probes(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__probe_resource_list(GList *rscs, pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__schedule_probes(pe_working_set_t *data_set);
// Functions related to live migration (pcmk_sched_migration.c)
void pcmk__create_migration_actions(pe_resource_t *rsc,
const pe_node_t *current);
void pcmk__abort_dangling_migration(void *data, void *user_data);
bool pcmk__rsc_can_migrate(const pe_resource_t *rsc, const pe_node_t *current);
void pcmk__order_migration_equivalents(pe__ordering_t *order);
// Functions related to node utilization (pcmk_sched_utilization.c)
G_GNUC_INTERNAL
int pcmk__compare_node_capacities(const pe_node_t *node1,
const pe_node_t *node2);
G_GNUC_INTERNAL
void pcmk__consume_node_capacity(GHashTable *current_utilization,
pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__release_node_capacity(GHashTable *current_utilization,
pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__ban_insufficient_capacity(pe_resource_t *rsc, pe_node_t **prefer);
G_GNUC_INTERNAL
void pcmk__create_utilization_constraints(pe_resource_t *rsc,
GList *allowed_nodes);
G_GNUC_INTERNAL
void pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set);
#endif // PCMK__LIBPACEMAKER_PRIVATE__H
diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c
index 5bb7444a0d..f2bb58082a 100644
--- a/lib/pacemaker/pcmk_sched_primitive.c
+++ b/lib/pacemaker/pcmk_sched_primitive.c
@@ -1,1428 +1,1435 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
gboolean DeleteRsc(pe_resource_t *rsc, const pe_node_t *node, gboolean optional,
pe_working_set_t *data_set);
static bool StopRsc(pe_resource_t *rsc, pe_node_t *next, bool optional);
static bool StartRsc(pe_resource_t *rsc, pe_node_t *next, bool optional);
static bool DemoteRsc(pe_resource_t *rsc, pe_node_t *next, bool optional);
static bool PromoteRsc(pe_resource_t *rsc, pe_node_t *next, bool optional);
static bool RoleError(pe_resource_t *rsc, pe_node_t *next, bool optional);
static bool NullOp(pe_resource_t *rsc, pe_node_t *next, bool optional);
/* This array says what the *next* role should be when transitioning from one
* role to another. For example going from Stopped to Promoted, the next role is
* RSC_ROLE_UNPROMOTED, because the resource must be started before being promoted.
* The current state then becomes Started, which is fed into this array again,
* giving a next role of RSC_ROLE_PROMOTED.
*/
static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* Current state Next state*/
/* Unknown Stopped Started Unpromoted Promoted */
/* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED },
/* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED },
/* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED },
/* Unpromoted */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED },
/* Promoted */ { RSC_ROLE_STOPPED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED },
};
typedef bool (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *next,
bool optional);
// This array picks the function needed to transition from one role to another
static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* Current state Next state */
/* Unknown Stopped Started Unpromoted Promoted */
/* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, },
/* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, },
/* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, },
/* Unpromoted */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, },
/* Promoted */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp, },
};
/*!
* \internal
* \brief Get a list of a resource's allowed nodes sorted by node weight
*
* \param[in] rsc Resource to check
*
* \return List of allowed nodes sorted by node weight
*/
static GList *
sorted_allowed_nodes(const pe_resource_t *rsc)
{
if (rsc->allowed_nodes != NULL) {
GList *nodes = g_hash_table_get_values(rsc->allowed_nodes);
if (nodes != NULL) {
return pcmk__sort_nodes(nodes, pe__current_node(rsc));
}
}
return NULL;
}
/*!
* \internal
* \brief Assign a resource to its best allowed node, if possible
*
* \param[in] rsc Resource to choose a node for
* \param[in] prefer If not NULL, prefer this node when all else equal
*
* \return true if \p rsc could be assigned to a node, otherwise false
*/
static bool
assign_best_node(pe_resource_t *rsc, pe_node_t *prefer)
{
GList *nodes = NULL;
pe_node_t *chosen = NULL;
pe_node_t *best = NULL;
bool result = false;
pcmk__ban_insufficient_capacity(rsc, &prefer);
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
// We've already finished assignment of resources to nodes
return rsc->allocated_to != NULL;
}
// Sort allowed nodes by weight
nodes = sorted_allowed_nodes(rsc);
if (nodes != NULL) {
best = (pe_node_t *) nodes->data; // First node has best score
}
if ((prefer != NULL) && (nodes != NULL)) {
// Get the allowed node version of prefer
chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (chosen == NULL) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
pe__node_name(prefer), rsc->id);
/* Favor the preferred node as long as its weight is at least as good as
* the best allowed node's.
*
* An alternative would be to favor the preferred node even if the best
* node is better, when the best node's weight is less than INFINITY.
*/
} else if (chosen->weight < best->weight) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
pe__node_name(chosen), rsc->id);
chosen = NULL;
} else if (!pcmk__node_available(chosen, true, false)) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
pe__node_name(chosen), rsc->id);
chosen = NULL;
} else {
pe_rsc_trace(rsc,
"Chose preferred node %s for %s (ignoring %d candidates)",
pe__node_name(chosen), rsc->id, g_list_length(nodes));
}
}
if ((chosen == NULL) && (best != NULL)) {
/* Either there is no preferred node, or the preferred node is not
* suitable, but another node is allowed to run the resource.
*/
chosen = best;
if (!pe_rsc_is_unique_clone(rsc->parent)
&& (chosen->weight > 0) // Zero not acceptable
&& pcmk__node_available(chosen, false, false)) {
/* If the resource is already running on a node, prefer that node if
* it is just as good as the chosen node.
*
* We don't do this for unique clone instances, because
* distribute_children() has already assigned instances to their
* running nodes when appropriate, and if we get here, we don't want
* remaining unassigned instances to prefer a node that's already
* running another instance.
*/
pe_node_t *running = pe__current_node(rsc);
if (running == NULL) {
// Nothing to do
} else if (!pcmk__node_available(running, true, false)) {
pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources",
rsc->id, pe__node_name(running));
} else {
int nodes_with_best_score = 1;
for (GList *iter = nodes->next; iter; iter = iter->next) {
pe_node_t *allowed = (pe_node_t *) iter->data;
if (allowed->weight != chosen->weight) {
// The nodes are sorted by weight, so no more are equal
break;
}
if (allowed->details == running->details) {
// Scores are equal, so prefer the current node
chosen = allowed;
}
nodes_with_best_score++;
}
if (nodes_with_best_score > 1) {
do_crm_log(((chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO),
"Chose %s for %s from %d nodes with score %s",
pe__node_name(chosen), rsc->id,
nodes_with_best_score,
pcmk_readable_score(chosen->weight));
}
}
}
pe_rsc_trace(rsc, "Chose %s for %s from %d candidates",
pe__node_name(chosen), rsc->id, g_list_length(nodes));
}
result = pcmk__assign_primitive(rsc, chosen, false);
g_list_free(nodes);
return result;
}
/*!
* \internal
* \brief Apply a "this with" colocation to a node's allowed node scores
*
* \param[in] data Colocation to apply
* \param[in] user_data Resource being assigned
*/
static void
apply_this_with(void *data, void *user_data)
{
pcmk__colocation_t *colocation = (pcmk__colocation_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
GHashTable *archive = NULL;
pe_resource_t *other = colocation->primary;
// In certain cases, we will need to revert the node scores
if ((colocation->dependent_role >= RSC_ROLE_PROMOTED)
|| ((colocation->score < 0) && (colocation->score > -INFINITY))) {
archive = pcmk__copy_node_table(rsc->allowed_nodes);
}
pe_rsc_trace(rsc,
"%s: Assigning colocation %s primary %s first"
"(score=%d role=%s)",
rsc->id, colocation->id, other->id,
colocation->score, role2text(colocation->dependent_role));
other->cmds->assign(other, NULL);
// Apply the colocation score to this resource's allowed node scores
rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
if ((archive != NULL)
&& !pcmk__any_node_available(rsc->allowed_nodes)) {
pe_rsc_info(rsc,
"%s: Reverting scores from colocation with %s "
"because no nodes allowed",
rsc->id, other->id);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = archive;
archive = NULL;
}
if (archive != NULL) {
g_hash_table_destroy(archive);
}
}
/*!
* \internal
* \brief Apply a "with this" colocation to a node's allowed node scores
*
* \param[in] data Colocation to apply
* \param[in] user_data Resource being assigned
*/
static void
apply_with_this(void *data, void *user_data)
{
pcmk__colocation_t *colocation = (pcmk__colocation_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
pe_resource_t *other = colocation->dependent;
const float factor = colocation->score / (float) INFINITY;
if (!pcmk__colocation_has_influence(colocation, NULL)) {
return;
}
pe_rsc_trace(rsc,
"%s: Incorporating attenuated %s assignment scores due "
"to colocation %s", rsc->id, other->id, colocation->id);
other->cmds->add_colocated_node_scores(other, rsc->id,
&rsc->allowed_nodes,
colocation->node_attribute,
factor, pcmk__coloc_select_active);
}
/*!
* \internal
* \brief Update a Pacemaker Remote node once its connection has been assigned
*
* \param[in] connection Connection resource that has been assigned
*/
static void
remote_connection_assigned(pe_resource_t *connection)
{
pe_node_t *remote_node = pe_find_node(connection->cluster->nodes,
connection->id);
CRM_CHECK(remote_node != NULL, return);
if ((connection->allocated_to != NULL)
&& (connection->next_role != RSC_ROLE_STOPPED)) {
crm_trace("Pacemaker Remote node %s will be online",
remote_node->details->id);
remote_node->details->online = TRUE;
if (remote_node->details->unseen) {
// Avoid unnecessary fence, since we will attempt connection
remote_node->details->unclean = FALSE;
}
} else {
crm_trace("Pacemaker Remote node %s will be shut down "
"(%sassigned connection's next role is %s)",
remote_node->details->id,
((connection->allocated_to == NULL)? "un" : ""),
role2text(connection->next_role));
remote_node->details->shutdown = TRUE;
}
}
/*!
* \internal
* \brief Assign a primitive resource to a node
*
* \param[in] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*/
pe_node_t *
pcmk__primitive_assign(pe_resource_t *rsc, pe_node_t *prefer)
{
CRM_ASSERT(rsc != NULL);
// Never assign a child without parent being assigned first
if ((rsc->parent != NULL)
&& !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "%s: Assigning parent %s first",
rsc->id, rsc->parent->id);
rsc->parent->cmds->assign(rsc->parent, prefer);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->allocated_to; // Assignment has already been done
}
// Ensure we detect assignment loops
if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
return NULL;
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
pe__show_node_weights(true, rsc, "Pre-assignment", rsc->allowed_nodes,
rsc->cluster);
g_list_foreach(rsc->rsc_cons, apply_this_with, rsc);
pe__show_node_weights(true, rsc, "Post-this-with", rsc->allowed_nodes,
rsc->cluster);
g_list_foreach(rsc->rsc_cons_lhs, apply_with_this, rsc);
if (rsc->next_role == RSC_ROLE_STOPPED) {
pe_rsc_trace(rsc,
"Banning %s from all nodes because it will be stopped",
rsc->id);
resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE,
rsc->cluster);
} else if ((rsc->next_role > rsc->role)
&& !pcmk_is_set(rsc->cluster->flags, pe_flag_have_quorum)
&& (rsc->cluster->no_quorum_policy == no_quorum_freeze)) {
crm_notice("Resource %s cannot be elevated from %s to %s due to "
"no-quorum-policy=freeze",
rsc->id, role2text(rsc->role), role2text(rsc->next_role));
pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze");
}
pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
// Unmanage resource if fencing is enabled but no device is configured
if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)
&& !pcmk_is_set(rsc->cluster->flags, pe_flag_have_stonith_resource)) {
pe__clear_resource_flags(rsc, pe_rsc_managed);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
// Unmanaged resources stay on their current node
const char *reason = NULL;
pe_node_t *assign_to = NULL;
pe__set_next_role(rsc, rsc->role, "unmanaged");
assign_to = pe__current_node(rsc);
if (assign_to == NULL) {
reason = "inactive";
} else if (rsc->role == RSC_ROLE_PROMOTED) {
reason = "promoted";
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
reason = "failed";
} else {
reason = "active";
}
pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
(assign_to? assign_to->details->uname : "no node"), reason);
pcmk__assign_primitive(rsc, assign_to, true);
} else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) {
pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", rsc->id);
pcmk__assign_primitive(rsc, NULL, true);
} else if (pcmk_is_set(rsc->flags, pe_rsc_provisional)
&& assign_best_node(rsc, prefer)) {
// Assignment successful
} else if (rsc->allocated_to == NULL) {
if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
} else if (rsc->running_on != NULL) {
pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
}
} else {
pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id,
pe__node_name(rsc->allocated_to));
}
pe__clear_resource_flags(rsc, pe_rsc_allocating);
if (rsc->is_remote_node) {
remote_connection_assigned(rsc);
}
return rsc->allocated_to;
}
/*!
* \internal
* \brief Schedule actions to bring resource down and back to current role
*
* \param[in] rsc Resource to restart
* \param[in] current Node that resource should be brought down on
* \param[in] chosen Node that resource should be brought up on
* \param[in] need_stop Whether the resource must be stopped
* \param[in] need_promote Whether the resource must be promoted
*
* \return Role that resource would have after scheduled actions are taken
*/
static void
schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
pe_node_t *chosen, bool need_stop, bool need_promote)
{
enum rsc_role_e role = rsc->role;
enum rsc_role_e next_role;
pe__set_resource_flags(rsc, pe_rsc_restarting);
// Bring resource down to a stop on its current node
while (role != RSC_ROLE_STOPPED) {
next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
(need_stop? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
if (!rsc_action_matrix[role][next_role](rsc, current, !need_stop)) {
break;
}
role = next_role;
}
// Bring resource up to its next role on its next node
while ((rsc->role <= rsc->next_role) && (role != rsc->role)
&& !pcmk_is_set(rsc->flags, pe_rsc_block)) {
bool required = need_stop;
next_role = rsc_state_matrix[role][rsc->role];
if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
required = true;
}
pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
(required? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
if (!rsc_action_matrix[role][next_role](rsc, chosen, !required)) {
break;
}
role = next_role;
}
pe__clear_resource_flags(rsc, pe_rsc_restarting);
}
/*!
* \internal
* \brief If a resource's next role is not explicitly specified, set a default
*
* \param[in,out] rsc Resource to set next role for
*
* \return "explicit" if next role was explicitly set, otherwise "implicit"
*/
static const char *
set_default_next_role(pe_resource_t *rsc)
{
if (rsc->next_role != RSC_ROLE_UNKNOWN) {
return "explicit";
}
if (rsc->allocated_to == NULL) {
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "assignment");
} else {
pe__set_next_role(rsc, RSC_ROLE_STARTED, "assignment");
}
return "implicit";
}
/*!
* \internal
* \brief Create an action to represent an already pending start
*
* \param[in,out] rsc Resource to create start action for
*/
static void
create_pending_start(pe_resource_t *rsc)
{
pe_action_t *start = NULL;
pe_rsc_trace(rsc,
"Creating action for %s to represent already pending start",
rsc->id);
start = start_action(rsc, rsc->allocated_to, TRUE);
pe__set_action_flags(start, pe_action_print_always);
}
/*!
* \internal
* \brief Schedule actions needed to take a resource to its next role
*
* \param[in,out] rsc Resource to schedule actions for
*/
static void
schedule_role_transition_actions(pe_resource_t *rsc)
{
enum rsc_role_e role = rsc->role;
while (role != rsc->next_role) {
enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role];
pe_rsc_trace(rsc,
"Creating action to take %s from %s to %s (ending at %s)",
rsc->id, role2text(role), role2text(next_role),
role2text(rsc->next_role));
if (!rsc_action_matrix[role][next_role](rsc, rsc->allocated_to,
false)) {
break;
}
role = next_role;
}
}
/*!
* \internal
* \brief Create all actions needed for a given primitive resource
*
* \param[in,out] rsc Primitive resource to create actions for
*/
void
pcmk__primitive_create_actions(pe_resource_t *rsc)
{
bool need_stop = false;
bool need_promote = false;
bool is_moving = false;
bool allow_migrate = false;
bool multiply_active = false;
pe_node_t *current = NULL;
unsigned int num_all_active = 0;
unsigned int num_clean_active = 0;
const char *next_role_source = NULL;
CRM_ASSERT(rsc != NULL);
next_role_source = set_default_next_role(rsc);
pe_rsc_trace(rsc,
"Creating all actions for %s transition from %s to %s "
"(%s) on %s",
rsc->id, role2text(rsc->role), role2text(rsc->next_role),
next_role_source, pe__node_name(rsc->allocated_to));
current = pe__find_active_on(rsc, &num_all_active, &num_clean_active);
g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration,
rsc);
if ((current != NULL) && (rsc->allocated_to != NULL)
&& (current->details != rsc->allocated_to->details)
&& (rsc->next_role >= RSC_ROLE_STARTED)) {
pe_rsc_trace(rsc, "Moving %s from %s to %s",
rsc->id, pe__node_name(current),
pe__node_name(rsc->allocated_to));
is_moving = true;
allow_migrate = pcmk__rsc_can_migrate(rsc, current);
// This is needed even if migrating (though I'm not sure why ...)
need_stop = true;
}
// Check whether resource is partially migrated and/or multiply active
if ((rsc->partial_migration_source != NULL)
&& (rsc->partial_migration_target != NULL)
&& allow_migrate && (num_all_active == 2)
&& (current->details == rsc->partial_migration_source->details)
&& (rsc->allocated_to->details == rsc->partial_migration_target->details)) {
/* A partial migration is in progress, and the migration target remains
* the same as when the migration began.
*/
pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
} else if ((rsc->partial_migration_source != NULL)
|| (rsc->partial_migration_target != NULL)) {
// A partial migration is in progress but can't be continued
if (num_all_active > 2) {
// The resource is migrating *and* multiply active!
crm_notice("Forcing recovery of %s because it is migrating "
"from %s to %s and possibly active elsewhere",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
} else {
// The migration source or target isn't available
crm_notice("Forcing recovery of %s because it can no longer "
"migrate from %s to %s",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
}
need_stop = true;
rsc->partial_migration_source = rsc->partial_migration_target = NULL;
allow_migrate = false;
} else if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
multiply_active = (num_all_active > 1);
} else {
/* If a resource has "requires" set to nothing or quorum, don't consider
* it active on unclean nodes (similar to how all resources behave when
* stonith-enabled is false). We can start such resources elsewhere
* before fencing completes, and if we considered the resource active on
* the failed node, we would attempt recovery for being active on
* multiple nodes.
*/
multiply_active = (num_clean_active > 1);
}
if (multiply_active) {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Resource was (possibly) incorrectly multiply active
pe_proc_err("%s resource %s might be active on %u nodes (%s)",
pcmk__s(class, "Untyped"), rsc->id, num_all_active,
recovery2text(rsc->recovery_type));
crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ"
"#Resource_is_Too_Active for more information");
switch (rsc->recovery_type) {
case recovery_stop_start:
need_stop = true;
break;
case recovery_stop_unexpected:
need_stop = true; // StopRsc() will skip expected node
pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
break;
default:
break;
}
} else {
pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
}
if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
create_pending_start(rsc);
}
if (is_moving) {
// Remaining tests are only for resources staying where they are
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
if (pcmk_is_set(rsc->flags, pe_rsc_stop)) {
need_stop = true;
pe_rsc_trace(rsc, "Recovering %s", rsc->id);
} else {
pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
if (rsc->next_role == RSC_ROLE_PROMOTED) {
need_promote = true;
}
}
} else if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
need_stop = true;
} else if ((rsc->role > RSC_ROLE_STARTED) && (current != NULL)
&& (rsc->allocated_to != NULL)) {
pe_action_t *start = NULL;
pe_rsc_trace(rsc, "Creating start action for promoted resource %s",
rsc->id);
start = start_action(rsc, rsc->allocated_to, TRUE);
if (!pcmk_is_set(start->flags, pe_action_optional)) {
// Recovery of a promoted resource
pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
need_stop = true;
}
}
// Create any actions needed to bring resource down and back up to same role
schedule_restart_actions(rsc, current, rsc->allocated_to, need_stop,
need_promote);
// Create any actions needed to take resource from this role to the next
schedule_role_transition_actions(rsc);
pcmk__create_recurring_actions(rsc);
if (allow_migrate) {
pcmk__create_migration_actions(rsc, current);
}
}
/*!
* \internal
* \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
*
* \param[in] rsc Resource to check
*/
static void
rsc_avoids_remote_nodes(const pe_resource_t *rsc)
{
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (node->details->remote_rsc != NULL) {
node->weight = -INFINITY;
}
}
}
/*!
* \internal
* \brief Return allowed nodes as (possibly sorted) list
*
* Convert a resource's hash table of allowed nodes to a list. If printing to
* stdout, sort the list, to keep action ID numbers consistent for regression
* test output (while avoiding the performance hit on a live cluster).
*
* \param[in] rsc Resource to check for allowed nodes
* \param[in] data_set Cluster working set
*
* \return List of resource's allowed nodes
* \note Callers should take care not to rely on the list being sorted.
*/
static GList *
allowed_nodes_as_list(pe_resource_t *rsc, pe_working_set_t *data_set)
{
GList *allowed_nodes = NULL;
if (rsc->allowed_nodes) {
allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
}
if (!pcmk__is_daemon) {
allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
}
return allowed_nodes;
}
+/*!
+ * \internal
+ * \brief Create implicit constraints needed for a primitive resource
+ *
+ * \param[in,out] rsc Primitive resource to create implicit constraints for
+ */
void
-native_internal_constraints(pe_resource_t *rsc)
+pcmk__primitive_internal_constraints(pe_resource_t *rsc)
{
- /* This function is on the critical path and worth optimizing as much as possible */
-
pe_resource_t *top = NULL;
GList *allowed_nodes = NULL;
- bool check_unfencing = FALSE;
+ bool check_unfencing = false;
bool check_utilization = false;
+ CRM_ASSERT(rsc != NULL);
+
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc,
- "Skipping native constraints for unmanaged resource: %s",
+ "Skipping implicit constraints for unmanaged resource %s",
rsc->id);
return;
}
top = uber_parent(rsc);
// Whether resource requires unfencing
check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device)
&& pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing)
&& pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing);
// Whether a non-default placement strategy is used
check_utilization = (g_hash_table_size(rsc->utilization) > 0)
&& !pcmk__str_eq(rsc->cluster->placement_strategy,
"default", pcmk__str_casei);
// Order stops before starts (i.e. restart)
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
pe_order_optional|pe_order_implies_then|pe_order_restart,
rsc->cluster);
// Promotable ordering: demote before stop, start before promote
if (pcmk_is_set(top->flags, pe_rsc_promotable)
|| (rsc->role > RSC_ROLE_UNPROMOTED)) {
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
pe_order_promoted_implies_first, rsc->cluster);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL,
pe_order_runnable_left, rsc->cluster);
}
// Don't clear resource history if probing on same node
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0),
NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0),
NULL, pe_order_same_node|pe_order_then_cancels_first,
rsc->cluster);
// Certain checks need allowed nodes
- if (check_unfencing || check_utilization || rsc->container) {
+ if (check_unfencing || check_utilization || (rsc->container != NULL)) {
allowed_nodes = allowed_nodes_as_list(rsc, rsc->cluster);
}
if (check_unfencing) {
- /* Check if the node needs to be unfenced first */
+ // Check whether the node needs to be unfenced
for (GList *item = allowed_nodes; item; item = item->next) {
pe_node_t *node = item->data;
pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE,
rsc->cluster);
crm_debug("Ordering any stops of %s before %s, and any starts after",
rsc->id, unfence->uuid);
/*
* It would be more efficient to order clone resources once,
* rather than order each instance, but ordering the instance
* allows us to avoid unnecessary dependencies that might conflict
* with user constraints.
*
* @TODO: This constraint can still produce a transition loop if the
* resource has a stop scheduled on the node being unfenced, and
* there is a user ordering constraint to start some other resource
* (which will be ordered after the unfence) before stopping this
* resource. An example is "start some slow-starting cloned service
* before stopping an associated virtual IP that may be moving to
* it":
* stop this -> unfencing -> start that -> stop this
*/
pcmk__new_ordering(rsc, stop_key(rsc), NULL,
NULL, strdup(unfence->uuid), unfence,
pe_order_optional|pe_order_same_node,
rsc->cluster);
pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
rsc, start_key(rsc), NULL,
pe_order_implies_then_on_node|pe_order_same_node,
rsc->cluster);
}
}
if (check_utilization) {
pcmk__create_utilization_constraints(rsc, allowed_nodes);
}
- if (rsc->container) {
+ if (rsc->container != NULL) {
pe_resource_t *remote_rsc = NULL;
if (rsc->is_remote_node) {
// rsc is the implicit remote connection for a guest or bundle node
- /* Do not allow a guest resource to live on a Pacemaker Remote node,
- * to avoid nesting remotes. However, allow bundles to run on remote
- * nodes.
+ /* Guest resources are not allowed to run on Pacemaker Remote nodes,
+ * to avoid nesting remotes. However, bundles are allowed.
*/
if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
rsc_avoids_remote_nodes(rsc->container);
}
/* If someone cleans up a guest or bundle node's container, we will
* likely schedule a (re-)probe of the container and recovery of the
* connection. Order the connection stop after the container probe,
* so that if we detect the container running, we will trigger a new
* transition and avoid the unnecessary recovery.
*/
pcmk__order_resource_actions(rsc->container, RSC_STATUS, rsc,
RSC_STOP, pe_order_optional);
/* A user can specify that a resource must start on a Pacemaker Remote
* node by explicitly configuring it with the container=NODENAME
* meta-attribute. This is of questionable merit, since location
* constraints can accomplish the same thing. But we support it, so here
* we check whether a resource (that is not itself a remote connection)
* has container set to a remote node or guest node resource.
*/
} else if (rsc->container->is_remote_node) {
remote_rsc = rsc->container;
} else {
remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
rsc->container);
}
- if (remote_rsc) {
+ if (remote_rsc != NULL) {
/* Force the resource on the Pacemaker Remote node instead of
* colocating the resource with the container resource.
*/
for (GList *item = allowed_nodes; item; item = item->next) {
pe_node_t *node = item->data;
if (node->details->remote_rsc != remote_rsc) {
node->weight = -INFINITY;
}
}
} else {
/* This resource is either a filler for a container that does NOT
* represent a Pacemaker Remote node, or a Pacemaker Remote
* connection resource for a guest node or bundle.
*/
int score;
crm_trace("Order and colocate %s relative to its container %s",
rsc->id, rsc->container->id);
pcmk__new_ordering(rsc->container,
pcmk__op_key(rsc->container->id, RSC_START, 0),
NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0),
NULL,
pe_order_implies_then|pe_order_runnable_left,
rsc->cluster);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
rsc->container,
pcmk__op_key(rsc->container->id, RSC_STOP, 0),
NULL, pe_order_implies_first, rsc->cluster);
if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
score = 10000; /* Highly preferred but not essential */
} else {
score = INFINITY; /* Force them to run on the same host */
}
pcmk__new_colocation("resource-with-container", NULL, score, rsc,
- rsc->container, NULL, NULL, true, rsc->cluster);
+ rsc->container, NULL, NULL, true,
+ rsc->cluster);
}
}
if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) {
- /* don't allow remote nodes to run stonith devices
- * or remote connection resources.*/
+ /* Remote connections and fencing devices are not allowed to run on
+ * Pacemaker Remote nodes
+ */
rsc_avoids_remote_nodes(rsc);
}
g_list_free(allowed_nodes);
}
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node weights (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent)
{
enum pcmk__coloc_affects filter_results;
CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
return);
if (for_dependent) {
// Always process on behalf of primary resource
primary->cmds->apply_coloc_score(dependent, primary, colocation, false);
return;
}
filter_results = pcmk__colocation_affects(dependent, primary, colocation,
false);
pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
((colocation->score > 0)? "Colocating" : "Anti-colocating"),
dependent->id, primary->id, colocation->id, colocation->score,
filter_results);
switch (filter_results) {
case pcmk__coloc_affects_role:
pcmk__apply_coloc_to_priority(dependent, primary, colocation);
break;
case pcmk__coloc_affects_location:
pcmk__apply_coloc_to_weights(dependent, primary, colocation);
break;
default: // pcmk__coloc_affects_nothing
return;
}
}
enum pe_action_flags
native_action_flags(pe_action_t * action, pe_node_t * node)
{
return action->flags;
}
void
native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
pcmk__apply_location(constraint, rsc);
}
/*!
* \internal
* \brief Check whether a node is a multiply active resource's expected node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p rsc is multiply active with multiple-active set to
* stop_unexpected, and \p node is the node where it will remain active
* \note This assumes that the resource's next role cannot be changed to stopped
* after this is called, which should be reasonable if status has already
* been unpacked and resources have been assigned to nodes.
*/
static bool
is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
{
return pcmk_all_flags_set(rsc->flags,
pe_rsc_stop_unexpected|pe_rsc_restarting)
&& (rsc->next_role > RSC_ROLE_STOPPED)
&& (rsc->allocated_to != NULL) && (node != NULL)
&& (rsc->allocated_to->details == node->details);
}
static bool
StopRsc(pe_resource_t *rsc, pe_node_t *next, bool optional)
{
GList *gIter = NULL;
CRM_ASSERT(rsc);
for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
pe_node_t *current = (pe_node_t *) gIter->data;
pe_action_t *stop;
if (is_expected_node(rsc, current)) {
/* We are scheduling restart actions for a multiply active resource
* with multiple-active=stop_unexpected, and this is where it should
* not be stopped.
*/
pe_rsc_trace(rsc,
"Skipping stop of multiply active resource %s "
"on expected node %s",
rsc->id, pe__node_name(current));
continue;
}
if (rsc->partial_migration_target) {
if (rsc->partial_migration_target->details == current->details
// Only if the allocated node still is the migration target.
&& rsc->allocated_to
&& rsc->allocated_to->details == rsc->partial_migration_target->details) {
pe_rsc_trace(rsc,
"Skipping stop of %s on %s "
"because migration to %s in progress",
rsc->id, pe__node_name(current),
pe__node_name(next));
continue;
} else {
pe_rsc_trace(rsc,
"Forcing stop of %s on %s "
"because migration target changed",
rsc->id, pe__node_name(current));
optional = false;
}
}
pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
rsc->id, pe__node_name(current));
stop = stop_action(rsc, current, optional);
if(rsc->allocated_to == NULL) {
pe_action_set_reason(stop, "node availability", TRUE);
} else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting
|pe_rsc_stop_unexpected)) {
/* We are stopping a multiply active resource on a node that is
* not its expected node, and we are still scheduling restart
* actions, so the stop is for being multiply active.
*/
pe_action_set_reason(stop, "being multiply active", TRUE);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe__clear_action_flags(stop, pe_action_runnable);
}
if (pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop)) {
DeleteRsc(rsc, current, optional, rsc->cluster);
}
if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) {
pe_action_t *unfence = pe_fence_op(current, "on", TRUE, NULL, FALSE,
rsc->cluster);
order_actions(stop, unfence, pe_order_implies_first);
if (!pcmk__node_unfenced(current)) {
pe_proc_err("Stopping %s until %s can be unfenced",
rsc->id, pe__node_name(current));
}
}
}
return true;
}
static bool
StartRsc(pe_resource_t *rsc, pe_node_t *next, bool optional)
{
pe_action_t *start = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (weight=%d)",
(optional? "optional" : "required"), rsc->id,
pe__node_name(next),
((next == NULL)? 0 : next->weight));
start = start_action(rsc, next, TRUE);
pcmk__order_vs_unfence(rsc, next, start, pe_order_implies_then);
if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) {
pe__clear_action_flags(start, pe_action_optional);
}
if (is_expected_node(rsc, next)) {
/* This could be a problem if the start becomes necessary for other
* reasons later.
*/
pe_rsc_trace(rsc,
"Start of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pe__node_name(next));
pe__set_action_flags(start, pe_action_pseudo);
}
return true;
}
static bool
PromoteRsc(pe_resource_t *rsc, pe_node_t *next, bool optional)
{
GList *gIter = NULL;
gboolean runnable = TRUE;
GList *action_list = NULL;
CRM_ASSERT(rsc);
CRM_CHECK(next != NULL, return false);
pe_rsc_trace(rsc, "%s on %s", rsc->id, pe__node_name(next));
action_list = pe__resource_actions(rsc, next, RSC_START, TRUE);
for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
pe_action_t *start = (pe_action_t *) gIter->data;
if (!pcmk_is_set(start->flags, pe_action_runnable)) {
runnable = FALSE;
}
}
g_list_free(action_list);
if (runnable) {
pe_action_t *promote = promote_action(rsc, next, optional);
if (is_expected_node(rsc, next)) {
/* This could be a problem if the promote becomes necessary for
* other reasons later.
*/
pe_rsc_trace(rsc,
"Promotion of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pe__node_name(next));
pe__set_action_flags(promote, pe_action_pseudo);
}
return true;
}
pe_rsc_debug(rsc, "%s\tPromote %s (canceled)",
pe__node_name(next), rsc->id);
action_list = pe__resource_actions(rsc, next, RSC_PROMOTE, TRUE);
for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
pe_action_t *promote = (pe_action_t *) gIter->data;
pe__clear_action_flags(promote, pe_action_runnable);
}
g_list_free(action_list);
return true;
}
static bool
DemoteRsc(pe_resource_t *rsc, pe_node_t *next, bool optional)
{
GList *gIter = NULL;
CRM_ASSERT(rsc);
if (is_expected_node(rsc, next)) {
pe_rsc_trace(rsc,
"Skipping demote of multiply active resource %s "
"on expected node %s",
rsc->id, pe__node_name(next));
return true;
}
pe_rsc_trace(rsc, "%s", rsc->id);
/* CRM_CHECK(rsc->next_role == RSC_ROLE_UNPROMOTED, return FALSE); */
for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
pe_node_t *current = (pe_node_t *) gIter->data;
pe_rsc_trace(rsc, "%s on %s", rsc->id, pe__node_name(next));
demote_action(rsc, current, optional);
}
return true;
}
static bool
RoleError(pe_resource_t *rsc, pe_node_t *next, bool optional)
{
CRM_ASSERT(rsc);
crm_err("%s on %s", rsc->id, pe__node_name(next));
CRM_CHECK(false, return false);
return false;
}
static bool
NullOp(pe_resource_t *rsc, pe_node_t *next, bool optional)
{
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s", rsc->id);
return FALSE;
}
gboolean
DeleteRsc(pe_resource_t *rsc, const pe_node_t *node, gboolean optional,
pe_working_set_t *data_set)
{
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed",
rsc->id, pe__node_name(node));
return FALSE;
} else if (node == NULL) {
pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id);
return FALSE;
} else if (node->details->unclean || node->details->online == FALSE) {
pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable",
rsc->id, pe__node_name(node));
return FALSE;
}
crm_notice("Removing %s from %s", rsc->id, pe__node_name(node));
delete_action(rsc, node, optional);
pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_DELETE,
optional? pe_order_implies_then : pe_order_optional);
pcmk__order_resource_actions(rsc, RSC_DELETE, rsc, RSC_START,
optional? pe_order_implies_then : pe_order_optional);
return TRUE;
}
void
native_append_meta(pe_resource_t * rsc, xmlNode * xml)
{
char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
pe_resource_t *parent;
if (value) {
char *name = NULL;
name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
crm_xml_add(xml, name, value);
free(name);
}
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
if (value) {
char *name = NULL;
name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
crm_xml_add(xml, name, value);
free(name);
}
for (parent = rsc; parent != NULL; parent = parent->parent) {
if (parent->container) {
crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, parent->container->id);
}
}
}
// Primitive implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__primitive_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization)
{
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return;
}
pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization",
orig_rsc->id, rsc->id);
pcmk__release_node_capacity(utilization, rsc);
}
/*!
* \internal
* \brief Get epoch time of node's shutdown attribute (or now if none)
*
* \param[in] node Node to check
* \param[in] data_set Cluster working set
*
* \return Epoch time corresponding to shutdown attribute if set or now if not
*/
static time_t
shutdown_time(pe_node_t *node, pe_working_set_t *data_set)
{
const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
time_t result = 0;
if (shutdown != NULL) {
long long result_ll;
if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
result = (time_t) result_ll;
}
}
return (result == 0)? get_effective_time(data_set) : result;
}
// Primitive implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__primitive_shutdown_lock(pe_resource_t *rsc)
{
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Fence devices and remote connections can't be locked
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
|| pe__resource_is_remote_conn(rsc, rsc->cluster)) {
return;
}
if (rsc->lock_node != NULL) {
// The lock was obtained from resource history
if (rsc->running_on != NULL) {
/* The resource was started elsewhere even though it is now
* considered locked. This shouldn't be possible, but as a
* failsafe, we don't want to disturb the resource now.
*/
pe_rsc_info(rsc,
"Cancelling shutdown lock because %s is already active",
rsc->id);
pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster);
rsc->lock_node = NULL;
rsc->lock_time = 0;
}
// Only a resource active on exactly one node can be locked
} else if (pcmk__list_of_1(rsc->running_on)) {
pe_node_t *node = rsc->running_on->data;
if (node->details->shutdown) {
if (node->details->unclean) {
pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
rsc->id, pe__node_name(node));
} else {
rsc->lock_node = node;
rsc->lock_time = shutdown_time(node, rsc->cluster);
}
}
}
if (rsc->lock_node == NULL) {
// No lock needed
return;
}
if (rsc->cluster->shutdown_lock > 0) {
time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
rsc->id, pe__node_name(rsc->lock_node),
(long long) lock_expiration);
pe__update_recheck_time(++lock_expiration, rsc->cluster);
} else {
pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
rsc->id, pe__node_name(rsc->lock_node));
}
// If resource is locked to one node, ban it from all other nodes
for (GList *item = rsc->cluster->nodes; item != NULL; item = item->next) {
pe_node_t *node = item->data;
if (strcmp(node->details->uname, rsc->lock_node->details->uname)) {
resource_location(rsc, node, -CRM_SCORE_INFINITY,
XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster);
}
}
}
diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c
index cdf3b229d8..bd149c609a 100644
--- a/lib/pacemaker/pcmk_sched_resource.c
+++ b/lib/pacemaker/pcmk_sched_resource.c
@@ -1,1087 +1,1087 @@
/*
* Copyright 2014-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdlib.h>
#include <string.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Resource allocation methods that vary by resource variant
static resource_alloc_functions_t allocation_methods[] = {
{
pcmk__primitive_assign,
pcmk__primitive_create_actions,
pcmk__probe_rsc_on_node,
- native_internal_constraints,
+ pcmk__primitive_internal_constraints,
pcmk__primitive_apply_coloc_score,
pcmk__add_colocated_node_scores,
pcmk__colocated_resources,
native_rsc_location,
native_action_flags,
pcmk__update_ordered_actions,
pcmk__output_resource_actions,
pcmk__add_rsc_actions_to_graph,
native_append_meta,
pcmk__primitive_add_utilization,
pcmk__primitive_shutdown_lock,
},
{
pcmk__group_allocate,
group_create_actions,
pcmk__probe_rsc_on_node,
group_internal_constraints,
pcmk__group_apply_coloc_score,
pcmk__group_add_colocated_node_scores,
pcmk__group_colocated_resources,
group_rsc_location,
group_action_flags,
group_update_actions,
pcmk__output_resource_actions,
pcmk__add_rsc_actions_to_graph,
group_append_meta,
pcmk__group_add_utilization,
pcmk__group_shutdown_lock,
},
{
pcmk__clone_allocate,
clone_create_actions,
clone_create_probe,
clone_internal_constraints,
pcmk__clone_apply_coloc_score,
pcmk__add_colocated_node_scores,
pcmk__colocated_resources,
clone_rsc_location,
clone_action_flags,
pcmk__multi_update_actions,
pcmk__output_resource_actions,
clone_expand,
clone_append_meta,
pcmk__clone_add_utilization,
pcmk__clone_shutdown_lock,
},
{
pcmk__bundle_allocate,
pcmk__bundle_create_actions,
pcmk__bundle_create_probe,
pcmk__bundle_internal_constraints,
pcmk__bundle_apply_coloc_score,
pcmk__add_colocated_node_scores,
pcmk__colocated_resources,
pcmk__bundle_rsc_location,
pcmk__bundle_action_flags,
pcmk__multi_update_actions,
pcmk__output_bundle_actions,
pcmk__bundle_expand,
pcmk__bundle_append_meta,
pcmk__bundle_add_utilization,
pcmk__bundle_shutdown_lock,
}
};
/*!
* \internal
* \brief Check whether a resource's agent standard, provider, or type changed
*
* \param[in] rsc Resource to check
* \param[in] node Node needing unfencing/restart if agent changed
* \param[in] rsc_entry XML with previously known agent information
* \param[in] active_on_node Whether \p rsc is active on \p node
*
* \return true if agent for \p rsc changed, otherwise false
*/
bool
pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *rsc_entry, bool active_on_node)
{
bool changed = false;
const char *attr_list[] = {
XML_ATTR_TYPE,
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER
};
for (int i = 0; i < PCMK__NELEM(attr_list); i++) {
const char *value = crm_element_value(rsc->xml, attr_list[i]);
const char *old_value = crm_element_value(rsc_entry, attr_list[i]);
if (!pcmk__str_eq(value, old_value, pcmk__str_none)) {
changed = true;
trigger_unfencing(rsc, node, "Device definition changed", NULL,
rsc->cluster);
if (active_on_node) {
crm_notice("Forcing restart of %s on %s "
"because %s changed from '%s' to '%s'",
rsc->id, pe__node_name(node), attr_list[i],
pcmk__s(old_value, ""), pcmk__s(value, ""));
}
}
}
if (changed && active_on_node) {
// Make sure the resource is restarted
custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE,
rsc->cluster);
pe__set_resource_flags(rsc, pe_rsc_start_pending);
}
return changed;
}
/*!
* \internal
* \brief Add resource (and any matching children) to list if it matches ID
*
* \param[in] result List to add resource to
* \param[in] rsc Resource to check
* \param[in] id ID to match
*
* \return (Possibly new) head of list
*/
static GList *
add_rsc_if_matching(GList *result, pe_resource_t *rsc, const char *id)
{
if ((strcmp(rsc->id, id) == 0)
|| ((rsc->clone_name != NULL) && (strcmp(rsc->clone_name, id) == 0))) {
result = g_list_prepend(result, rsc);
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
result = add_rsc_if_matching(result, child, id);
}
return result;
}
/*!
* \internal
* \brief Find all resources matching a given ID by either ID or clone name
*
* \param[in] id Resource ID to check
* \param[in] data_set Cluster working set
*
* \return List of all resources that match \p id
* \note The caller is responsible for freeing the return value with
* g_list_free().
*/
GList *
pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set)
{
GList *result = NULL;
CRM_CHECK((id != NULL) && (data_set != NULL), return NULL);
for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
result = add_rsc_if_matching(result, (pe_resource_t *) iter->data, id);
}
return result;
}
/*!
* \internal
* \brief Set the variant-appropriate allocation methods for a resource
*
* \param[in] rsc Resource to set allocation methods for
* \param[in] ignored Only here so function can be used with g_list_foreach()
*/
static void
set_allocation_methods_for_rsc(pe_resource_t *rsc, void *ignored)
{
rsc->cmds = &allocation_methods[rsc->variant];
g_list_foreach(rsc->children, (GFunc) set_allocation_methods_for_rsc, NULL);
}
/*!
* \internal
* \brief Set the variant-appropriate allocation methods for all resources
*
* \param[in] data_set Cluster working set
*/
void
pcmk__set_allocation_methods(pe_working_set_t *data_set)
{
g_list_foreach(data_set->resources, (GFunc) set_allocation_methods_for_rsc,
NULL);
}
// Shared implementation of resource_alloc_functions_t:colocated_resources()
GList *
pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs)
{
GList *gIter = NULL;
if (orig_rsc == NULL) {
orig_rsc = rsc;
}
if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) {
return colocated_rscs;
}
pe_rsc_trace(orig_rsc, "%s is in colocation chain with %s",
rsc->id, orig_rsc->id);
colocated_rscs = g_list_append(colocated_rscs, rsc);
// Follow colocations where this resource is the dependent resource
for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_resource_t *primary = constraint->primary;
if (primary == orig_rsc) {
continue; // Break colocation loop
}
if ((constraint->score == INFINITY) &&
(pcmk__colocation_affects(rsc, primary, constraint,
true) == pcmk__coloc_affects_location)) {
colocated_rscs = primary->cmds->colocated_resources(primary,
orig_rsc,
colocated_rscs);
}
}
// Follow colocations where this resource is the primary resource
for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_resource_t *dependent = constraint->dependent;
if (dependent == orig_rsc) {
continue; // Break colocation loop
}
if (pe_rsc_is_clone(rsc) && !pe_rsc_is_clone(dependent)) {
continue; // We can't be sure whether dependent will be colocated
}
if ((constraint->score == INFINITY) &&
(pcmk__colocation_affects(dependent, rsc, constraint,
true) == pcmk__coloc_affects_location)) {
colocated_rscs = dependent->cmds->colocated_resources(dependent,
orig_rsc,
colocated_rscs);
}
}
return colocated_rscs;
}
void
pcmk__output_resource_actions(pe_resource_t *rsc)
{
pcmk__output_t *out = rsc->cluster->priv;
pe_node_t *next = NULL;
pe_node_t *current = NULL;
if (rsc->children != NULL) {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
child->cmds->output_actions(child);
}
return;
}
next = rsc->allocated_to;
if (rsc->running_on) {
current = pe__current_node(rsc);
if (rsc->role == RSC_ROLE_STOPPED) {
/* This can occur when resources are being recovered because
* the current role can change in pcmk__primitive_create_actions()
*/
rsc->role = RSC_ROLE_STARTED;
}
}
if ((current == NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
/* Don't log stopped orphans */
return;
}
out->message(out, "rsc-action", rsc, current, next);
}
/*!
* \internal
* \brief Assign a specified primitive resource to a node
*
* Assign a specified primitive resource to a specified node, if the node can
* run the resource (or unconditionally, if \p force is true). Mark the resource
* as no longer provisional. If the primitive can't be assigned (or \p chosen is
* NULL), unassign any previous assignment for it, set its next role to stopped,
* and update any existing actions scheduled for it. This is not done
* recursively for children, so it should be called only for primitives.
*
* \param[in] rsc Resource to assign
* \param[in] chosen Node to assign \p rsc to
* \param[in] force If true, assign to \p chosen even if unavailable
*
* \return true if \p rsc could be assigned, otherwise false
*
* \note Assigning a resource to the NULL node using this function is different
* from calling pcmk__unassign_resource(), in that it will also update any
* actions created for the resource.
*/
bool
pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force)
{
pcmk__output_t *out = rsc->cluster->priv;
CRM_ASSERT(rsc->variant == pe_native);
if (!force && (chosen != NULL)) {
if ((chosen->weight < 0)
// Allow the graph to assume that guest node connections will come up
|| (!pcmk__node_available(chosen, true, false)
&& !pe__is_guest_node(chosen))) {
crm_debug("All nodes for resource %s are unavailable, unclean or "
"shutting down (%s can%s run resources, with weight %d)",
rsc->id, pe__node_name(chosen),
(pcmk__node_available(chosen, true, false)? "" : "not"),
chosen->weight);
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability");
chosen = NULL;
}
}
pcmk__unassign_resource(rsc);
pe__clear_resource_flags(rsc, pe_rsc_provisional);
if (chosen == NULL) {
crm_debug("Could not allocate a node for %s", rsc->id);
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate");
for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
pe_action_t *op = (pe_action_t *) iter->data;
crm_debug("Updating %s for allocation failure", op->uuid);
if (pcmk__str_eq(op->task, RSC_STOP, pcmk__str_casei)) {
pe__clear_action_flags(op, pe_action_optional);
} else if (pcmk__str_eq(op->task, RSC_START, pcmk__str_casei)) {
pe__clear_action_flags(op, pe_action_runnable);
//pe__set_resource_flags(rsc, pe_rsc_block);
} else {
// Cancel recurring actions, unless for stopped state
const char *interval_ms_s = NULL;
const char *target_rc_s = NULL;
char *rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING);
interval_ms_s = g_hash_table_lookup(op->meta,
XML_LRM_ATTR_INTERVAL_MS);
target_rc_s = g_hash_table_lookup(op->meta,
XML_ATTR_TE_TARGET_RC);
if ((interval_ms_s != NULL)
&& !pcmk__str_eq(interval_ms_s, "0", pcmk__str_none)
&& !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) {
pe__clear_action_flags(op, pe_action_runnable);
}
free(rc_stopped);
}
}
return false;
}
crm_debug("Assigning %s to %s", rsc->id, pe__node_name(chosen));
rsc->allocated_to = pe__copy_node(chosen);
chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc,
rsc);
chosen->details->num_resources++;
chosen->count++;
pcmk__consume_node_capacity(chosen->details->utilization, rsc);
if (pcmk_is_set(rsc->cluster->flags, pe_flag_show_utilization)) {
out->message(out, "resource-util", rsc, chosen, __func__);
}
return true;
}
/*!
* \internal
* \brief Assign a specified resource (of any variant) to a node
*
* Assign a specified resource and its children (if any) to a specified node, if
* the node can run the resource (or unconditionally, if \p force is true). Mark
* the resources as no longer provisional. If the resources can't be assigned
* (or \p chosen is NULL), unassign any previous assignments, set next role to
* stopped, and update any existing actions scheduled for them.
*
* \param[in] rsc Resource to assign
* \param[in] chosen Node to assign \p rsc to
* \param[in] force If true, assign to \p chosen even if unavailable
*
* \return true if \p rsc could be assigned, otherwise false
*
* \note Assigning a resource to the NULL node using this function is different
* from calling pcmk__unassign_resource(), in that it will also update any
* actions created for the resource.
*/
bool
pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force)
{
bool changed = false;
if (rsc->children == NULL) {
if (rsc->allocated_to != NULL) {
changed = true;
}
pcmk__assign_primitive(rsc, node, force);
} else {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
changed |= pcmk__assign_resource(child_rsc, node, force);
}
}
return changed;
}
/*!
* \internal
* \brief Remove any assignment of a specified resource to a node
*
* If a specified resource has been assigned to a node, remove that assignment
* and mark the resource as provisional again. This is not done recursively for
* children, so it should be called only for primitives.
*
* \param[in] rsc Resource to unassign
*/
void
pcmk__unassign_resource(pe_resource_t *rsc)
{
pe_node_t *old = rsc->allocated_to;
if (old == NULL) {
return;
}
crm_info("Unassigning %s from %s", rsc->id, pe__node_name(old));
pe__set_resource_flags(rsc, pe_rsc_provisional);
rsc->allocated_to = NULL;
/* We're going to free the pe_node_t, but its details member is shared and
* will remain, so update that appropriately first.
*/
old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc,
rsc);
old->details->num_resources--;
pcmk__release_node_capacity(old->details->utilization, rsc);
free(old);
}
/*!
* \internal
* \brief Check whether a resource has reached its migration threshold on a node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
* \param[out] failed If the threshold has been reached, this will be set to
* the resource that failed (possibly a parent of \p rsc)
*
* \return true if the migration threshold has been reached, false otherwise
*/
bool
pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
pe_resource_t **failed)
{
int fail_count, remaining_tries;
pe_resource_t *rsc_to_ban = rsc;
// Migration threshold of 0 means never force away
if (rsc->migration_threshold == 0) {
return false;
}
// If we're ignoring failures, also ignore the migration threshold
if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
return false;
}
// If there are no failures, there's no need to force away
fail_count = pe_get_failcount(node, rsc, NULL,
pe_fc_effective|pe_fc_fillers, NULL,
rsc->cluster);
if (fail_count <= 0) {
return false;
}
// If failed resource is anonymous clone instance, we'll force clone away
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
rsc_to_ban = uber_parent(rsc);
}
// How many more times recovery will be tried on this node
remaining_tries = rsc->migration_threshold - fail_count;
if (remaining_tries <= 0) {
crm_warn("%s cannot run on %s due to reaching migration threshold "
"(clean up resource to allow again)"
CRM_XS " failures=%d migration-threshold=%d",
rsc_to_ban->id, pe__node_name(node), fail_count,
rsc->migration_threshold);
if (failed != NULL) {
*failed = rsc_to_ban;
}
return true;
}
crm_info("%s can fail %d more time%s on "
"%s before reaching migration threshold (%d)",
rsc_to_ban->id, remaining_tries, pcmk__plural_s(remaining_tries),
pe__node_name(node), rsc->migration_threshold);
return false;
}
static void *
convert_const_pointer(const void *ptr)
{
/* Worst function ever */
return (void *)ptr;
}
/*!
* \internal
* \brief Get a node's weight
*
* \param[in] node Unweighted node to check (for node ID)
* \param[in] nodes List of weighted nodes to look for \p node in
*
* \return Node's weight, or -INFINITY if not found
*/
static int
get_node_weight(pe_node_t *node, GHashTable *nodes)
{
pe_node_t *weighted_node = NULL;
if ((node != NULL) && (nodes != NULL)) {
weighted_node = g_hash_table_lookup(nodes, node->details->id);
}
return (weighted_node == NULL)? -INFINITY : weighted_node->weight;
}
/*!
* \internal
* \brief Compare two resources according to which should be allocated first
*
* \param[in] a First resource to compare
* \param[in] b Second resource to compare
* \param[in] data Sorted list of all nodes in cluster
*
* \return -1 if \p a should be allocated before \b, 0 if they are equal,
* or +1 if \p a should be allocated after \b
*/
static gint
cmp_resources(gconstpointer a, gconstpointer b, gpointer data)
{
const pe_resource_t *resource1 = a;
const pe_resource_t *resource2 = b;
GList *nodes = (GList *) data;
int rc = 0;
int r1_weight = -INFINITY;
int r2_weight = -INFINITY;
pe_node_t *r1_node = NULL;
pe_node_t *r2_node = NULL;
GHashTable *r1_nodes = NULL;
GHashTable *r2_nodes = NULL;
const char *reason = NULL;
// Resources with highest priority should be allocated first
reason = "priority";
r1_weight = resource1->priority;
r2_weight = resource2->priority;
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
// We need nodes to make any other useful comparisons
reason = "no node list";
if (nodes == NULL) {
goto done;
}
// Calculate and log node weights
pcmk__add_colocated_node_scores(convert_const_pointer(resource1),
resource1->id, &r1_nodes, NULL, 1,
pcmk__coloc_select_this_with);
pcmk__add_colocated_node_scores(convert_const_pointer(resource2),
resource2->id, &r2_nodes, NULL, 1,
pcmk__coloc_select_this_with);
pe__show_node_weights(true, NULL, resource1->id, r1_nodes,
resource1->cluster);
pe__show_node_weights(true, NULL, resource2->id, r2_nodes,
resource2->cluster);
// The resource with highest score on its current node goes first
reason = "current location";
if (resource1->running_on != NULL) {
r1_node = pe__current_node(resource1);
}
if (resource2->running_on != NULL) {
r2_node = pe__current_node(resource2);
}
r1_weight = get_node_weight(r1_node, r1_nodes);
r2_weight = get_node_weight(r2_node, r2_nodes);
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
// Otherwise a higher weight on any node will do
reason = "score";
for (GList *iter = nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
r1_weight = get_node_weight(node, r1_nodes);
r2_weight = get_node_weight(node, r2_nodes);
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
}
done:
crm_trace("%s (%d)%s%s %c %s (%d)%s%s: %s",
resource1->id, r1_weight,
((r1_node == NULL)? "" : " on "),
((r1_node == NULL)? "" : r1_node->details->id),
((rc < 0)? '>' : ((rc > 0)? '<' : '=')),
resource2->id, r2_weight,
((r2_node == NULL)? "" : " on "),
((r2_node == NULL)? "" : r2_node->details->id),
reason);
if (r1_nodes != NULL) {
g_hash_table_destroy(r1_nodes);
}
if (r2_nodes != NULL) {
g_hash_table_destroy(r2_nodes);
}
return rc;
}
/*!
* \internal
* \brief Sort resources in the order they should be allocated to nodes
*
* \param[in] data_set Cluster working set
*/
void
pcmk__sort_resources(pe_working_set_t *data_set)
{
GList *nodes = g_list_copy(data_set->nodes);
nodes = pcmk__sort_nodes(nodes, NULL);
data_set->resources = g_list_sort_with_data(data_set->resources,
cmp_resources, nodes);
g_list_free(nodes);
}
/*!
* \internal
* \brief Create a hash table with a single node in it
*
* \param[in] node Node to copy into new table
*
* \return Newly created hash table containing a copy of \p node
* \note The caller is responsible for freeing the result with
* g_hash_table_destroy().
*/
static GHashTable *
new_node_table(pe_node_t *node)
{
GHashTable *table = pcmk__strkey_table(NULL, free);
node = pe__copy_node(node);
g_hash_table_insert(table, (gpointer) node->details->id, node);
return table;
}
/*!
* \internal
* \brief Apply a resource's parent's colocation scores to a node table
*
* \param[in] rsc Resource whose colocations should be applied
* \param[in,out] nodes Node table to apply colocations to
*/
static void
apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes)
{
GList *iter = NULL;
pcmk__colocation_t *colocation = NULL;
for (iter = rsc->parent->rsc_cons; iter != NULL; iter = iter->next) {
colocation = (pcmk__colocation_t *) iter->data;
pcmk__add_colocated_node_scores(colocation->primary, rsc->id, nodes,
colocation->node_attribute,
colocation->score / (float) INFINITY,
pcmk__coloc_select_default);
}
for (iter = rsc->parent->rsc_cons_lhs; iter != NULL; iter = iter->next) {
colocation = (pcmk__colocation_t *) iter->data;
if (!pcmk__colocation_has_influence(colocation, rsc)) {
continue;
}
pcmk__add_colocated_node_scores(colocation->dependent, rsc->id, nodes,
colocation->node_attribute,
colocation->score / (float) INFINITY,
pcmk__coloc_select_nonnegative);
}
}
/*!
* \internal
* \brief Compare clone or bundle instances based on colocation scores
*
* Determine the relative order in which two clone or bundle instances should be
* assigned to nodes, considering the scores of colocation constraints directly
* or indirectly involving them.
*
* \param[in] instance1 First instance to compare
* \param[in] instance2 Second instance to compare
*
* \return A negative number if \p instance1 should be assigned first,
* a positive number if \p instance2 should be assigned first,
* or 0 if assignment order doesn't matter
*/
static int
cmp_instance_by_colocation(const pe_resource_t *instance1,
const pe_resource_t *instance2)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
pe_node_t *current_node1 = pe__current_node(instance1);
pe_node_t *current_node2 = pe__current_node(instance2);
GHashTable *colocated_scores1 = NULL;
GHashTable *colocated_scores2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL)
&& (instance2 != NULL) && (instance2->parent != NULL)
&& (current_node1 != NULL) && (current_node2 != NULL));
// Create node tables initialized with each node
colocated_scores1 = new_node_table(current_node1);
colocated_scores2 = new_node_table(current_node2);
// Apply parental colocations
apply_parent_colocations(instance1, &colocated_scores1);
apply_parent_colocations(instance2, &colocated_scores2);
// Find original nodes again, with scores updated for colocations
node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
// Compare nodes by updated scores
if (node1->weight < node2->weight) {
crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
instance1->id, node1->weight, pe__node_name(node1),
instance2->id, node2->weight, pe__node_name(node2));
rc = 1;
} else if (node1->weight > node2->weight) {
crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
instance1->id, node1->weight, pe__node_name(node1),
instance2->id, node2->weight, pe__node_name(node2));
rc = -1;
}
g_hash_table_destroy(colocated_scores1);
g_hash_table_destroy(colocated_scores2);
return rc;
}
/*!
* \internal
* \brief Check whether a resource or any of its children are failed
*
* \param[in] rsc Resource to check
*
* \return true if \p rsc or any of its children are failed, otherwise false
*/
static bool
did_fail(const pe_resource_t * rsc)
{
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
return true;
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
if (did_fail((pe_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether a node is allowed to run a resource
*
* \param[in] rsc Resource to check
* \param[in,out] node Node to check (will be set NULL if not allowed)
*
* \return true if *node is either NULL or allowed for \p rsc, otherwise false
*/
static bool
node_is_allowed(const pe_resource_t *rsc, pe_node_t **node)
{
if (*node != NULL) {
pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes,
(*node)->details->id);
if ((allowed == NULL) || (allowed->weight < 0)) {
pe_rsc_trace(rsc, "%s: current location (%s) is unavailable",
rsc->id, pe__node_name(*node));
*node = NULL;
return false;
}
}
return true;
}
/*!
* \internal
* \brief Compare two clone or bundle instances' instance numbers
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a's instance number is lower,
* a positive number if \p b's instance number is lower,
* or 0 if their instance numbers are the same
*/
gint
pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
{
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
char *div1 = NULL;
char *div2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
// Clone numbers are after a colon, bundle numbers after a dash
div1 = strrchr(instance1->id, ':');
if (div1 == NULL) {
div1 = strrchr(instance1->id, '-');
}
div2 = strrchr(instance2->id, ':');
if (div2 == NULL) {
div2 = strrchr(instance2->id, '-');
}
CRM_ASSERT((div1 != NULL) && (div2 != NULL));
return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
}
/*!
* \internal
* \brief Compare clone or bundle instances according to assignment order
*
* Compare two clone or bundle instances according to the order they should be
* assigned to nodes, preferring (in order):
*
* - Active instance that is less multiply active
* - Instance that is not active on a disallowed node
* - Instance with higher configured priority
* - Active instance whose current node can run resources
* - Active instance whose parent is allowed on current node
* - Active instance whose current node has fewer other instances
* - Active instance
* - Failed instance
* - Instance whose colocations result in higher score on current node
* - Instance with lower ID in lexicographic order
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a should be assigned first,
* a positive number if \p b should be assigned first,
* or 0 if assignment order doesn't matter
*/
gint
pcmk__cmp_instance(gconstpointer a, gconstpointer b)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
unsigned int nnodes1 = 0;
unsigned int nnodes2 = 0;
bool can1 = true;
bool can2 = true;
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
node1 = pe__find_active_on(instance1, &nnodes1, NULL);
node2 = pe__find_active_on(instance2, &nnodes2, NULL);
/* If both instances are running and at least one is multiply
* active, prefer instance that's running on fewer nodes.
*/
if ((nnodes1 > 0) && (nnodes2 > 0)) {
if (nnodes1 < nnodes2) {
crm_trace("Assign %s (active on %d) before %s (active on %d): "
"less multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return -1;
} else if (nnodes1 > nnodes2) {
crm_trace("Assign %s (active on %d) after %s (active on %d): "
"more multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return 1;
}
}
/* An instance that is either inactive or active on an allowed node is
* preferred over an instance that is active on a no-longer-allowed node.
*/
can1 = node_is_allowed(instance1, &node1);
can2 = node_is_allowed(instance2, &node2);
if (can1 && !can2) {
crm_trace("Assign %s before %s: not active on a disallowed node",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: active on a disallowed node",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher configured priority
if (instance1->priority > instance2->priority) {
crm_trace("Assign %s before %s: priority (%d > %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return -1;
} else if (instance1->priority < instance2->priority) {
crm_trace("Assign %s after %s: priority (%d < %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return 1;
}
// Prefer active instance
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: inactive",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node can run resources
can1 = pcmk__node_available(node1, false, false);
can2 = pcmk__node_available(node2, false, false);
if (can1 && !can2) {
crm_trace("Assign %s before %s: current node can run resources",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: current node can't run resources",
instance1->id, instance2->id);
return 1;
}
// Prefer instance whose parent is allowed to run on instance's current node
node1 = pcmk__top_allowed_node(instance1, node1);
node2 = pcmk__top_allowed_node(instance2, node2);
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: "
"parent not allowed on either instance's current node",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: parent not allowed on current node",
instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: parent allowed on current node",
instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node is running fewer other instances
if (node1->count < node2->count) {
crm_trace("Assign %s before %s: fewer active instances on current node",
instance1->id, instance2->id);
return -1;
} else if (node1->count > node2->count) {
crm_trace("Assign %s after %s: more active instances on current node",
instance1->id, instance2->id);
return 1;
}
// Prefer failed instance
can1 = did_fail(instance1);
can2 = did_fail(instance2);
if (!can1 && can2) {
crm_trace("Assign %s before %s: failed", instance1->id, instance2->id);
return -1;
} else if (can1 && !can2) {
crm_trace("Assign %s after %s: not failed",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher cumulative colocation score on current node
rc = cmp_instance_by_colocation(instance1, instance2);
if (rc != 0) {
return rc;
}
// Prefer instance with lower instance number
rc = pcmk__cmp_instance_number(instance1, instance2);
if (rc < 0) {
crm_trace("Assign %s before %s: instance number",
instance1->id, instance2->id);
} else if (rc > 0) {
crm_trace("Assign %s after %s: instance number",
instance1->id, instance2->id);
} else {
crm_trace("No assignment preference for %s vs. %s",
instance1->id, instance2->id);
}
return rc;
}

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jan 25, 6:17 AM (1 d, 1 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1320577
Default Alt Text
(123 KB)

Event Timeline