
diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h
index e32064821e..02b012a05b 100644
--- a/lib/pacemaker/libpacemaker_private.h
+++ b/lib/pacemaker/libpacemaker_private.h
@@ -1,1159 +1,1162 @@
/*
* Copyright 2021-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__PACEMAKER_LIBPACEMAKER_PRIVATE__H
#define PCMK__PACEMAKER_LIBPACEMAKER_PRIVATE__H
/* This header is for the sole use of libpacemaker, so that functions can be
* declared with G_GNUC_INTERNAL for efficiency.
*/
#include <stdio.h> // NULL
#include <stdint.h> // uint32_t
#include <stdbool.h> // bool, false
#include <glib.h> // guint, gpointer, GList, GHashTable
#include <libxml/tree.h> // xmlNode
#include <crm/common/scheduler.h> // pcmk_action_t, pcmk_node_t, etc.
#include <crm/common/scheduler_internal.h> // pcmk__location_t, etc.
#include <crm/cib.h> // cib_t
#include <crm/lrmd_events.h> // lrmd_event_data_t
#include <crm/pengine/internal.h> // pe__const_top_resource(), etc.
#include <pacemaker.h> // pcmk_injections_t
#include <pacemaker-internal.h> // pcmk__colocation_t
#ifdef __cplusplus
extern "C" {
#endif
// Colocation flags
enum pcmk__coloc_flags {
pcmk__coloc_none = 0U,
// Primary is affected even if already active
pcmk__coloc_influence = (1U << 0),
// Colocation was explicitly configured in CIB
pcmk__coloc_explicit = (1U << 1),
};
// Flags to modify the behavior of add_colocated_node_scores()
enum pcmk__coloc_select {
// With no other flags, apply all "with this" colocations
pcmk__coloc_select_default = 0,
// Apply "this with" colocations instead of "with this" colocations
pcmk__coloc_select_this_with = (1 << 0),
// Apply only colocations with non-negative scores
pcmk__coloc_select_nonnegative = (1 << 1),
// Apply only colocations with at least one matching node
pcmk__coloc_select_active = (1 << 2),
};
// Flags the update_ordered_actions() method can return
enum pcmk__updated {
pcmk__updated_none = 0, // Nothing changed
pcmk__updated_first = (1 << 0), // First action was updated
pcmk__updated_then = (1 << 1), // Then action was updated
};
#define pcmk__set_updated_flags(au_flags, action, flags_to_set) do { \
au_flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Action update", \
(action)->uuid, au_flags, \
(flags_to_set), #flags_to_set); \
} while (0)
#define pcmk__clear_updated_flags(au_flags, action, flags_to_clear) do { \
au_flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Action update", \
(action)->uuid, au_flags, \
(flags_to_clear), #flags_to_clear); \
} while (0)
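/* Editor's sketch (hypothetical helper names, not from this patch): a typical
 * update_ordered_actions() implementation accumulates these flags as it
 * modifies each action, for example:
 *
 *     uint32_t changed = pcmk__updated_none;
 *
 *     if (update_first_action(first)) {   // hypothetical helper
 *         pcmk__set_updated_flags(changed, first, pcmk__updated_first);
 *     }
 *     if (update_then_action(then)) {     // hypothetical helper
 *         pcmk__set_updated_flags(changed, then, pcmk__updated_then);
 *     }
 *     return changed;
 */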
// Resource assignment methods
struct pcmk__assignment_methods {
/*!
* \internal
* \brief Assign a resource to a node
*
* \param[in,out] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
* \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
* node, set next role to stopped and update
* existing actions (if \p rsc is not a
* primitive, this applies to its primitive
* descendants instead)
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource()
* can completely undo the assignment. A successful assignment can be
* either undone or left alone as final. A failed assignment has the
* same effect as calling pcmk__unassign_resource(); there are no side
* effects on roles or actions.
*/
pcmk_node_t *(*assign)(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
bool stop_if_fail);
/*!
* \internal
* \brief Create all actions needed for a given resource
*
* \param[in,out] rsc Resource to create actions for
*/
void (*create_actions)(pcmk_resource_t *rsc);
/*!
* \internal
* \brief Schedule any probes needed for a resource on a node
*
* \param[in,out] rsc Resource to create probe for
* \param[in,out] node Node to create probe on
*
* \return true if any probe was created, otherwise false
*/
bool (*create_probe)(pcmk_resource_t *rsc, pcmk_node_t *node);
/*!
* \internal
* \brief Create implicit constraints needed for a resource
*
* \param[in,out] rsc Resource to create implicit constraints for
*/
void (*internal_constraints)(pcmk_resource_t *rsc);
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node scores (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*
* \return The score added to the dependent's priority
*/
int (*apply_coloc_score)(pcmk_resource_t *dependent,
const pcmk_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent);
/*!
* \internal
* \brief Create list of all resources in colocations with a given resource
*
* Given a resource, create a list of all resources involved in mandatory
* colocations with it, whether directly or via chained colocations.
*
* \param[in] rsc Resource to add to colocated list
* \param[in] orig_rsc Resource originally requested
* \param[in,out] colocated_rscs Existing list
*
* \return List of given resource and all resources involved in colocations
*
* \note This function is recursive; top-level callers should pass NULL as
* \p colocated_rscs and \p orig_rsc, and the desired resource as
* \p rsc. The recursive calls will use other values.
*/
GList *(*colocated_resources)(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList *colocated_rscs);
/*!
* \internal
* \brief Add colocations affecting a resource as primary to a list
*
* Given a resource being assigned (\p orig_rsc) and a resource somewhere in
* its chain of ancestors (\p rsc, which may be \p orig_rsc), get
* colocations that affect the ancestor as primary and should affect the
* resource, and add them to a given list.
*
* \param[in] rsc Resource whose colocations should be added
* \param[in] orig_rsc Affected resource (\p rsc or a descendant)
* \param[in,out] list List of colocations to add to
*
* \note All arguments should be non-NULL.
* \note The pcmk__with_this_colocations() wrapper should usually be used
* instead of using this method directly.
*/
void (*with_this_colocations)(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList **list);
/*!
* \internal
* \brief Add colocations affecting a resource as dependent to a list
*
* Given a resource being assigned (\p orig_rsc) and a resource somewhere in
* its chain of ancestors (\p rsc, which may be \p orig_rsc), get
* colocations that affect the ancestor as dependent and should affect the
* resource, and add them to a given list.
*
* \param[in] rsc Resource whose colocations should be added
* \param[in] orig_rsc Affected resource (\p rsc or a descendant)
* \param[in,out] list List of colocations to add to
*
* \note All arguments should be non-NULL.
* \note The pcmk__this_with_colocations() wrapper should usually be used
* instead of using this method directly.
*/
void (*this_with_colocations)(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList **list);
/*!
* \internal
* \brief Update nodes with scores of colocated resources' nodes
*
* Given a table of nodes and a resource, update the nodes' scores with the
* scores of the best nodes matching the attribute used for each of the
* resource's relevant colocations.
*
* \param[in,out] source_rsc Resource whose node scores to add
* \param[in] target_rsc Resource on whose behalf to update \p *nodes
* \param[in] log_id Resource ID for logs (if \c NULL, use
* \p source_rsc ID)
* \param[in,out] nodes Nodes to update (set initial contents to
* \c NULL to copy allowed nodes from
* \p source_rsc)
* \param[in] colocation Original colocation constraint (used to get
* configured primary resource's stickiness, and
* to get colocation node attribute; if \c NULL,
* <tt>source_rsc</tt>'s own matching node scores
* will not be added, and \p *nodes must be
* \c NULL as well)
* \param[in] factor Incorporate scores multiplied by this factor
* \param[in] flags Bitmask of enum pcmk__coloc_select values
*
* \note \c NULL \p target_rsc, \c NULL \p *nodes, \c NULL \p colocation,
* and the \c pcmk__coloc_select_this_with flag are used together (and
* only by \c cmp_resources()).
* \note The caller remains responsible for freeing \p *nodes.
*/
void (*add_colocated_node_scores)(pcmk_resource_t *source_rsc,
const pcmk_resource_t *target_rsc,
const char *log_id, GHashTable **nodes,
const pcmk__colocation_t *colocation,
float factor, uint32_t flags);
/*!
* \internal
* \brief Apply a location constraint to a resource's allowed node scores
*
* \param[in,out] rsc Resource to apply constraint to
* \param[in,out] location Location constraint to apply
*/
void (*apply_location)(pcmk_resource_t *rsc, pcmk__location_t *location);
/*!
* \internal
* \brief Return action flags for a given resource action
*
* \param[in,out] action Action to get flags for
* \param[in] node If not NULL, limit effects to this node
*
* \return Flags appropriate to \p action on \p node
* \note For primitives, this will be the same as action->flags regardless
* of node. For collective resources, the flags can differ due to
* multiple instances possibly being involved.
*/
uint32_t (*action_flags)(pcmk_action_t *action, const pcmk_node_t *node);
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions'
* flags (and runnable_before members if applicable) as appropriate for the
* ordering. Effects may cascade to other orderings involving the actions as
* well.
*
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this
* node (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates
* (may include pcmk__action_optional to affect
* only mandatory actions and
* pcmk__action_runnable to affect only runnable
* actions)
* \param[in] type Group of enum pcmk__action_relation_flags
* \param[in,out] scheduler Scheduler data
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t (*update_ordered_actions)(pcmk_action_t *first,
pcmk_action_t *then,
const pcmk_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pcmk_scheduler_t *scheduler);
/*!
* \internal
* \brief Output a summary of scheduled actions for a resource
*
* \param[in,out] rsc Resource to output actions for
*/
void (*output_actions)(pcmk_resource_t *rsc);
/*!
* \internal
* \brief Add a resource's actions to the transition graph
*
* \param[in,out] rsc Resource whose actions should be added
*/
void (*add_actions_to_graph)(pcmk_resource_t *rsc);
/*!
* \internal
* \brief Add meta-attributes relevant to transition graph actions to XML
*
* If a given resource supports variant-specific meta-attributes that are
* needed for transition graph actions, add them to a given XML element.
*
* \param[in] rsc Resource whose meta-attributes should be added
* \param[in,out] xml Transition graph action attributes XML to add to
*/
void (*add_graph_meta)(const pcmk_resource_t *rsc, xmlNode *xml);
/*!
* \internal
* \brief Add a resource's utilization to a table of utilization values
*
* This function is used when summing the utilization of a resource and all
* resources colocated with it, to determine whether a node has sufficient
* capacity. Given a resource and a table of utilization values, it will add
* the resource's utilization to the existing values, if the resource has
* not yet been assigned to a node.
*
* \param[in] rsc Resource with utilization to add
* \param[in] orig_rsc Resource being assigned (for logging only)
* \param[in] all_rscs List of all resources that will be summed
* \param[in,out] utilization Table of utilization values to add to
*/
void (*add_utilization)(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization);
/*!
* \internal
* \brief Apply a shutdown lock for a resource, if appropriate
*
* \param[in,out] rsc Resource to check for shutdown lock
*/
void (*shutdown_lock)(pcmk_resource_t *rsc);
};
// Actions (pcmk_sched_actions.c)
G_GNUC_INTERNAL
void pcmk__update_action_for_orderings(pcmk_action_t *action,
pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
uint32_t pcmk__update_ordered_actions(pcmk_action_t *first, pcmk_action_t *then,
const pcmk_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
void pcmk__log_action(const char *pre_text, const pcmk_action_t *action,
bool details);
G_GNUC_INTERNAL
pcmk_action_t *pcmk__new_cancel_action(pcmk_resource_t *rsc, const char *name,
guint interval_ms,
const pcmk_node_t *node);
G_GNUC_INTERNAL
pcmk_action_t *pcmk__new_shutdown_action(pcmk_node_t *node);
G_GNUC_INTERNAL
bool pcmk__action_locks_rsc_to_node(const pcmk_action_t *action);
G_GNUC_INTERNAL
void pcmk__deduplicate_action_inputs(pcmk_action_t *action);
G_GNUC_INTERNAL
void pcmk__output_actions(pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
bool pcmk__check_action_config(pcmk_resource_t *rsc, pcmk_node_t *node,
const xmlNode *xml_op);
G_GNUC_INTERNAL
void pcmk__handle_rsc_config_changes(pcmk_scheduler_t *scheduler);
// Recurring actions (pcmk_sched_recurring.c)
G_GNUC_INTERNAL
void pcmk__create_recurring_actions(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__schedule_cancel(pcmk_resource_t *rsc, const char *call_id,
const char *task, guint interval_ms,
const pcmk_node_t *node, const char *reason);
G_GNUC_INTERNAL
void pcmk__reschedule_recurring(pcmk_resource_t *rsc, const char *task,
guint interval_ms, pcmk_node_t *node);
G_GNUC_INTERNAL
bool pcmk__action_is_recurring(const pcmk_action_t *action);
// Producing transition graphs (pcmk_graph_producer.c)
G_GNUC_INTERNAL
bool pcmk__graph_has_loop(const pcmk_action_t *init_action,
const pcmk_action_t *action,
pcmk__related_action_t *input);
G_GNUC_INTERNAL
void pcmk__add_rsc_actions_to_graph(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__create_graph(pcmk_scheduler_t *scheduler);
// Fencing (pcmk_sched_fencing.c)
G_GNUC_INTERNAL
void pcmk__order_vs_fence(pcmk_action_t *stonith_op,
pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node,
pcmk_action_t *action,
enum pcmk__action_relation_flags order);
G_GNUC_INTERNAL
void pcmk__fence_guest(pcmk_node_t *node);
G_GNUC_INTERNAL
bool pcmk__node_unfenced(const pcmk_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data);
// Injected scheduler inputs (pcmk_sched_injections.c)
G_GNUC_INTERNAL
void pcmk__inject_scheduler_input(pcmk_scheduler_t *scheduler, cib_t *cib,
const pcmk_injections_t *injections);
// Constraints of any type (pcmk_sched_constraints.c)
G_GNUC_INTERNAL
pcmk_resource_t *pcmk__find_constraint_resource(GList *rsc_list,
const char *id);
G_GNUC_INTERNAL
int pcmk__parse_constraint_role(const char *id, const char *role_spec,
enum rsc_role_e *role);
G_GNUC_INTERNAL
xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj,
const pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
bool pcmk__valid_resource_or_tag(const pcmk_scheduler_t *scheduler,
const char *id, pcmk_resource_t **rsc,
pcmk__idref_t **tag);
G_GNUC_INTERNAL
bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr,
bool convert_rsc, const pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
void pcmk__create_internal_constraints(pcmk_scheduler_t *scheduler);
// Location constraints
G_GNUC_INTERNAL
void pcmk__unpack_location(xmlNode *xml_obj, pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
pcmk__location_t *pcmk__new_location(const char *id, pcmk_resource_t *rsc,
int node_score, const char *discover_mode,
pcmk_node_t *foo_node);
G_GNUC_INTERNAL
void pcmk__apply_locations(pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
void pcmk__apply_location(pcmk_resource_t *rsc, pcmk__location_t *constraint);
// Colocation constraints (pcmk_sched_colocation.c)
enum pcmk__coloc_affects {
pcmk__coloc_affects_nothing = 0,
pcmk__coloc_affects_location,
pcmk__coloc_affects_role,
};
G_GNUC_INTERNAL
const char *pcmk__colocation_node_attr(const pcmk_node_t *node,
const char *attr,
const pcmk_resource_t *rsc);
G_GNUC_INTERNAL
enum pcmk__coloc_affects pcmk__colocation_affects(const pcmk_resource_t
*dependent,
const pcmk_resource_t
*primary,
const pcmk__colocation_t
*colocation,
bool preview);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_scores(pcmk_resource_t *dependent,
const pcmk_resource_t *primary,
const pcmk__colocation_t *colocation);
G_GNUC_INTERNAL
int pcmk__apply_coloc_to_priority(pcmk_resource_t *dependent,
const pcmk_resource_t *primary,
const pcmk__colocation_t *colocation);
G_GNUC_INTERNAL
void pcmk__add_colocated_node_scores(pcmk_resource_t *source_rsc,
const pcmk_resource_t *target_rsc,
const char *log_id, GHashTable **nodes,
const pcmk__colocation_t *colocation,
float factor, uint32_t flags);
G_GNUC_INTERNAL
void pcmk__add_dependent_scores(gpointer data, gpointer user_data);
G_GNUC_INTERNAL
void pcmk__colocation_intersect_nodes(pcmk_resource_t *dependent,
const pcmk_resource_t *primary,
const pcmk__colocation_t *colocation,
const GList *primary_nodes,
bool merge_scores);
G_GNUC_INTERNAL
void pcmk__unpack_colocation(xmlNode *xml_obj, pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
void pcmk__add_this_with(GList **list, const pcmk__colocation_t *colocation,
const pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__add_this_with_list(GList **list, GList *addition,
const pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__add_with_this(GList **list, const pcmk__colocation_t *colocation,
const pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__add_with_this_list(GList **list, GList *addition,
const pcmk_resource_t *rsc);
G_GNUC_INTERNAL
GList *pcmk__with_this_colocations(const pcmk_resource_t *rsc);
G_GNUC_INTERNAL
GList *pcmk__this_with_colocations(const pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__new_colocation(const char *id, const char *node_attr, int score,
pcmk_resource_t *dependent, pcmk_resource_t *primary,
const char *dependent_role_spec,
const char *primary_role_spec, uint32_t flags);
G_GNUC_INTERNAL
void pcmk__block_colocation_dependents(pcmk_action_t *action);
G_GNUC_INTERNAL
bool pcmk__colocation_has_influence(const pcmk__colocation_t *colocation,
const pcmk_resource_t *rsc);
// Ordering constraints (pcmk_sched_ordering.c)
G_GNUC_INTERNAL
void pcmk__new_ordering(pcmk_resource_t *first_rsc, char *first_task,
pcmk_action_t *first_action, pcmk_resource_t *then_rsc,
char *then_task, pcmk_action_t *then_action,
uint32_t flags, pcmk_scheduler_t *sched);
G_GNUC_INTERNAL
void pcmk__unpack_ordering(xmlNode *xml_obj, pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
void pcmk__disable_invalid_orderings(pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
void pcmk__order_stops_before_shutdown(pcmk_node_t *node,
pcmk_action_t *shutdown_op);
G_GNUC_INTERNAL
void pcmk__apply_orderings(pcmk_scheduler_t *sched);
G_GNUC_INTERNAL
void pcmk__order_after_each(pcmk_action_t *after, GList *list);
/*!
* \internal
* \brief Create a new ordering between two resource actions
*
* \param[in,out] first_rsc Resource for 'first' action
* \param[in,out] first_task Action key for 'first' action
* \param[in] then_rsc Resource for 'then' action
* \param[in,out] then_task Action key for 'then' action
* \param[in] flags Group of enum pcmk__action_relation_flags
*/
#define pcmk__order_resource_actions(first_rsc, first_task, \
then_rsc, then_task, flags) \
pcmk__new_ordering((first_rsc), \
pcmk__op_key((first_rsc)->id, (first_task), 0), \
NULL, \
(then_rsc), \
pcmk__op_key((then_rsc)->id, (then_task), 0), \
NULL, (flags), (first_rsc)->priv->scheduler)
#define pcmk__order_starts(rsc1, rsc2, flags) \
pcmk__order_resource_actions((rsc1), PCMK_ACTION_START, \
(rsc2), PCMK_ACTION_START, (flags))
#define pcmk__order_stops(rsc1, rsc2, flags) \
pcmk__order_resource_actions((rsc1), PCMK_ACTION_STOP, \
(rsc2), PCMK_ACTION_STOP, (flags))
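/* Editor's sketch: example use of the ordering convenience macros. The
 * relation flag shown (pcmk__ar_ordered) is assumed to come from
 * enum pcmk__action_relation_flags; any valid flag combination may be passed.
 *
 *     // Stop rsc2 before stopping rsc1, and start rsc1 before starting rsc2
 *     pcmk__order_stops(rsc2, rsc1, pcmk__ar_ordered);
 *     pcmk__order_starts(rsc1, rsc2, pcmk__ar_ordered);
 */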
// Ticket constraints (pcmk_sched_tickets.c)
G_GNUC_INTERNAL
void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pcmk_scheduler_t *scheduler);
// Promotable clone resources (pcmk_sched_promotable.c)
G_GNUC_INTERNAL
void pcmk__add_promotion_scores(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__require_promotion_tickets(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__set_instance_roles(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__create_promotable_actions(pcmk_resource_t *clone);
G_GNUC_INTERNAL
void pcmk__promotable_restart_ordering(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__order_promotable_instances(pcmk_resource_t *clone);
G_GNUC_INTERNAL
void pcmk__update_dependent_with_promotable(const pcmk_resource_t *primary,
pcmk_resource_t *dependent,
const pcmk__colocation_t
*colocation);
G_GNUC_INTERNAL
int pcmk__update_promotable_dependent_priority(const pcmk_resource_t *primary,
pcmk_resource_t *dependent,
const pcmk__colocation_t
*colocation);
// Pacemaker Remote nodes (pcmk_sched_remote.c)
G_GNUC_INTERNAL
bool pcmk__is_failed_remote_node(const pcmk_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_remote_connection_actions(pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
bool pcmk__rsc_corresponds_to_guest(const pcmk_resource_t *rsc,
const pcmk_node_t *node);
G_GNUC_INTERNAL
pcmk_node_t *pcmk__connection_host_for_action(const pcmk_action_t *action);
G_GNUC_INTERNAL
void pcmk__substitute_remote_addr(pcmk_resource_t *rsc, GHashTable *params);
G_GNUC_INTERNAL
void pcmk__add_guest_meta_to_xml(xmlNode *args_xml,
const pcmk_action_t *action);
// Primitives (pcmk_sched_primitive.c)
G_GNUC_INTERNAL
pcmk_node_t *pcmk__primitive_assign(pcmk_resource_t *rsc,
const pcmk_node_t *prefer,
bool stop_if_fail);
G_GNUC_INTERNAL
void pcmk__primitive_create_actions(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__primitive_internal_constraints(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
uint32_t pcmk__primitive_action_flags(pcmk_action_t *action,
const pcmk_node_t *node);
G_GNUC_INTERNAL
int pcmk__primitive_apply_coloc_score(pcmk_resource_t *dependent,
const pcmk_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__with_primitive_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList **list);
G_GNUC_INTERNAL
void pcmk__primitive_with_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList **list);
G_GNUC_INTERNAL
void pcmk__schedule_cleanup(pcmk_resource_t *rsc, const pcmk_node_t *node,
bool optional);
G_GNUC_INTERNAL
void pcmk__primitive_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml);
G_GNUC_INTERNAL
void pcmk__primitive_add_utilization(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
G_GNUC_INTERNAL
void pcmk__primitive_shutdown_lock(pcmk_resource_t *rsc);
// Groups (pcmk_sched_group.c)
G_GNUC_INTERNAL
pcmk_node_t *pcmk__group_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
bool stop_if_fail);
G_GNUC_INTERNAL
void pcmk__group_create_actions(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__group_internal_constraints(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
int pcmk__group_apply_coloc_score(pcmk_resource_t *dependent,
const pcmk_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__with_group_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList **list);
G_GNUC_INTERNAL
void pcmk__group_with_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList **list);
G_GNUC_INTERNAL
void pcmk__group_add_colocated_node_scores(pcmk_resource_t *source_rsc,
const pcmk_resource_t *target_rsc,
const char *log_id,
GHashTable **nodes,
const pcmk__colocation_t *colocation,
float factor, uint32_t flags);
G_GNUC_INTERNAL
void pcmk__group_apply_location(pcmk_resource_t *rsc,
pcmk__location_t *location);
G_GNUC_INTERNAL
uint32_t pcmk__group_action_flags(pcmk_action_t *action,
const pcmk_node_t *node);
G_GNUC_INTERNAL
uint32_t pcmk__group_update_ordered_actions(pcmk_action_t *first,
pcmk_action_t *then,
const pcmk_node_t *node,
uint32_t flags, uint32_t filter,
uint32_t type,
pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
GList *pcmk__group_colocated_resources(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList *colocated_rscs);
G_GNUC_INTERNAL
void pcmk__group_add_utilization(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
G_GNUC_INTERNAL
void pcmk__group_shutdown_lock(pcmk_resource_t *rsc);
// Clones (pcmk_sched_clone.c)
G_GNUC_INTERNAL
pcmk_node_t *pcmk__clone_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
bool stop_if_fail);
G_GNUC_INTERNAL
void pcmk__clone_create_actions(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__clone_create_probe(pcmk_resource_t *rsc, pcmk_node_t *node);
G_GNUC_INTERNAL
void pcmk__clone_internal_constraints(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
int pcmk__clone_apply_coloc_score(pcmk_resource_t *dependent,
const pcmk_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__with_clone_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList **list);
G_GNUC_INTERNAL
void pcmk__clone_with_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList **list);
G_GNUC_INTERNAL
void pcmk__clone_apply_location(pcmk_resource_t *rsc,
pcmk__location_t *constraint);
G_GNUC_INTERNAL
uint32_t pcmk__clone_action_flags(pcmk_action_t *action,
const pcmk_node_t *node);
G_GNUC_INTERNAL
void pcmk__clone_add_actions_to_graph(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__clone_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml);
G_GNUC_INTERNAL
void pcmk__clone_add_utilization(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
G_GNUC_INTERNAL
void pcmk__clone_shutdown_lock(pcmk_resource_t *rsc);
// Bundles (pcmk_sched_bundle.c)
G_GNUC_INTERNAL
pcmk_node_t *pcmk__bundle_assign(pcmk_resource_t *rsc,
const pcmk_node_t *prefer, bool stop_if_fail);
G_GNUC_INTERNAL
void pcmk__bundle_create_actions(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__bundle_create_probe(pcmk_resource_t *rsc, pcmk_node_t *node);
G_GNUC_INTERNAL
void pcmk__bundle_internal_constraints(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
int pcmk__bundle_apply_coloc_score(pcmk_resource_t *dependent,
const pcmk_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__with_bundle_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList **list);
G_GNUC_INTERNAL
void pcmk__bundle_with_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList **list);
G_GNUC_INTERNAL
void pcmk__bundle_apply_location(pcmk_resource_t *rsc,
pcmk__location_t *constraint);
G_GNUC_INTERNAL
uint32_t pcmk__bundle_action_flags(pcmk_action_t *action,
const pcmk_node_t *node);
G_GNUC_INTERNAL
void pcmk__output_bundle_actions(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__bundle_add_actions_to_graph(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__bundle_add_utilization(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
G_GNUC_INTERNAL
void pcmk__bundle_shutdown_lock(pcmk_resource_t *rsc);
// Clone instances or bundle replica containers (pcmk_sched_instances.c)
G_GNUC_INTERNAL
void pcmk__assign_instances(pcmk_resource_t *collective, GList *instances,
int max_total, int max_per_node);
G_GNUC_INTERNAL
void pcmk__create_instance_actions(pcmk_resource_t *rsc, GList *instances);
G_GNUC_INTERNAL
bool pcmk__instance_matches(const pcmk_resource_t *instance,
const pcmk_node_t *node, enum rsc_role_e role,
bool current);
G_GNUC_INTERNAL
pcmk_resource_t *pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc,
const pcmk_resource_t *rsc,
enum rsc_role_e role,
bool current);
G_GNUC_INTERNAL
uint32_t pcmk__instance_update_ordered_actions(pcmk_action_t *first,
pcmk_action_t *then,
const pcmk_node_t *node,
uint32_t flags, uint32_t filter,
uint32_t type,
pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
uint32_t pcmk__collective_action_flags(pcmk_action_t *action,
const GList *instances,
const pcmk_node_t *node);
// Injections (pcmk_injections.c)
G_GNUC_INTERNAL
xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node,
bool up);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node,
const char *resource,
const char *lrm_name,
const char *rclass,
const char *rtype,
const char *rprovider);
G_GNUC_INTERNAL
void pcmk__inject_failcount(pcmk__output_t *out, cib_t *cib_conn,
xmlNode *cib_node, const char *resource,
const char *task, guint interval_ms, int rc,
bool infinity);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_action_result(xmlNode *cib_resource,
lrmd_event_data_t *op, const char *node,
int target_rc);
// Nodes (pcmk_sched_nodes.c)
//! Options for checking node availability
enum pcmk__node_availability {
//! Disallow offline or unclean nodes (always implied)
pcmk__node_alive = 0,
//! Disallow shutting down, standby, and maintenance nodes
pcmk__node_usable = (1 << 0),
+ //! Disallow nodes with zero score
+ pcmk__node_no_zero = (1 << 1),
+
//! Disallow nodes with negative scores
pcmk__node_no_negative = (1 << 2),
//! Disallow guest nodes whose guest resource is unrunnable
pcmk__node_no_unrunnable_guest = (1 << 4),
//! Exempt guest nodes from alive and usable checks
pcmk__node_exempt_guest = (1 << 5),
};
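/* Editor's sketch: callers OR these flags together when calling
 * pcmk__node_available(). pcmk__node_alive (0) is always implied; the
 * pcmk__node_no_zero flag added by this patch additionally rejects nodes
 * whose assignment score is exactly zero. A minimal example:
 *
 *     uint32_t flags = pcmk__node_usable
 *                      |pcmk__node_no_zero
 *                      |pcmk__node_no_negative;
 *
 *     if (pcmk__node_available(node, flags)) {
 *         // node is online, usable, and has a strictly positive score
 *     }
 */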
G_GNUC_INTERNAL
bool pcmk__node_available(const pcmk_node_t *node, uint32_t flags);
G_GNUC_INTERNAL
bool pcmk__any_node_available(GHashTable *nodes);
G_GNUC_INTERNAL
GHashTable *pcmk__copy_node_table(GHashTable *nodes);
G_GNUC_INTERNAL
void pcmk__copy_node_tables(const pcmk_resource_t *rsc, GHashTable **copy);
G_GNUC_INTERNAL
void pcmk__restore_node_tables(pcmk_resource_t *rsc, GHashTable *backup);
G_GNUC_INTERNAL
GList *pcmk__sort_nodes(GList *nodes, pcmk_node_t *active_node);
G_GNUC_INTERNAL
void pcmk__apply_node_health(pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
pcmk_node_t *pcmk__top_allowed_node(const pcmk_resource_t *rsc,
const pcmk_node_t *node);
// Functions applying to more than one variant (pcmk_sched_resource.c)
G_GNUC_INTERNAL
void pcmk__set_assignment_methods(pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
bool pcmk__rsc_agent_changed(pcmk_resource_t *rsc, pcmk_node_t *node,
const xmlNode *rsc_entry, bool active_on_node);
G_GNUC_INTERNAL
GList *pcmk__rscs_matching_id(const char *id,
const pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
GList *pcmk__colocated_resources(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList *colocated_rscs);
G_GNUC_INTERNAL
void pcmk__noop_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml);
G_GNUC_INTERNAL
void pcmk__output_resource_actions(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__assign_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool force,
bool stop_if_fail);
G_GNUC_INTERNAL
void pcmk__unassign_resource(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__threshold_reached(pcmk_resource_t *rsc, const pcmk_node_t *node,
pcmk_resource_t **failed);
G_GNUC_INTERNAL
void pcmk__sort_resources(pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
gint pcmk__cmp_instance(gconstpointer a, gconstpointer b);
G_GNUC_INTERNAL
gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b);
// Functions related to probes (pcmk_sched_probes.c)
G_GNUC_INTERNAL
bool pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_probes(pcmk_scheduler_t *scheduler);
G_GNUC_INTERNAL
bool pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node);
G_GNUC_INTERNAL
void pcmk__schedule_probes(pcmk_scheduler_t *scheduler);
// Functions related to live migration (pcmk_sched_migration.c)
void pcmk__create_migration_actions(pcmk_resource_t *rsc,
const pcmk_node_t *current);
void pcmk__abort_dangling_migration(void *data, void *user_data);
bool pcmk__rsc_can_migrate(const pcmk_resource_t *rsc,
const pcmk_node_t *current);
void pcmk__order_migration_equivalents(pcmk__action_relation_t *order);
// Functions related to node utilization (pcmk_sched_utilization.c)
G_GNUC_INTERNAL
int pcmk__compare_node_capacities(const pcmk_node_t *node1,
const pcmk_node_t *node2);
G_GNUC_INTERNAL
void pcmk__consume_node_capacity(GHashTable *current_utilization,
const pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__release_node_capacity(GHashTable *current_utilization,
const pcmk_resource_t *rsc);
G_GNUC_INTERNAL
const pcmk_node_t *pcmk__ban_insufficient_capacity(pcmk_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__create_utilization_constraints(pcmk_resource_t *rsc,
const GList *allowed_nodes);
G_GNUC_INTERNAL
void pcmk__show_node_capacities(const char *desc, pcmk_scheduler_t *scheduler);
// Functions related to the scheduler (pcmk_scheduler.c)
G_GNUC_INTERNAL
int pcmk__init_scheduler(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date,
pcmk_scheduler_t **scheduler);
// General setup functions (pcmk_setup.c)
G_GNUC_INTERNAL
int pcmk__setup_output_cib_sched(pcmk__output_t **out, cib_t **cib,
pcmk_scheduler_t **scheduler, xmlNode **xml);
G_GNUC_INTERNAL
int pcmk__setup_output_fencing(pcmk__output_t **out, stonith_t **st, xmlNode **xml);
#ifdef __cplusplus
}
#endif
#endif // PCMK__PACEMAKER_LIBPACEMAKER_PRIVATE__H
diff --git a/lib/pacemaker/pcmk_sched_nodes.c b/lib/pacemaker/pcmk_sched_nodes.c
index cdeb13a1aa..73a845785e 100644
--- a/lib/pacemaker/pcmk_sched_nodes.c
+++ b/lib/pacemaker/pcmk_sched_nodes.c
@@ -1,459 +1,463 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <pacemaker-internal.h>
#include <pacemaker.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Check whether a node is available to run resources
*
* \param[in] node Node to check
* \param[in] flags Group of enum pcmk__node_availability flags
*
* \return true if node is available per flags, otherwise false
*/
bool
pcmk__node_available(const pcmk_node_t *node, uint32_t flags)
{
if ((node == NULL) || (node->details == NULL)) {
return false; // A nonexistent node is not available
}
// Guest nodes may be exempted from alive and usable checks
if (!pcmk_is_set(flags, pcmk__node_exempt_guest)
|| !pcmk__is_guest_or_bundle_node(node)) {
// pcmk__node_alive is implicit
if (!node->details->online || node->details->unclean) {
return false;
}
if (pcmk_is_set(flags, pcmk__node_usable)
&& (node->details->shutdown
|| pcmk_is_set(node->priv->flags, pcmk__node_standby)
|| node->details->maintenance)) {
return false;
}
}
+ if (pcmk_is_set(flags, pcmk__node_no_zero) && (node->assign->score == 0)) {
+ return false;
+ }
+
if (pcmk_is_set(flags, pcmk__node_no_negative)
&& (node->assign->score < 0)) {
return false;
}
if (pcmk_is_set(flags, pcmk__node_no_unrunnable_guest)
&& pcmk__is_guest_or_bundle_node(node)) {
pcmk_resource_t *guest = node->priv->remote->priv->launcher;
if (guest->priv->fns->location(guest, NULL,
pcmk__rsc_node_assigned) == NULL) {
return false;
}
}
return true;
}
/*!
* \internal
* \brief Create a hash table with copies of another table's nodes
*
* \param[in] nodes Hash table to copy
*
* \return New table with copies of nodes in \p nodes, or \c NULL if \p nodes is
* \c NULL
*/
GHashTable *
pcmk__copy_node_table(GHashTable *nodes)
{
GHashTable *new_table = NULL;
GHashTableIter iter;
pcmk_node_t *node = NULL;
if (nodes == NULL) {
return NULL;
}
new_table = pcmk__strkey_table(NULL, pcmk__free_node_copy);
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
pcmk_node_t *new_node = pe__copy_node(node);
g_hash_table_insert(new_table, (gpointer) new_node->priv->id,
new_node);
}
return new_table;
}
/*!
* \internal
* \brief Free a table of node tables
*
* \param[in,out] data Table to free
*
* \note This is a \c GDestroyNotify wrapper for \c g_hash_table_destroy().
*/
static void
destroy_node_tables(gpointer data)
{
g_hash_table_destroy((GHashTable *) data);
}
/*!
* \internal
* \brief Recursively copy the node tables of a resource
*
* Build a hash table containing copies of the allowed nodes tables of \p rsc
* and its entire tree of descendants. The key is the resource ID, and the value
* is a copy of the resource's node table.
*
* \param[in] rsc Resource whose node table to copy
* \param[in,out] copy Where to store the copied node tables
*
* \note \p *copy should be \c NULL for the top-level call.
* \note The caller is responsible for freeing \p copy using
* \c g_hash_table_destroy().
*/
void
pcmk__copy_node_tables(const pcmk_resource_t *rsc, GHashTable **copy)
{
pcmk__assert((rsc != NULL) && (copy != NULL));
if (*copy == NULL) {
*copy = pcmk__strkey_table(NULL, destroy_node_tables);
}
g_hash_table_insert(*copy, rsc->id,
pcmk__copy_node_table(rsc->priv->allowed_nodes));
for (const GList *iter = rsc->priv->children;
iter != NULL; iter = iter->next) {
pcmk__copy_node_tables((const pcmk_resource_t *) iter->data, copy);
}
}
/*!
* \internal
* \brief Recursively restore the node tables of a resource from backup
*
* Given a hash table containing backup copies of the allowed nodes tables of
* \p rsc and its entire tree of descendants, replace the resources' current
* node tables with the backed-up copies.
*
* \param[in,out] rsc Resource whose node tables to restore
* \param[in] backup Table of backup node tables (created by
* \c pcmk__copy_node_tables())
*
* \note This function frees the resources' current node tables.
*/
void
pcmk__restore_node_tables(pcmk_resource_t *rsc, GHashTable *backup)
{
pcmk__assert((rsc != NULL) && (backup != NULL));
g_hash_table_destroy(rsc->priv->allowed_nodes);
// Copy to avoid danger with multiple restores
rsc->priv->allowed_nodes =
pcmk__copy_node_table(g_hash_table_lookup(backup, rsc->id));
for (GList *iter = rsc->priv->children;
iter != NULL; iter = iter->next) {
pcmk__restore_node_tables((pcmk_resource_t *) iter->data, backup);
}
}
/*!
* \internal
* \brief Copy a list of node objects
*
* \param[in] list List to copy
* \param[in] reset Set copies' scores to 0
*
* \return New list of shallow copies of nodes in original list
*/
GList *
pcmk__copy_node_list(const GList *list, bool reset)
{
GList *result = NULL;
for (const GList *iter = list; iter != NULL; iter = iter->next) {
pcmk_node_t *new_node = NULL;
pcmk_node_t *this_node = iter->data;
new_node = pe__copy_node(this_node);
if (reset) {
new_node->assign->score = 0;
}
result = g_list_prepend(result, new_node);
}
return result;
}
/*!
* \internal
* \brief Compare two nodes for assignment preference
*
* Given two nodes, check which one is more preferred by assignment criteria
* such as node score and utilization.
*
* \param[in] a First node to compare
* \param[in] b Second node to compare
* \param[in] data Node to prefer if all else equal
*
* \return -1 if \p a is preferred, +1 if \p b is preferred, or 0 if they are
* equally preferred
*/
static gint
compare_nodes(gconstpointer a, gconstpointer b, gpointer data)
{
const pcmk_node_t *node1 = (const pcmk_node_t *) a;
const pcmk_node_t *node2 = (const pcmk_node_t *) b;
const pcmk_node_t *preferred = (const pcmk_node_t *) data;
int node1_score = -PCMK_SCORE_INFINITY;
int node2_score = -PCMK_SCORE_INFINITY;
int result = 0;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
// Compare node scores
if (pcmk__node_available(node1, pcmk__node_alive|pcmk__node_usable)) {
node1_score = node1->assign->score;
}
if (pcmk__node_available(node2, pcmk__node_alive|pcmk__node_usable)) {
node2_score = node2->assign->score;
}
if (node1_score > node2_score) {
crm_trace("%s before %s (score %d > %d)",
pcmk__node_name(node1), pcmk__node_name(node2),
node1_score, node2_score);
return -1;
}
if (node1_score < node2_score) {
crm_trace("%s after %s (score %d < %d)",
pcmk__node_name(node1), pcmk__node_name(node2),
node1_score, node2_score);
return 1;
}
// If appropriate, compare node utilization
if (pcmk__str_eq(node1->priv->scheduler->priv->placement_strategy,
PCMK_VALUE_MINIMAL, pcmk__str_casei)) {
goto equal;
}
if (pcmk__str_eq(node1->priv->scheduler->priv->placement_strategy,
PCMK_VALUE_BALANCED, pcmk__str_casei)) {
result = pcmk__compare_node_capacities(node1, node2);
if (result < 0) {
crm_trace("%s before %s (greater capacity by %d attributes)",
pcmk__node_name(node1), pcmk__node_name(node2),
result * -1);
return -1;
} else if (result > 0) {
crm_trace("%s after %s (lower capacity by %d attributes)",
pcmk__node_name(node1), pcmk__node_name(node2), result);
return 1;
}
}
// Compare number of resources already assigned to node
if (node1->priv->num_resources < node2->priv->num_resources) {
crm_trace("%s before %s (%d resources < %d)",
pcmk__node_name(node1), pcmk__node_name(node2),
node1->priv->num_resources, node2->priv->num_resources);
return -1;
} else if (node1->priv->num_resources > node2->priv->num_resources) {
crm_trace("%s after %s (%d resources > %d)",
pcmk__node_name(node1), pcmk__node_name(node2),
node1->priv->num_resources, node2->priv->num_resources);
return 1;
}
// Check whether one node is already running desired resource
if (preferred != NULL) {
if (pcmk__same_node(preferred, node1)) {
crm_trace("%s before %s (preferred node)",
pcmk__node_name(node1), pcmk__node_name(node2));
return -1;
} else if (pcmk__same_node(preferred, node2)) {
crm_trace("%s after %s (not preferred node)",
pcmk__node_name(node1), pcmk__node_name(node2));
return 1;
}
}
// If all else is equal, prefer node with lowest-sorting name
equal:
result = strcmp(node1->priv->name, node2->priv->name);
if (result < 0) {
crm_trace("%s before %s (name)",
pcmk__node_name(node1), pcmk__node_name(node2));
return -1;
} else if (result > 0) {
crm_trace("%s after %s (name)",
pcmk__node_name(node1), pcmk__node_name(node2));
return 1;
}
crm_trace("%s == %s", pcmk__node_name(node1), pcmk__node_name(node2));
return 0;
}
/*!
* \internal
* \brief Sort a list of nodes by assignment preference
*
* \param[in,out] nodes Node list to sort
* \param[in] active_node Node where resource being assigned is active
*
* \return New head of sorted list
*/
GList *
pcmk__sort_nodes(GList *nodes, pcmk_node_t *active_node)
{
return g_list_sort_with_data(nodes, compare_nodes, active_node);
}
/*!
* \internal
* \brief Check whether any node is available to run resources
*
* \param[in] nodes Nodes to check
*
* \return true if any node in \p nodes is available to run resources,
* otherwise false
*/
bool
pcmk__any_node_available(GHashTable *nodes)
{
GHashTableIter iter;
const pcmk_node_t *node = NULL;
if (nodes == NULL) {
return false;
}
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (pcmk__node_available(node, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Apply node health values for all nodes in cluster
*
* \param[in,out] scheduler Scheduler data
*/
void
pcmk__apply_node_health(pcmk_scheduler_t *scheduler)
{
int base_health = 0;
enum pcmk__health_strategy strategy;
const char *strategy_str =
pcmk__cluster_option(scheduler->priv->options,
PCMK_OPT_NODE_HEALTH_STRATEGY);
strategy = pcmk__parse_health_strategy(strategy_str);
if (strategy == pcmk__health_strategy_none) {
return;
}
crm_info("Applying node health strategy '%s'", strategy_str);
// The progressive strategy can use a base health score
if (strategy == pcmk__health_strategy_progressive) {
base_health = pcmk__health_score(PCMK_OPT_NODE_HEALTH_BASE, scheduler);
}
for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
int health = pe__sum_node_health_scores(node, base_health);
// An overall health score of 0 has no effect
if (health == 0) {
continue;
}
crm_info("Overall system health of %s is %d",
pcmk__node_name(node), health);
// Use node health as a location score for each resource on the node
for (GList *r = scheduler->priv->resources; r != NULL; r = r->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) r->data;
bool constrain = true;
if (health < 0) {
/* Negative health scores do not apply to resources with
* PCMK_META_ALLOW_UNHEALTHY_NODES=true.
*/
constrain = !crm_is_true(g_hash_table_lookup(rsc->priv->meta,
PCMK_META_ALLOW_UNHEALTHY_NODES));
}
if (constrain) {
pcmk__new_location(strategy_str, rsc, health, NULL, node);
} else {
pcmk__rsc_trace(rsc, "%s is immune from health ban on %s",
rsc->id, pcmk__node_name(node));
}
}
}
}
/*!
* \internal
* \brief Check for a node in a resource's parent's allowed nodes
*
* \param[in] rsc Resource whose parent should be checked
* \param[in] node Node to check for
*
* \return Equivalent of \p node from \p rsc's parent's allowed nodes if any,
* otherwise NULL
*/
pcmk_node_t *
pcmk__top_allowed_node(const pcmk_resource_t *rsc, const pcmk_node_t *node)
{
GHashTable *allowed_nodes = NULL;
if ((rsc == NULL) || (node == NULL)) {
return NULL;
}
if (rsc->priv->parent == NULL) {
allowed_nodes = rsc->priv->allowed_nodes;
} else {
allowed_nodes = rsc->priv->parent->priv->allowed_nodes;
}
return g_hash_table_lookup(allowed_nodes, node->priv->id);
}
diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c
index 4b7d11746e..a5b1a1b384 100644
--- a/lib/pacemaker/pcmk_sched_primitive.c
+++ b/lib/pacemaker/pcmk_sched_primitive.c
@@ -1,1725 +1,1726 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <stdint.h> // uint8_t, uint32_t
#include <crm/common/xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
static void stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
bool optional);
static void start_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
bool optional);
static void demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
bool optional);
static void promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
bool optional);
static void assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node,
bool optional);
#define RSC_ROLE_MAX (pcmk_role_promoted + 1)
static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* This array lists the immediate next role when transitioning from one role
* to a target role. For example, when going from Stopped to Promoted, the
* next role is Unpromoted, because the resource must be started before it
* can be promoted. The current state then becomes Started, which is fed
* into this array again, giving a next role of Promoted.
*
* Current role Immediate next role Final target role
* ------------ ------------------- -----------------
*/
/* Unknown */ { pcmk_role_unknown, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_stopped, /* Started */
pcmk_role_stopped, /* Unpromoted */
pcmk_role_stopped, /* Promoted */
},
/* Stopped */ { pcmk_role_stopped, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_started, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_unpromoted, /* Promoted */
},
/* Started */ { pcmk_role_stopped, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_started, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_promoted, /* Promoted */
},
/* Unpromoted */ { pcmk_role_stopped, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_stopped, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_promoted, /* Promoted */
},
/* Promoted */ { pcmk_role_stopped, /* Unknown */
pcmk_role_unpromoted, /* Stopped */
pcmk_role_unpromoted, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_promoted, /* Promoted */
},
};
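/* Editor's note: a worked example of the matrix above. Transitioning from
 * Stopped to Promoted takes two lookups:
 *
 *     rsc_state_matrix[pcmk_role_stopped][pcmk_role_promoted]
 *         == pcmk_role_unpromoted   // start first
 *     rsc_state_matrix[pcmk_role_unpromoted][pcmk_role_promoted]
 *         == pcmk_role_promoted     // then promote
 */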
/*!
* \internal
* \brief Function to schedule actions needed for a role change
*
* \param[in,out] rsc Resource whose role is changing
* \param[in,out] node Node where resource will be in its next role
* \param[in] optional Whether scheduled actions should be optional
*/
typedef void (*rsc_transition_fn)(pcmk_resource_t *rsc, pcmk_node_t *node,
bool optional);
static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* This array lists the function needed to transition directly from one role
* to another. NULL indicates that nothing is needed.
*
* Current role Transition function Next role
* ------------ ------------------- ----------
*/
/* Unknown */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
assert_role_error, /* Started */
assert_role_error, /* Unpromoted */
assert_role_error, /* Promoted */
},
/* Stopped */ { assert_role_error, /* Unknown */
NULL, /* Stopped */
start_resource, /* Started */
start_resource, /* Unpromoted */
assert_role_error, /* Promoted */
},
/* Started */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
NULL, /* Started */
NULL, /* Unpromoted */
promote_resource, /* Promoted */
},
/* Unpromoted */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
stop_resource, /* Started */
NULL, /* Unpromoted */
promote_resource, /* Promoted */
},
/* Promoted */ { assert_role_error, /* Unknown */
demote_resource, /* Stopped */
demote_resource, /* Started */
demote_resource, /* Unpromoted */
NULL, /* Promoted */
},
};
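/* Editor's note: continuing the worked example in reverse, stopping a
 * Promoted resource steps through Unpromoted, with each step's scheduling
 * function taken from the matrix above:
 *
 *     rsc_action_matrix[pcmk_role_promoted][pcmk_role_unpromoted]
 *         == demote_resource    // demote first
 *     rsc_action_matrix[pcmk_role_unpromoted][pcmk_role_stopped]
 *         == stop_resource      // then stop
 */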
/*!
* \internal
* \brief Get a list of a resource's allowed nodes sorted by node score
*
* \param[in] rsc Resource to check
*
* \return List of allowed nodes sorted by node score
*/
static GList *
sorted_allowed_nodes(const pcmk_resource_t *rsc)
{
if (rsc->priv->allowed_nodes != NULL) {
GList *nodes = g_hash_table_get_values(rsc->priv->allowed_nodes);
if (nodes != NULL) {
return pcmk__sort_nodes(nodes, pcmk__current_node(rsc));
}
}
return NULL;
}
/*!
* \internal
* \brief Assign a resource to its best allowed node, if possible
*
* \param[in,out] rsc Resource to choose a node for
* \param[in] prefer If not \c NULL, prefer this node when all else
* equal
* \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
* node, set next role to stopped and update
* existing actions
*
* \return true if \p rsc could be assigned to a node, otherwise false
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
static bool
assign_best_node(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
bool stop_if_fail)
{
GList *nodes = NULL;
pcmk_node_t *chosen = NULL;
pcmk_node_t *best = NULL;
const pcmk_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);
if (prefer == NULL) {
prefer = most_free_node;
}
if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) {
// We've already finished assignment of resources to nodes
return rsc->priv->assigned_node != NULL;
}
// Sort allowed nodes by score
nodes = sorted_allowed_nodes(rsc);
if (nodes != NULL) {
best = (pcmk_node_t *) nodes->data; // First node has best score
}
if ((prefer != NULL) && (nodes != NULL)) {
// Get the allowed node version of prefer
chosen = g_hash_table_lookup(rsc->priv->allowed_nodes,
prefer->priv->id);
if (chosen == NULL) {
pcmk__rsc_trace(rsc, "Preferred node %s for %s was unknown",
pcmk__node_name(prefer), rsc->id);
/* Favor the preferred node as long as its score is at least as good as
* the best allowed node's.
*
* An alternative would be to favor the preferred node even if the best
* node is better, when the best node's score is less than INFINITY.
*/
} else if (chosen->assign->score < best->assign->score) {
pcmk__rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
pcmk__node_name(chosen), rsc->id);
chosen = NULL;
} else if (!pcmk__node_available(chosen, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)) {
pcmk__rsc_trace(rsc, "Preferred node %s for %s was unavailable",
pcmk__node_name(chosen), rsc->id);
chosen = NULL;
} else {
pcmk__rsc_trace(rsc,
"Chose preferred node %s for %s "
"(ignoring %d candidates)",
pcmk__node_name(chosen), rsc->id,
g_list_length(nodes));
}
}
if ((chosen == NULL) && (best != NULL)) {
/* Either there is no preferred node, or the preferred node is not
* suitable, but another node is allowed to run the resource.
*/
chosen = best;
if (!pcmk__is_unique_clone(rsc->priv->parent)
- && (chosen->assign->score > 0) // Zero not acceptable
- && pcmk__node_available(chosen,
- pcmk__node_alive|pcmk__node_usable)) {
+ && pcmk__node_available(chosen, pcmk__node_alive
+ |pcmk__node_usable
+ |pcmk__node_no_zero
+ |pcmk__node_no_negative)) {
/* If the resource is already running on a node, prefer that node if
* it is just as good as the chosen node.
*
* We don't do this for unique clone instances, because
* pcmk__assign_instances() has already assigned instances to their
* running nodes when appropriate, and if we get here, we don't want
* remaining unassigned instances to prefer a node that's already
* running another instance.
*/
pcmk_node_t *running = pcmk__current_node(rsc);
if (running == NULL) {
// Nothing to do
} else if (!pcmk__node_available(running, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)) {
pcmk__rsc_trace(rsc,
"Current node for %s (%s) can't run resources",
rsc->id, pcmk__node_name(running));
} else {
int nodes_with_best_score = 1;
for (GList *iter = nodes->next; iter; iter = iter->next) {
pcmk_node_t *allowed = (pcmk_node_t *) iter->data;
if (allowed->assign->score != chosen->assign->score) {
// The nodes are sorted by score, so no more are equal
break;
}
if (pcmk__same_node(allowed, running)) {
// Scores are equal, so prefer the current node
chosen = allowed;
}
nodes_with_best_score++;
}
if (nodes_with_best_score > 1) {
uint8_t log_level = LOG_INFO;
if (chosen->assign->score >= PCMK_SCORE_INFINITY) {
log_level = LOG_WARNING;
}
do_crm_log(log_level,
"Chose %s for %s from %d nodes with score %s",
pcmk__node_name(chosen), rsc->id,
nodes_with_best_score,
pcmk_readable_score(chosen->assign->score));
}
}
}
pcmk__rsc_trace(rsc, "Chose %s for %s from %d candidates",
pcmk__node_name(chosen), rsc->id, g_list_length(nodes));
}
pcmk__assign_resource(rsc, chosen, false, stop_if_fail);
g_list_free(nodes);
return rsc->priv->assigned_node != NULL;
}
/*!
* \internal
* \brief Apply a "this with" colocation to a node's allowed node scores
*
* \param[in,out] colocation Colocation to apply
* \param[in,out] rsc Resource being assigned
*/
static void
apply_this_with(pcmk__colocation_t *colocation, pcmk_resource_t *rsc)
{
GHashTable *archive = NULL;
pcmk_resource_t *other = colocation->primary;
// In certain cases, we will need to revert the node scores
if ((colocation->dependent_role >= pcmk_role_promoted)
|| ((colocation->score < 0)
&& (colocation->score > -PCMK_SCORE_INFINITY))) {
archive = pcmk__copy_node_table(rsc->priv->allowed_nodes);
}
if (pcmk_is_set(other->flags, pcmk__rsc_unassigned)) {
pcmk__rsc_trace(rsc,
"%s: Assigning colocation %s primary %s first"
"(score=%d role=%s)",
rsc->id, colocation->id, other->id,
colocation->score,
pcmk_role_text(colocation->dependent_role));
other->priv->cmds->assign(other, NULL, true);
}
// Apply the colocation score to this resource's allowed node scores
rsc->priv->cmds->apply_coloc_score(rsc, other, colocation, true);
if ((archive != NULL)
&& !pcmk__any_node_available(rsc->priv->allowed_nodes)) {
pcmk__rsc_info(rsc,
"%s: Reverting scores from colocation with %s "
"because no nodes allowed",
rsc->id, other->id);
g_hash_table_destroy(rsc->priv->allowed_nodes);
rsc->priv->allowed_nodes = archive;
archive = NULL;
}
if (archive != NULL) {
g_hash_table_destroy(archive);
}
}
/*!
* \internal
* \brief Update a Pacemaker Remote node once its connection has been assigned
*
* \param[in] connection Connection resource that has been assigned
*/
static void
remote_connection_assigned(const pcmk_resource_t *connection)
{
pcmk_node_t *remote_node = pcmk_find_node(connection->priv->scheduler,
connection->id);
CRM_CHECK(remote_node != NULL, return);
if ((connection->priv->assigned_node != NULL)
&& (connection->priv->next_role != pcmk_role_stopped)) {
crm_trace("Pacemaker Remote node %s will be online",
remote_node->priv->id);
remote_node->details->online = TRUE;
if (!pcmk_is_set(remote_node->priv->flags, pcmk__node_seen)) {
// Avoid unnecessary fence, since we will attempt connection
remote_node->details->unclean = FALSE;
}
} else {
crm_trace("Pacemaker Remote node %s will be shut down "
"(%sassigned connection's next role is %s)",
remote_node->priv->id,
((connection->priv->assigned_node == NULL)? "un" : ""),
pcmk_role_text(connection->priv->next_role));
remote_node->details->shutdown = TRUE;
}
}
/*!
* \internal
* \brief Assign a primitive resource to a node
*
* \param[in,out] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
* \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
* node, set next role to stopped and update
* existing actions
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
pcmk_node_t *
pcmk__primitive_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
bool stop_if_fail)
{
GList *this_with_colocations = NULL;
GList *with_this_colocations = NULL;
GList *iter = NULL;
pcmk_resource_t *parent = NULL;
pcmk__colocation_t *colocation = NULL;
pcmk_scheduler_t *scheduler = NULL;
pcmk__assert(pcmk__is_primitive(rsc));
scheduler = rsc->priv->scheduler;
parent = rsc->priv->parent;
// Never assign a child without parent being assigned first
if ((parent != NULL) && !pcmk_is_set(parent->flags, pcmk__rsc_assigning)) {
pcmk__rsc_debug(rsc, "%s: Assigning parent %s first",
rsc->id, parent->id);
parent->priv->cmds->assign(parent, prefer, stop_if_fail);
}
if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) {
// Assignment has already been done
const char *node_name = "no node";
if (rsc->priv->assigned_node != NULL) {
node_name = pcmk__node_name(rsc->priv->assigned_node);
}
pcmk__rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, node_name);
return rsc->priv->assigned_node;
}
// Ensure we detect assignment loops
if (pcmk_is_set(rsc->flags, pcmk__rsc_assigning)) {
pcmk__rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
return NULL;
}
pcmk__set_rsc_flags(rsc, pcmk__rsc_assigning);
pe__show_node_scores(true, rsc, "Pre-assignment",
rsc->priv->allowed_nodes, scheduler);
this_with_colocations = pcmk__this_with_colocations(rsc);
with_this_colocations = pcmk__with_this_colocations(rsc);
// Apply mandatory colocations first, to satisfy as many as possible
for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score <= -PCMK_SCORE_INFINITY)
|| (colocation->score >= PCMK_SCORE_INFINITY)) {
apply_this_with(colocation, rsc);
}
}
for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score <= -PCMK_SCORE_INFINITY)
|| (colocation->score >= PCMK_SCORE_INFINITY)) {
pcmk__add_dependent_scores(colocation, rsc);
}
}
pe__show_node_scores(true, rsc, "Mandatory-colocations",
rsc->priv->allowed_nodes, scheduler);
// Then apply optional colocations
for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score > -PCMK_SCORE_INFINITY)
&& (colocation->score < PCMK_SCORE_INFINITY)) {
apply_this_with(colocation, rsc);
}
}
for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score > -PCMK_SCORE_INFINITY)
&& (colocation->score < PCMK_SCORE_INFINITY)) {
pcmk__add_dependent_scores(colocation, rsc);
}
}
g_list_free(this_with_colocations);
g_list_free(with_this_colocations);
if (rsc->priv->next_role == pcmk_role_stopped) {
pcmk__rsc_trace(rsc,
"Banning %s from all nodes because it will be stopped",
rsc->id);
resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
PCMK_META_TARGET_ROLE, scheduler);
} else if ((rsc->priv->next_role > rsc->priv->orig_role)
&& !pcmk_is_set(scheduler->flags, pcmk__sched_quorate)
&& (scheduler->no_quorum_policy == pcmk_no_quorum_freeze)) {
crm_notice("Resource %s cannot be elevated from %s to %s due to "
PCMK_OPT_NO_QUORUM_POLICY "=" PCMK_VALUE_FREEZE,
rsc->id, pcmk_role_text(rsc->priv->orig_role),
pcmk_role_text(rsc->priv->next_role));
pe__set_next_role(rsc, rsc->priv->orig_role,
PCMK_OPT_NO_QUORUM_POLICY "=" PCMK_VALUE_FREEZE);
}
pe__show_node_scores(!pcmk_is_set(scheduler->flags,
pcmk__sched_output_scores),
rsc, __func__, rsc->priv->allowed_nodes, scheduler);
// Unmanage resource if fencing is enabled but no device is configured
if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)
&& !pcmk_is_set(scheduler->flags, pcmk__sched_have_fencing)) {
pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
}
if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
// Unmanaged resources stay on their current node
const char *reason = NULL;
pcmk_node_t *assign_to = NULL;
pe__set_next_role(rsc, rsc->priv->orig_role, "unmanaged");
assign_to = pcmk__current_node(rsc);
if (assign_to == NULL) {
reason = "inactive";
} else if (rsc->priv->orig_role == pcmk_role_promoted) {
reason = "promoted";
} else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
reason = "failed";
} else {
reason = "active";
}
pcmk__rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
(assign_to? assign_to->priv->name : "no node"),
reason);
pcmk__assign_resource(rsc, assign_to, true, stop_if_fail);
} else if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_all)) {
// Must stop at some point, but be consistent with stop_if_fail
if (stop_if_fail) {
pcmk__rsc_debug(rsc,
"Forcing %s to stop: " PCMK_OPT_STOP_ALL_RESOURCES,
rsc->id);
}
pcmk__assign_resource(rsc, NULL, true, stop_if_fail);
} else if (!assign_best_node(rsc, prefer, stop_if_fail)) {
// Assignment failed
if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
pcmk__rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
} else if ((rsc->priv->active_nodes != NULL) && stop_if_fail) {
pcmk__rsc_info(rsc, "Stopping removed resource %s", rsc->id);
}
}
pcmk__clear_rsc_flags(rsc, pcmk__rsc_assigning);
if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
remote_connection_assigned(rsc);
}
return rsc->priv->assigned_node;
}
/*!
* \internal
* \brief Schedule actions to bring resource down and back to current role
*
* \param[in,out] rsc Resource to restart
* \param[in,out] current Node that resource should be brought down on
* \param[in] need_stop Whether the resource must be stopped
* \param[in] need_promote Whether the resource must be promoted
*/
static void
schedule_restart_actions(pcmk_resource_t *rsc, pcmk_node_t *current,
bool need_stop, bool need_promote)
{
enum rsc_role_e role = rsc->priv->orig_role;
enum rsc_role_e next_role;
rsc_transition_fn fn = NULL;
pcmk__set_rsc_flags(rsc, pcmk__rsc_restarting);
// Bring resource down to a stop on its current node
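    /* For example, a resource in the promoted role is taken down in two steps
     * (Promoted -> Unpromoted -> Stopped), scheduling a demote and then a stop
     */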
while (role != pcmk_role_stopped) {
next_role = rsc_state_matrix[role][pcmk_role_stopped];
pcmk__rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
(need_stop? "required" : "optional"), rsc->id,
pcmk_role_text(role), pcmk_role_text(next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, current, !need_stop);
role = next_role;
}
// Bring resource up to its next role on its next node
while ((rsc->priv->orig_role <= rsc->priv->next_role)
&& (role != rsc->priv->orig_role)
&& !pcmk_is_set(rsc->flags, pcmk__rsc_blocked)) {
bool required = need_stop;
next_role = rsc_state_matrix[role][rsc->priv->orig_role];
if ((next_role == pcmk_role_promoted) && need_promote) {
required = true;
}
pcmk__rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
(required? "required" : "optional"), rsc->id,
pcmk_role_text(role), pcmk_role_text(next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, rsc->priv->assigned_node, !required);
role = next_role;
}
pcmk__clear_rsc_flags(rsc, pcmk__rsc_restarting);
}
/*!
* \internal
* \brief If a resource's next role is not explicitly specified, set a default
*
* \param[in,out] rsc Resource to set next role for
*
* \return "explicit" if next role was explicitly set, otherwise "implicit"
*/
static const char *
set_default_next_role(pcmk_resource_t *rsc)
{
if (rsc->priv->next_role != pcmk_role_unknown) {
return "explicit";
}
if (rsc->priv->assigned_node == NULL) {
pe__set_next_role(rsc, pcmk_role_stopped, "assignment");
} else {
pe__set_next_role(rsc, pcmk_role_started, "assignment");
}
return "implicit";
}
/*!
* \internal
* \brief Create an action to represent an already pending start
*
* \param[in,out] rsc Resource to create start action for
*/
static void
create_pending_start(pcmk_resource_t *rsc)
{
pcmk_action_t *start = NULL;
pcmk__rsc_trace(rsc,
"Creating action for %s to represent already pending start",
rsc->id);
start = start_action(rsc, rsc->priv->assigned_node, TRUE);
pcmk__set_action_flags(start, pcmk__action_always_in_graph);
}
/*!
* \internal
* \brief Schedule actions needed to take a resource to its next role
*
* \param[in,out] rsc Resource to schedule actions for
*/
static void
schedule_role_transition_actions(pcmk_resource_t *rsc)
{
enum rsc_role_e role = rsc->priv->orig_role;
while (role != rsc->priv->next_role) {
enum rsc_role_e next_role =
rsc_state_matrix[role][rsc->priv->next_role];
rsc_transition_fn fn = NULL;
pcmk__rsc_trace(rsc,
"Creating action to take %s from %s to %s "
"(ending at %s)",
rsc->id, pcmk_role_text(role),
pcmk_role_text(next_role),
pcmk_role_text(rsc->priv->next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, rsc->priv->assigned_node, false);
role = next_role;
}
}
/*!
* \internal
* \brief Create all actions needed for a given primitive resource
*
* \param[in,out] rsc Primitive resource to create actions for
*/
void
pcmk__primitive_create_actions(pcmk_resource_t *rsc)
{
bool need_stop = false;
bool need_promote = false;
bool is_moving = false;
bool allow_migrate = false;
bool multiply_active = false;
pcmk_node_t *current = NULL;
pcmk_node_t *migration_target = NULL;
unsigned int num_all_active = 0;
unsigned int num_clean_active = 0;
const char *next_role_source = NULL;
pcmk__assert(pcmk__is_primitive(rsc));
next_role_source = set_default_next_role(rsc);
pcmk__rsc_trace(rsc,
"Creating all actions for %s transition from %s to %s "
"(%s) on %s",
rsc->id, pcmk_role_text(rsc->priv->orig_role),
pcmk_role_text(rsc->priv->next_role), next_role_source,
pcmk__node_name(rsc->priv->assigned_node));
current = rsc->priv->fns->active_node(rsc, &num_all_active,
&num_clean_active);
g_list_foreach(rsc->priv->dangling_migration_sources,
pcmk__abort_dangling_migration, rsc);
if ((current != NULL) && (rsc->priv->assigned_node != NULL)
&& !pcmk__same_node(current, rsc->priv->assigned_node)
&& (rsc->priv->next_role >= pcmk_role_started)) {
pcmk__rsc_trace(rsc, "Moving %s from %s to %s",
rsc->id, pcmk__node_name(current),
pcmk__node_name(rsc->priv->assigned_node));
is_moving = true;
allow_migrate = pcmk__rsc_can_migrate(rsc, current);
// This is needed even if migrating (though I'm not sure why ...)
need_stop = true;
}
// Check whether resource is partially migrated and/or multiply active
migration_target = rsc->priv->partial_migration_target;
if ((rsc->priv->partial_migration_source != NULL)
&& (migration_target != NULL) && allow_migrate && (num_all_active == 2)
&& pcmk__same_node(current, rsc->priv->partial_migration_source)
&& pcmk__same_node(rsc->priv->assigned_node, migration_target)) {
/* A partial migration is in progress, and the migration target remains
* the same as when the migration began.
*/
pcmk__rsc_trace(rsc,
"Partial migration of %s from %s to %s will continue",
rsc->id,
pcmk__node_name(rsc->priv->partial_migration_source),
pcmk__node_name(migration_target));
} else if ((rsc->priv->partial_migration_source != NULL)
|| (migration_target != NULL)) {
// A partial migration is in progress but can't be continued
if (num_all_active > 2) {
// The resource is migrating *and* multiply active!
crm_notice("Forcing recovery of %s because it is migrating "
"from %s to %s and possibly active elsewhere",
rsc->id,
pcmk__node_name(rsc->priv->partial_migration_source),
pcmk__node_name(migration_target));
} else {
// The migration source or target isn't available
crm_notice("Forcing recovery of %s because it can no longer "
"migrate from %s to %s",
rsc->id,
pcmk__node_name(rsc->priv->partial_migration_source),
pcmk__node_name(migration_target));
}
need_stop = true;
rsc->priv->partial_migration_source = NULL;
rsc->priv->partial_migration_target = NULL;
allow_migrate = false;
} else if (pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)) {
multiply_active = (num_all_active > 1);
} else {
/* If a resource has PCMK_META_REQUIRES set to PCMK_VALUE_NOTHING or
* PCMK_VALUE_QUORUM, don't consider it active on unclean nodes (similar
* to how all resources behave when PCMK_OPT_STONITH_ENABLED is false).
* We can start such resources elsewhere before fencing completes, and
* if we considered the resource active on the failed node, we would
* attempt recovery for being active on multiple nodes.
*/
multiply_active = (num_clean_active > 1);
}
if (multiply_active) {
const char *class = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS);
// Resource was (possibly) incorrectly multiply active
pcmk__sched_err(rsc->priv->scheduler,
"%s resource %s might be active on %u nodes (%s)",
pcmk__s(class, "Untyped"), rsc->id, num_all_active,
pcmk__multiply_active_text(rsc));
crm_notice("For more information, see \"What are multiply active "
"resources?\" at "
"https://projects.clusterlabs.org/w/clusterlabs/faq/");
switch (rsc->priv->multiply_active_policy) {
case pcmk__multiply_active_restart:
need_stop = true;
break;
case pcmk__multiply_active_unexpected:
need_stop = true; // stop_resource() will skip expected node
pcmk__set_rsc_flags(rsc, pcmk__rsc_stop_unexpected);
break;
default:
break;
}
} else {
pcmk__clear_rsc_flags(rsc, pcmk__rsc_stop_unexpected);
}
if (pcmk_is_set(rsc->flags, pcmk__rsc_start_pending)) {
create_pending_start(rsc);
}
if (is_moving) {
// Remaining tests are only for resources staying where they are
} else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
if (pcmk_is_set(rsc->flags, pcmk__rsc_stop_if_failed)) {
need_stop = true;
pcmk__rsc_trace(rsc, "Recovering %s", rsc->id);
} else {
pcmk__rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
if (rsc->priv->next_role == pcmk_role_promoted) {
need_promote = true;
}
}
} else if (pcmk_is_set(rsc->flags, pcmk__rsc_blocked)) {
pcmk__rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
need_stop = true;
} else if ((rsc->priv->orig_role > pcmk_role_started)
&& (current != NULL)
&& (rsc->priv->assigned_node != NULL)) {
pcmk_action_t *start = NULL;
pcmk__rsc_trace(rsc, "Creating start action for promoted resource %s",
rsc->id);
start = start_action(rsc, rsc->priv->assigned_node, TRUE);
if (!pcmk_is_set(start->flags, pcmk__action_optional)) {
// Recovery of a promoted resource
pcmk__rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
need_stop = true;
}
}
// Create any actions needed to bring resource down and back up to same role
schedule_restart_actions(rsc, current, need_stop, need_promote);
// Create any actions needed to take resource from this role to the next
schedule_role_transition_actions(rsc);
pcmk__create_recurring_actions(rsc);
if (allow_migrate) {
pcmk__create_migration_actions(rsc, current);
}
}
/*!
* \internal
* \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
*
* \param[in] rsc Resource to check
*/
static void
rsc_avoids_remote_nodes(const pcmk_resource_t *rsc)
{
GHashTableIter iter;
pcmk_node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (node->priv->remote != NULL) {
node->assign->score = -PCMK_SCORE_INFINITY;
}
}
}
/*!
* \internal
* \brief Return allowed nodes as (possibly sorted) list
*
* Convert a resource's hash table of allowed nodes to a list. If printing to
* stdout, sort the list, to keep action ID numbers consistent for regression
* test output (while avoiding the performance hit on a live cluster).
*
* \param[in] rsc Resource to check for allowed nodes
*
* \return List of resource's allowed nodes
* \note Callers should take care not to rely on the list being sorted.
*/
static GList *
allowed_nodes_as_list(const pcmk_resource_t *rsc)
{
GList *allowed_nodes = NULL;
if (rsc->priv->allowed_nodes != NULL) {
allowed_nodes = g_hash_table_get_values(rsc->priv->allowed_nodes);
}
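    /* pcmk__is_daemon is false for command-line tools, where we sort to keep
     * regression test output consistent; on a live cluster, skip the cost
     */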
if (!pcmk__is_daemon) {
allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
}
return allowed_nodes;
}
/*!
* \internal
* \brief Create implicit constraints needed for a primitive resource
*
* \param[in,out] rsc Primitive resource to create implicit constraints for
*/
void
pcmk__primitive_internal_constraints(pcmk_resource_t *rsc)
{
GList *allowed_nodes = NULL;
bool check_unfencing = false;
bool check_utilization = false;
pcmk_scheduler_t *scheduler = NULL;
pcmk__assert(pcmk__is_primitive(rsc));
scheduler = rsc->priv->scheduler;
if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
pcmk__rsc_trace(rsc,
"Skipping implicit constraints for unmanaged resource "
"%s", rsc->id);
return;
}
// Whether resource requires unfencing
check_unfencing = !pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)
&& pcmk_is_set(scheduler->flags,
pcmk__sched_enable_unfencing)
&& pcmk_is_set(rsc->flags, pcmk__rsc_needs_unfencing);
// Whether a non-default placement strategy is used
check_utilization = (g_hash_table_size(rsc->priv->utilization) > 0)
&& !pcmk__str_eq(scheduler->priv->placement_strategy,
PCMK_VALUE_DEFAULT, pcmk__str_casei);
// Order stops before starts (i.e. restart)
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL,
rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL,
pcmk__ar_ordered
|pcmk__ar_first_implies_then
|pcmk__ar_intermediate_stop, scheduler);
// Promotable ordering: demote before stop, start before promote
if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pcmk__rsc_promotable)
|| (rsc->priv->orig_role > pcmk_role_unpromoted)) {
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_DEMOTE, 0),
NULL,
rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
NULL,
pcmk__ar_promoted_then_implies_first, scheduler);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
NULL,
rsc, pcmk__op_key(rsc->id, PCMK_ACTION_PROMOTE, 0),
NULL,
pcmk__ar_unrunnable_first_blocks, scheduler);
}
// Don't clear resource history if probing on same node
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0),
NULL, rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0),
NULL,
pcmk__ar_if_on_same_node|pcmk__ar_then_cancels_first,
scheduler);
// Certain checks need allowed nodes
if (check_unfencing || check_utilization
|| (rsc->priv->launcher != NULL)) {
allowed_nodes = allowed_nodes_as_list(rsc);
}
if (check_unfencing) {
g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
}
if (check_utilization) {
pcmk__create_utilization_constraints(rsc, allowed_nodes);
}
if (rsc->priv->launcher != NULL) {
pcmk_resource_t *remote_rsc = NULL;
if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
// rsc is the implicit remote connection for a guest or bundle node
/* Guest resources are not allowed to run on Pacemaker Remote nodes,
* to avoid nesting remotes. However, bundles are allowed.
*/
if (!pcmk_is_set(rsc->flags, pcmk__rsc_remote_nesting_allowed)) {
rsc_avoids_remote_nodes(rsc->priv->launcher);
}
/* If someone cleans up a guest or bundle node's launcher, we will
* likely schedule a (re-)probe of the launcher and recovery of the
* connection. Order the connection stop after the launcher probe,
* so that if we detect the launcher running, we will trigger a new
* transition and avoid the unnecessary recovery.
*/
pcmk__order_resource_actions(rsc->priv->launcher,
PCMK_ACTION_MONITOR,
rsc, PCMK_ACTION_STOP,
pcmk__ar_ordered);
/* A user can specify that a resource must start on a Pacemaker Remote
* node by explicitly configuring it with the PCMK__META_CONTAINER
* meta-attribute. This is of questionable merit, since location
* constraints can accomplish the same thing. But we support it, so here
* we check whether a resource (that is not itself a remote connection)
* has PCMK__META_CONTAINER set to a remote node or guest node resource.
*/
} else if (pcmk_is_set(rsc->priv->launcher->flags,
pcmk__rsc_is_remote_connection)) {
remote_rsc = rsc->priv->launcher;
} else {
remote_rsc =
pe__resource_contains_guest_node(scheduler,
rsc->priv->launcher);
}
if (remote_rsc != NULL) {
/* Force the resource on the Pacemaker Remote node instead of
* colocating the resource with the launcher.
*/
for (GList *item = allowed_nodes; item; item = item->next) {
pcmk_node_t *node = item->data;
if (node->priv->remote != remote_rsc) {
node->assign->score = -PCMK_SCORE_INFINITY;
}
}
} else {
            /* This resource either is launched by a resource that does NOT
             * represent a Pacemaker Remote node, or is itself the Pacemaker
             * Remote connection resource for a guest node or bundle.
             */
int score;
crm_trace("Order and colocate %s relative to its launcher %s",
rsc->id, rsc->priv->launcher->id);
pcmk__new_ordering(rsc->priv->launcher,
pcmk__op_key(rsc->priv->launcher->id,
PCMK_ACTION_START, 0),
NULL, rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
NULL,
pcmk__ar_first_implies_then
|pcmk__ar_unrunnable_first_blocks, scheduler);
pcmk__new_ordering(rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
NULL,
rsc->priv->launcher,
pcmk__op_key(rsc->priv->launcher->id,
PCMK_ACTION_STOP, 0),
NULL, pcmk__ar_then_implies_first, scheduler);
if (pcmk_is_set(rsc->flags, pcmk__rsc_remote_nesting_allowed)
/* @TODO: && non-bundle Pacemaker Remote nodes exist */) {
score = 10000; /* Highly preferred but not essential */
} else {
score = PCMK_SCORE_INFINITY; // Force to run on same host
}
pcmk__new_colocation("#resource-with-container", NULL, score, rsc,
rsc->priv->launcher, NULL, NULL,
pcmk__coloc_influence);
}
}
if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)
|| pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)) {
/* Remote connections and fencing devices are not allowed to run on
* Pacemaker Remote nodes
*/
rsc_avoids_remote_nodes(rsc);
}
g_list_free(allowed_nodes);
}
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node scores (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*
* \return The score added to the dependent's priority
*/
int
pcmk__primitive_apply_coloc_score(pcmk_resource_t *dependent,
const pcmk_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
enum pcmk__coloc_affects filter_results;
pcmk__assert((dependent != NULL) && (primary != NULL)
&& (colocation != NULL));
if (for_dependent) {
// Always process on behalf of primary resource
return primary->priv->cmds->apply_coloc_score(dependent, primary,
colocation, false);
}
filter_results = pcmk__colocation_affects(dependent, primary, colocation,
false);
pcmk__rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
((colocation->score > 0)? "Colocating" : "Anti-colocating"),
dependent->id, primary->id, colocation->id,
colocation->score,
filter_results);
switch (filter_results) {
case pcmk__coloc_affects_role:
return pcmk__apply_coloc_to_priority(dependent, primary,
colocation);
case pcmk__coloc_affects_location:
pcmk__apply_coloc_to_scores(dependent, primary, colocation);
return 0;
default: // pcmk__coloc_affects_nothing
return 0;
}
}
/* Primitive implementation of
* pcmk__assignment_methods_t:with_this_colocations()
*/
void
pcmk__with_primitive_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc, GList **list)
{
const pcmk_resource_t *parent = NULL;
pcmk__assert(pcmk__is_primitive(rsc) && (list != NULL));
parent = rsc->priv->parent;
if (rsc == orig_rsc) {
/* For the resource itself, add all of its own colocations and relevant
* colocations from its parent (if any).
*/
pcmk__add_with_this_list(list, rsc->priv->with_this_colocations,
orig_rsc);
if (parent != NULL) {
parent->priv->cmds->with_this_colocations(parent, orig_rsc, list);
}
} else {
// For an ancestor, add only explicitly configured constraints
for (GList *iter = rsc->priv->with_this_colocations;
iter != NULL; iter = iter->next) {
pcmk__colocation_t *colocation = iter->data;
if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
pcmk__add_with_this(list, colocation, orig_rsc);
}
}
}
}
/* Primitive implementation of
* pcmk__assignment_methods_t:this_with_colocations()
*/
void
pcmk__primitive_with_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc, GList **list)
{
const pcmk_resource_t *parent = NULL;
pcmk__assert(pcmk__is_primitive(rsc) && (list != NULL));
parent = rsc->priv->parent;
if (rsc == orig_rsc) {
/* For the resource itself, add all of its own colocations and relevant
* colocations from its parent (if any).
*/
pcmk__add_this_with_list(list, rsc->priv->this_with_colocations,
orig_rsc);
if (parent != NULL) {
parent->priv->cmds->this_with_colocations(parent, orig_rsc, list);
}
} else {
// For an ancestor, add only explicitly configured constraints
for (GList *iter = rsc->priv->this_with_colocations;
iter != NULL; iter = iter->next) {
pcmk__colocation_t *colocation = iter->data;
if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
pcmk__add_this_with(list, colocation, orig_rsc);
}
}
}
}
/*!
* \internal
* \brief Return action flags for a given primitive resource action
*
* \param[in,out] action Action to get flags for
* \param[in] node If not NULL, limit effects to this node (ignored)
*
* \return Flags appropriate to \p action on \p node
*/
uint32_t
pcmk__primitive_action_flags(pcmk_action_t *action, const pcmk_node_t *node)
{
pcmk__assert(action != NULL);
return (uint32_t) action->flags;
}
/*!
* \internal
* \brief Check whether a node is a multiply active resource's expected node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return \c true if \p rsc is multiply active with
* \c PCMK_META_MULTIPLE_ACTIVE set to \c PCMK_VALUE_STOP_UNEXPECTED,
* and \p node is the node where it will remain active
* \note This assumes that the resource's next role cannot be changed to stopped
* after this is called, which should be reasonable if status has already
* been unpacked and resources have been assigned to nodes.
*/
static bool
is_expected_node(const pcmk_resource_t *rsc, const pcmk_node_t *node)
{
return pcmk_all_flags_set(rsc->flags,
pcmk__rsc_stop_unexpected|pcmk__rsc_restarting)
&& (rsc->priv->next_role > pcmk_role_stopped)
&& pcmk__same_node(rsc->priv->assigned_node, node);
}
/*!
* \internal
* \brief Schedule actions needed to stop a resource wherever it is active
*
* \param[in,out] rsc Resource being stopped
* \param[in] node Node where resource is being stopped (ignored)
* \param[in] optional Whether actions should be optional
*/
static void
stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
{
for (GList *iter = rsc->priv->active_nodes;
iter != NULL; iter = iter->next) {
pcmk_node_t *current = (pcmk_node_t *) iter->data;
pcmk_action_t *stop = NULL;
if (is_expected_node(rsc, current)) {
/* We are scheduling restart actions for a multiply active resource
* with PCMK_META_MULTIPLE_ACTIVE=PCMK_VALUE_STOP_UNEXPECTED, and
* this is where it should not be stopped.
*/
pcmk__rsc_trace(rsc,
"Skipping stop of multiply active resource %s "
"on expected node %s",
rsc->id, pcmk__node_name(current));
continue;
}
if (rsc->priv->partial_migration_target != NULL) {
// Continue migration if node originally was and remains target
if (pcmk__same_node(current, rsc->priv->partial_migration_target)
&& pcmk__same_node(current, rsc->priv->assigned_node)) {
pcmk__rsc_trace(rsc,
"Skipping stop of %s on %s "
"because partial migration there will continue",
rsc->id, pcmk__node_name(current));
continue;
} else {
pcmk__rsc_trace(rsc,
"Forcing stop of %s on %s "
"because migration target changed",
rsc->id, pcmk__node_name(current));
optional = false;
}
}
pcmk__rsc_trace(rsc, "Scheduling stop of %s on %s",
rsc->id, pcmk__node_name(current));
stop = stop_action(rsc, current, optional);
if (rsc->priv->assigned_node == NULL) {
pe_action_set_reason(stop, "node availability", true);
} else if (pcmk_all_flags_set(rsc->flags, pcmk__rsc_restarting
|pcmk__rsc_stop_unexpected)) {
/* We are stopping a multiply active resource on a node that is
* not its expected node, and we are still scheduling restart
* actions, so the stop is for being multiply active.
*/
pe_action_set_reason(stop, "being multiply active", true);
}
if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
pcmk__clear_action_flags(stop, pcmk__action_runnable);
}
if (pcmk_is_set(rsc->flags, pcmk__rsc_needs_unfencing)) {
pcmk_action_t *unfence = pe_fence_op(current, PCMK_ACTION_ON, true,
NULL, false,
rsc->priv->scheduler);
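            // If unfencing turns out to be required, the stop must be too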
order_actions(stop, unfence, pcmk__ar_then_implies_first);
if (!pcmk__node_unfenced(current)) {
pcmk__sched_err(rsc->priv->scheduler,
"Stopping %s until %s can be unfenced",
rsc->id, pcmk__node_name(current));
}
}
}
}
/*!
* \internal
* \brief Schedule actions needed to start a resource on a node
*
* \param[in,out] rsc Resource being started
* \param[in,out] node Node where resource should be started
* \param[in] optional Whether actions should be optional
*/
static void
start_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
{
pcmk_action_t *start = NULL;
pcmk__assert(node != NULL);
pcmk__rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
(optional? "optional" : "required"), rsc->id,
pcmk__node_name(node), node->assign->score);
start = start_action(rsc, node, TRUE);
pcmk__order_vs_unfence(rsc, node, start, pcmk__ar_first_implies_then);
if (pcmk_is_set(start->flags, pcmk__action_runnable) && !optional) {
pcmk__clear_action_flags(start, pcmk__action_optional);
}
if (is_expected_node(rsc, node)) {
/* This could be a problem if the start becomes necessary for other
* reasons later.
*/
pcmk__rsc_trace(rsc,
"Start of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pcmk__node_name(node));
pcmk__set_action_flags(start, pcmk__action_pseudo);
}
}
/*!
* \internal
* \brief Schedule actions needed to promote a resource on a node
*
* \param[in,out] rsc Resource being promoted
* \param[in] node Node where resource should be promoted
* \param[in] optional Whether actions should be optional
*/
static void
promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
{
GList *iter = NULL;
GList *action_list = NULL;
bool runnable = true;
pcmk__assert(node != NULL);
// Any start must be runnable for promotion to be runnable
action_list = pe__resource_actions(rsc, node, PCMK_ACTION_START, true);
for (iter = action_list; iter != NULL; iter = iter->next) {
pcmk_action_t *start = (pcmk_action_t *) iter->data;
if (!pcmk_is_set(start->flags, pcmk__action_runnable)) {
runnable = false;
}
}
g_list_free(action_list);
if (runnable) {
pcmk_action_t *promote = promote_action(rsc, node, optional);
pcmk__rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
(optional? "optional" : "required"), rsc->id,
pcmk__node_name(node));
if (is_expected_node(rsc, node)) {
/* This could be a problem if the promote becomes necessary for
* other reasons later.
*/
pcmk__rsc_trace(rsc,
"Promotion of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pcmk__node_name(node));
pcmk__set_action_flags(promote, pcmk__action_pseudo);
}
} else {
pcmk__rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
rsc->id, pcmk__node_name(node));
action_list = pe__resource_actions(rsc, node, PCMK_ACTION_PROMOTE,
true);
for (iter = action_list; iter != NULL; iter = iter->next) {
pcmk_action_t *promote = (pcmk_action_t *) iter->data;
pcmk__clear_action_flags(promote, pcmk__action_runnable);
}
g_list_free(action_list);
}
}
/*!
* \internal
* \brief Schedule actions needed to demote a resource wherever it is active
*
* \param[in,out] rsc Resource being demoted
* \param[in] node Node where resource should be demoted (ignored)
* \param[in] optional Whether actions should be optional
*/
static void
demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
{
/* Since this will only be called for a primitive (possibly as an instance
* of a collective resource), the resource is multiply active if it is
* running on more than one node, so we want to demote on all of them as
* part of recovery, regardless of which one is the desired node.
*/
for (GList *iter = rsc->priv->active_nodes;
iter != NULL; iter = iter->next) {
pcmk_node_t *current = (pcmk_node_t *) iter->data;
if (is_expected_node(rsc, current)) {
pcmk__rsc_trace(rsc,
"Skipping demote of multiply active resource %s "
"on expected node %s",
rsc->id, pcmk__node_name(current));
} else {
pcmk__rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
(optional? "optional" : "required"), rsc->id,
pcmk__node_name(current));
demote_action(rsc, current, optional);
}
}
}
static void
assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
{
pcmk__assert(false);
}
/*!
* \internal
* \brief Schedule cleanup of a resource
*
* \param[in,out] rsc Resource to clean up
* \param[in] node Node to clean up on
* \param[in] optional Whether clean-up should be optional
*/
void
pcmk__schedule_cleanup(pcmk_resource_t *rsc, const pcmk_node_t *node,
bool optional)
{
/* If the cleanup is required, its orderings are optional, because they're
* relevant only if both actions are required. Conversely, if the cleanup is
* optional, the orderings make the then action required if the first action
* becomes required.
*/
uint32_t flag = optional? pcmk__ar_first_implies_then : pcmk__ar_ordered;
CRM_CHECK((rsc != NULL) && (node != NULL), return);
if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
rsc->id, pcmk__node_name(node));
return;
}
if (!pcmk__node_available(node, pcmk__node_alive)) {
pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
rsc->id, pcmk__node_name(node));
return;
}
crm_notice("Scheduling clean-up of %s on %s",
rsc->id, pcmk__node_name(node));
delete_action(rsc, node, optional);
// stop -> clean-up -> start
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP,
rsc, PCMK_ACTION_DELETE, flag);
pcmk__order_resource_actions(rsc, PCMK_ACTION_DELETE,
rsc, PCMK_ACTION_START, flag);
}
/*!
* \internal
* \brief Add primitive meta-attributes relevant to graph actions to XML
*
* \param[in] rsc Primitive resource whose meta-attributes should be added
* \param[in,out] xml Transition graph action attributes XML to add to
*/
void
pcmk__primitive_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml)
{
char *name = NULL;
char *value = NULL;
const pcmk_resource_t *parent = NULL;
pcmk__assert(pcmk__is_primitive(rsc) && (xml != NULL));
/* Clone instance numbers get set internally as meta-attributes, and are
* needed in the transition graph (for example, to tell unique clone
* instances apart).
*/
value = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CLONE);
if (value != NULL) {
name = crm_meta_name(PCMK__META_CLONE);
crm_xml_add(xml, name, value);
free(name);
}
// Not sure if this one is really needed ...
value = g_hash_table_lookup(rsc->priv->meta, PCMK_META_REMOTE_NODE);
if (value != NULL) {
name = crm_meta_name(PCMK_META_REMOTE_NODE);
crm_xml_add(xml, name, value);
free(name);
}
/* The PCMK__META_CONTAINER meta-attribute can be set on the primitive
* itself or one of its ancestors, so check them all and keep the highest.
*/
for (parent = rsc; parent != NULL; parent = parent->priv->parent) {
if (parent->priv->launcher != NULL) {
crm_xml_add(xml, CRM_META "_" PCMK__META_CONTAINER,
parent->priv->launcher->id);
}
}
/* Bundle replica children will get their external-ip set internally as a
* meta-attribute. The graph action needs it, but under a different naming
* convention than other meta-attributes.
*/
value = g_hash_table_lookup(rsc->priv->meta, "external-ip");
if (value != NULL) {
crm_xml_add(xml, "pcmk_external_ip", value);
}
}
// Primitive implementation of pcmk__assignment_methods_t:add_utilization()
void
pcmk__primitive_add_utilization(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization)
{
pcmk__assert(pcmk__is_primitive(rsc) && (orig_rsc != NULL)
&& (utilization != NULL));
if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) {
return;
}
pcmk__rsc_trace(orig_rsc,
"%s: Adding primitive %s as colocated utilization",
orig_rsc->id, rsc->id);
pcmk__release_node_capacity(utilization, rsc);
}
/*!
* \internal
* \brief Get epoch time of node's shutdown attribute (or now if none)
*
* \param[in,out] node Node to check
*
* \return Epoch time corresponding to shutdown attribute if set or now if not
*/
static time_t
shutdown_time(pcmk_node_t *node)
{
const char *shutdown = pcmk__node_attr(node, PCMK__NODE_ATTR_SHUTDOWN, NULL,
pcmk__rsc_node_current);
time_t result = 0;
if (shutdown != NULL) {
long long result_ll;
int rc = pcmk__scan_ll(shutdown, &result_ll, 0LL);
if (rc == pcmk_rc_ok) {
result = (time_t) result_ll;
} else {
crm_warn("Ignoring invalid value '%s' for %s "
PCMK__NODE_ATTR_SHUTDOWN " attribute: %s",
shutdown, pcmk__node_name(node), pcmk_rc_str(rc));
}
}
if (result == 0) {
result = pcmk__scheduler_epoch_time(node->priv->scheduler);
}
return result;
}
/*!
* \internal
* \brief Ban a resource from a node if it's not locked to the node
*
* \param[in] data Node to check
* \param[in,out] user_data Resource to check
*/
static void
ban_if_not_locked(gpointer data, gpointer user_data)
{
const pcmk_node_t *node = (const pcmk_node_t *) data;
pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
if (!pcmk__same_node(node, rsc->priv->lock_node)) {
resource_location(rsc, node, -PCMK_SCORE_INFINITY,
PCMK_OPT_SHUTDOWN_LOCK, rsc->priv->scheduler);
}
}
// Primitive implementation of pcmk__assignment_methods_t:shutdown_lock()
void
pcmk__primitive_shutdown_lock(pcmk_resource_t *rsc)
{
pcmk_scheduler_t *scheduler = NULL;
pcmk__assert(pcmk__is_primitive(rsc));
scheduler = rsc->priv->scheduler;
// Fence devices and remote connections can't be locked
if (pcmk_any_flags_set(rsc->flags, pcmk__rsc_fence_device
|pcmk__rsc_is_remote_connection)) {
return;
}
if (rsc->priv->lock_node != NULL) {
// The lock was obtained from resource history
if (rsc->priv->active_nodes != NULL) {
/* The resource was started elsewhere even though it is now
* considered locked. This shouldn't be possible, but as a
* failsafe, we don't want to disturb the resource now.
*/
pcmk__rsc_info(rsc,
"Cancelling shutdown lock "
"because %s is already active", rsc->id);
pe__clear_resource_history(rsc, rsc->priv->lock_node);
rsc->priv->lock_node = NULL;
rsc->priv->lock_time = 0;
}
// Only a resource active on exactly one node can be locked
} else if (pcmk__list_of_1(rsc->priv->active_nodes)) {
pcmk_node_t *node = rsc->priv->active_nodes->data;
if (node->details->shutdown) {
if (node->details->unclean) {
pcmk__rsc_debug(rsc,
"Not locking %s to unclean %s for shutdown",
rsc->id, pcmk__node_name(node));
} else {
rsc->priv->lock_node = node;
rsc->priv->lock_time = shutdown_time(node);
}
}
}
if (rsc->priv->lock_node == NULL) {
// No lock needed
return;
}
if (scheduler->priv->shutdown_lock_ms > 0U) {
time_t lock_expiration = rsc->priv->lock_time
+ pcmk__timeout_ms2s(scheduler->priv->shutdown_lock_ms);
pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
rsc->id, pcmk__node_name(rsc->priv->lock_node),
(long long) lock_expiration);
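        // Request a cluster recheck just after the expiration time, so the
        // lock is noticed as expired and the bans can be lifted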
pcmk__update_recheck_time(++lock_expiration, scheduler,
"shutdown lock expiration");
} else {
pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown",
rsc->id, pcmk__node_name(rsc->priv->lock_node));
}
// If resource is locked to one node, ban it from all other nodes
g_list_foreach(scheduler->nodes, ban_if_not_locked, rsc);
}
