Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F2824923
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
220 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/include/pcmki/pcmki_sched_allocate.h b/include/pcmki/pcmki_sched_allocate.h
index f6561070fd..9a20e76945 100644
--- a/include/pcmki/pcmki_sched_allocate.h
+++ b/include/pcmki/pcmki_sched_allocate.h
@@ -1,93 +1,90 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__PCMKI_PCMKI_SCHED_ALLOCATE__H
# define PCMK__PCMKI_PCMKI_SCHED_ALLOCATE__H
# include <glib.h>
# include <crm/common/xml.h>
# include <crm/pengine/status.h>
# include <crm/pengine/complex.h>
# include <crm/common/xml_internal.h>
# include <crm/pengine/internal.h>
# include <crm/common/xml.h>
# include <pcmki/pcmki_scheduler.h>
pe_node_t *pcmk__native_allocate(pe_resource_t *rsc, pe_node_t *prefer);
void native_create_actions(pe_resource_t *rsc);
void native_internal_constraints(pe_resource_t *rsc);
extern enum pe_action_flags native_action_flags(pe_action_t * action, pe_node_t * node);
void native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint);
void native_expand(pe_resource_t *rsc);
gboolean native_create_probe(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *complete, gboolean force);
extern void native_append_meta(pe_resource_t * rsc, xmlNode * xml);
void pcmk__primitive_add_utilization(pe_resource_t *rsc,
pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization);
void pcmk__primitive_shutdown_lock(pe_resource_t *rsc);
pe_node_t *pcmk__group_allocate(pe_resource_t *rsc, pe_node_t *prefer);
void group_create_actions(pe_resource_t *rsc);
void group_internal_constraints(pe_resource_t *rsc);
extern enum pe_action_flags group_action_flags(pe_action_t * action, pe_node_t * node);
void group_rsc_location(pe_resource_t *rsc, pe__location_t *constraint);
void group_expand(pe_resource_t *rsc);
extern void group_append_meta(pe_resource_t * rsc, xmlNode * xml);
void pcmk__group_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
void pcmk__group_shutdown_lock(pe_resource_t *rsc);
pe_node_t *pcmk__bundle_allocate(pe_resource_t *rsc, pe_node_t *prefer);
void pcmk__bundle_create_actions(pe_resource_t *rsc);
gboolean pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *complete, gboolean force);
void pcmk__bundle_internal_constraints(pe_resource_t *rsc);
void pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint);
enum pe_action_flags pcmk__bundle_action_flags(pe_action_t *action,
pe_node_t *node);
void pcmk__bundle_expand(pe_resource_t *rsc);
void pcmk__bundle_append_meta(pe_resource_t *rsc, xmlNode *xml);
void pcmk__bundle_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
void pcmk__bundle_shutdown_lock(pe_resource_t *rsc);
pe_node_t *pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *prefer);
void clone_create_actions(pe_resource_t *rsc);
void clone_internal_constraints(pe_resource_t *rsc);
void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint);
extern enum pe_action_flags clone_action_flags(pe_action_t * action, pe_node_t * node);
void clone_expand(pe_resource_t *rsc);
gboolean clone_create_probe(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *complete, gboolean force);
extern void clone_append_meta(pe_resource_t * rsc, xmlNode * xml);
void pcmk__clone_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
void pcmk__clone_shutdown_lock(pe_resource_t *rsc);
void pcmk__add_promotion_scores(pe_resource_t *rsc);
-uint32_t native_update_actions(pe_action_t *first, pe_action_t *then,
- pe_node_t *node, uint32_t flags, uint32_t filter,
- uint32_t type, pe_working_set_t *data_set);
uint32_t group_update_actions(pe_action_t *first, pe_action_t *then,
pe_node_t *node, uint32_t flags, uint32_t filter,
uint32_t type, pe_working_set_t *data_set);
uint32_t pcmk__multi_update_actions(pe_action_t *first,
pe_action_t *then,
pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set);
void pcmk__log_transition_summary(const char *filename);
void clone_create_pseudo_actions(pe_resource_t *rsc, GList *children,
notify_data_t **start_notify,
notify_data_t **stop_notify);
#endif
diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h
index 4c1b707ca0..63d59bfc9b 100644
--- a/lib/pacemaker/libpacemaker_private.h
+++ b/lib/pacemaker/libpacemaker_private.h
@@ -1,659 +1,665 @@
/*
* Copyright 2021-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__LIBPACEMAKER_PRIVATE__H
# define PCMK__LIBPACEMAKER_PRIVATE__H
/* This header is for the sole use of libpacemaker, so that functions can be
* declared with G_GNUC_INTERNAL for efficiency.
*/
#include <crm/pengine/pe_types.h> // pe_action_t, pe_node_t, pe_working_set_t
// Flags to modify the behavior of the add_colocated_node_scores() method
enum pcmk__coloc_select {
// With no other flags, apply all "with this" colocations
pcmk__coloc_select_default = 0,
// Apply "this with" colocations instead of "with this" colocations
pcmk__coloc_select_this_with = (1 << 0),
// Apply only colocations with non-negative scores
pcmk__coloc_select_nonnegative = (1 << 1),
// Apply only colocations with at least one matching node
pcmk__coloc_select_active = (1 << 2),
};
// Flags the update_ordered_actions() method can return
enum pcmk__updated {
pcmk__updated_none = 0, // Nothing changed
pcmk__updated_first = (1 << 0), // First action was updated
pcmk__updated_then = (1 << 1), // Then action was updated
};
#define pcmk__set_updated_flags(au_flags, action, flags_to_set) do { \
au_flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Action update", \
(action)->uuid, au_flags, \
(flags_to_set), #flags_to_set); \
} while (0)
#define pcmk__clear_updated_flags(au_flags, action, flags_to_clear) do { \
au_flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Action update", \
(action)->uuid, au_flags, \
(flags_to_clear), #flags_to_clear); \
} while (0)
// Resource allocation methods
struct resource_alloc_functions_s {
pe_node_t *(*allocate)(pe_resource_t *rsc, pe_node_t *prefer);
void (*create_actions)(pe_resource_t *rsc);
gboolean (*create_probe)(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *complete, gboolean force);
void (*internal_constraints)(pe_resource_t *rsc);
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node weights (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void (*apply_coloc_score) (pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent);
/*!
* \internal
* \brief Update nodes with scores of colocated resources' nodes
*
* Given a table of nodes and a resource, update the nodes' scores with the
* scores of the best nodes matching the attribute used for each of the
* resource's relevant colocations.
*
* \param[in,out] rsc Resource to check colocations for
* \param[in] log_id Resource ID to use in logs (if NULL, use rsc ID)
* \param[in,out] nodes Nodes to update
* \param[in] attr Colocation attribute (NULL to use default)
* \param[in] factor Incorporate scores multiplied by this factor
* \param[in] flags Bitmask of enum pcmk__coloc_select values
*
* \note The caller remains responsible for freeing \p *nodes.
*/
void (*add_colocated_node_scores)(pe_resource_t *rsc, const char *log_id,
GHashTable **nodes, const char *attr,
float factor,
enum pcmk__coloc_select flags);
/*!
* \internal
* \brief Create list of all resources in colocations with a given resource
*
* Given a resource, create a list of all resources involved in mandatory
* colocations with it, whether directly or indirectly via chained colocations.
*
* \param[in] rsc Resource to add to colocated list
* \param[in] orig_rsc Resource originally requested
* \param[in] colocated_rscs Existing list
*
* \return List of given resource and all resources involved in colocations
*
* \note This function is recursive; top-level callers should pass NULL as
* \p colocated_rscs and \p orig_rsc, and the desired resource as
* \p rsc. The recursive calls will use other values.
*/
GList *(*colocated_resources)(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs);
void (*rsc_location) (pe_resource_t *, pe__location_t *);
enum pe_action_flags (*action_flags) (pe_action_t *, pe_node_t *);
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions'
* flags (and runnable_before members if appropriate) as appropriate for the
* ordering. In some cases, the ordering could be disabled as well.
*
* \param[in] first 'First' action in an ordering
* \param[in] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t (*update_ordered_actions)(pe_action_t *first, pe_action_t *then,
pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set);
void (*output_actions)(pe_resource_t *rsc);
void (*expand)(pe_resource_t *rsc);
void (*append_meta) (pe_resource_t * rsc, xmlNode * xml);
/*!
* \internal
* \brief Add a resource's utilization to a table of utilization values
*
* This function is used when summing the utilization of a resource and all
* resources colocated with it, to determine whether a node has sufficient
* capacity. Given a resource and a table of utilization values, it will add
* the resource's utilization to the existing values, if the resource has
* not yet been allocated to a node.
*
* \param[in] rsc Resource with utilization to add
* \param[in] orig_rsc Resource being allocated (for logging only)
* \param[in] all_rscs List of all resources that will be summed
* \param[in] utilization Table of utilization values to add to
*/
void (*add_utilization)(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization);
/*!
* \internal
* \brief Apply a shutdown lock for a resource, if appropriate
*
* \param[in] rsc Resource to check for shutdown lock
*/
void (*shutdown_lock)(pe_resource_t *rsc);
};
// Actions (pcmk_sched_actions.c)
G_GNUC_INTERNAL
void pcmk__update_action_for_orderings(pe_action_t *action,
pe_working_set_t *data_set);
+G_GNUC_INTERNAL
+uint32_t pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then,
+ pe_node_t *node, uint32_t flags,
+ uint32_t filter, uint32_t type,
+ pe_working_set_t *data_set);
+
G_GNUC_INTERNAL
void pcmk__log_action(const char *pre_text, pe_action_t *action, bool details);
G_GNUC_INTERNAL
pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name,
guint interval_ms, pe_node_t *node);
G_GNUC_INTERNAL
pe_action_t *pcmk__new_shutdown_action(pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__action_locks_rsc_to_node(const pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__deduplicate_action_inputs(pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__output_actions(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node,
xmlNode *xml_op);
G_GNUC_INTERNAL
void pcmk__handle_rsc_config_changes(pe_working_set_t *data_set);
// Producing transition graphs (pcmk_graph_producer.c)
G_GNUC_INTERNAL
bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action,
pe_action_wrapper_t *input);
G_GNUC_INTERNAL
void pcmk__add_action_to_graph(pe_action_t *action, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__create_graph(pe_working_set_t *data_set);
// Fencing (pcmk_sched_fencing.c)
G_GNUC_INTERNAL
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *action, enum pe_ordering order);
G_GNUC_INTERNAL
void pcmk__fence_guest(pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__node_unfenced(pe_node_t *node);
// Injected scheduler inputs (pcmk_sched_injections.c)
void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib,
pcmk_injections_t *injections);
// Constraints of any type (pcmk_sched_constraints.c)
G_GNUC_INTERNAL
pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id);
G_GNUC_INTERNAL
xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id,
pe_resource_t **rsc, pe_tag_t **tag);
G_GNUC_INTERNAL
bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr,
bool convert_rsc, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__create_internal_constraints(pe_working_set_t *data_set);
// Location constraints
G_GNUC_INTERNAL
void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc,
int node_weight, const char *discover_mode,
pe_node_t *foo_node,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_locations(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_location(pe__location_t *constraint, pe_resource_t *rsc);
// Colocation constraints (pcmk_sched_colocation.c)
enum pcmk__coloc_affects {
pcmk__coloc_affects_nothing = 0,
pcmk__coloc_affects_location,
pcmk__coloc_affects_role,
};
G_GNUC_INTERNAL
enum pcmk__coloc_affects pcmk__colocation_affects(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint,
bool preview);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_weights(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint);
G_GNUC_INTERNAL
void pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
GHashTable **nodes, const char *attr,
float factor, uint32_t flags);
G_GNUC_INTERNAL
void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__new_colocation(const char *id, const char *node_attr, int score,
pe_resource_t *dependent, pe_resource_t *primary,
const char *dependent_role, const char *primary_role,
bool influence, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__block_colocated_starts(pe_action_t *action,
pe_working_set_t *data_set);
/*!
* \internal
* \brief Check whether colocation's dependent preferences should be considered
*
* \param[in] colocation Colocation constraint
* \param[in] rsc Primary instance (normally this will be
* colocation->primary, which NULL will be treated as,
* but for clones or bundles with multiple instances
* this can be a particular instance)
*
* \return true if colocation influence should be effective, otherwise false
*/
static inline bool
pcmk__colocation_has_influence(const pcmk__colocation_t *colocation,
const pe_resource_t *rsc)
{
if (rsc == NULL) {
rsc = colocation->primary;
}
/* A bundle replica colocates its remote connection with its container,
* using a finite score so that the container can run on Pacemaker Remote
* nodes.
*
* Moving a connection is lightweight and does not interrupt the service,
* while moving a container is heavyweight and does interrupt the service,
* so don't move a clean, active container based solely on the preferences
* of its connection.
*
* This also avoids problematic scenarios where two containers want to
* perpetually swap places.
*/
if (pcmk_is_set(colocation->dependent->flags, pe_rsc_allow_remote_remotes)
&& !pcmk_is_set(rsc->flags, pe_rsc_failed)
&& pcmk__list_of_1(rsc->running_on)) {
return false;
}
/* The dependent in a colocation influences the primary's location
* if the influence option is true or the primary is not yet active.
*/
return colocation->influence || (rsc->running_on == NULL);
}
// Ordering constraints (pcmk_sched_ordering.c)
G_GNUC_INTERNAL
void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task,
pe_action_t *first_action, pe_resource_t *then_rsc,
char *then_task, pe_action_t *then_action,
enum pe_ordering type, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__disable_invalid_orderings(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_stops_before_shutdown(pe_node_t *node,
pe_action_t *shutdown_op);
G_GNUC_INTERNAL
void pcmk__apply_orderings(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_after_each(pe_action_t *after, GList *list);
/*!
* \internal
* \brief Create a new ordering between two resource actions
*
* \param[in] first_rsc Resource for 'first' action
* \param[in] then_rsc Resource for 'then' action
* \param[in] first_task Action key for 'first' action
* \param[in] then_task Action key for 'then' action
* \param[in] flags Bitmask of enum pe_ordering flags
* \param[in] data_set Cluster working set to add ordering to
*/
#define pcmk__order_resource_actions(first_rsc, first_task, \
then_rsc, then_task, flags) \
pcmk__new_ordering((first_rsc), \
pcmk__op_key((first_rsc)->id, (first_task), 0), \
NULL, \
(then_rsc), \
pcmk__op_key((then_rsc)->id, (then_task), 0), \
NULL, (flags), (first_rsc)->cluster)
#define pcmk__order_starts(rsc1, rsc2, type) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \
(rsc2), CRMD_ACTION_START, (type))
#define pcmk__order_stops(rsc1, rsc2, type) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \
(rsc2), CRMD_ACTION_STOP, (type))
// Ticket constraints (pcmk_sched_tickets.c)
G_GNUC_INTERNAL
void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set);
// Promotable clone resources (pcmk_sched_promotable.c)
G_GNUC_INTERNAL
void pcmk__require_promotion_tickets(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__set_instance_roles(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__create_promotable_actions(pe_resource_t *clone);
G_GNUC_INTERNAL
void pcmk__promotable_restart_ordering(pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__order_promotable_instances(pe_resource_t *clone);
G_GNUC_INTERNAL
void pcmk__update_dependent_with_promotable(pe_resource_t *primary,
pe_resource_t *dependent,
pcmk__colocation_t *colocation);
G_GNUC_INTERNAL
void pcmk__update_promotable_dependent_priority(pe_resource_t *primary,
pe_resource_t *dependent,
pcmk__colocation_t *colocation);
// Pacemaker Remote nodes (pcmk_sched_remote.c)
G_GNUC_INTERNAL
bool pcmk__is_failed_remote_node(pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_remote_connection_actions(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node);
G_GNUC_INTERNAL
pe_node_t *pcmk__connection_host_for_action(pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params);
G_GNUC_INTERNAL
void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action);
// Primitives (pcmk_sched_native.c)
G_GNUC_INTERNAL
void pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent);
// Groups (pcmk_sched_group.c)
G_GNUC_INTERNAL
void pcmk__group_apply_coloc_score(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__group_add_colocated_node_scores(pe_resource_t *rsc,
const char *log_id,
GHashTable **nodes, const char *attr,
float factor, uint32_t flags);
G_GNUC_INTERNAL
GList *pcmk__group_colocated_resources(pe_resource_t *rsc,
pe_resource_t *orig_rsc,
GList *colocated_rscs);
// Clones (pcmk_sched_clone.c)
G_GNUC_INTERNAL
void pcmk__clone_apply_coloc_score(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent);
// Bundles (pcmk_sched_bundle.c)
G_GNUC_INTERNAL
void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent);
G_GNUC_INTERNAL
void pcmk__output_bundle_actions(pe_resource_t *rsc);
// Injections (pcmk_injections.c)
G_GNUC_INTERNAL
xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node,
bool up);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node,
const char *resource,
const char *lrm_name,
const char *rclass,
const char *rtype,
const char *rprovider);
G_GNUC_INTERNAL
void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node,
const char *resource, const char *task,
guint interval_ms, int rc);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_action_result(xmlNode *cib_resource,
lrmd_event_data_t *op, int target_rc);
// Nodes (pcmk_sched_nodes.c)
G_GNUC_INTERNAL
bool pcmk__node_available(const pe_node_t *node, bool consider_score,
bool consider_guest);
G_GNUC_INTERNAL
bool pcmk__any_node_available(GHashTable *nodes);
G_GNUC_INTERNAL
GHashTable *pcmk__copy_node_table(GHashTable *nodes);
G_GNUC_INTERNAL
GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node);
G_GNUC_INTERNAL
void pcmk__apply_node_health(pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe_node_t *pcmk__top_allowed_node(const pe_resource_t *rsc,
const pe_node_t *node);
// Functions applying to more than one variant (pcmk_sched_resource.c)
G_GNUC_INTERNAL
void pcmk__set_allocation_methods(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *rsc_entry, bool active_on_node);
G_GNUC_INTERNAL
GList *pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set);
G_GNUC_INTERNAL
GList *pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs);
G_GNUC_INTERNAL
void pcmk__output_resource_actions(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force);
G_GNUC_INTERNAL
bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force);
G_GNUC_INTERNAL
void pcmk__unassign_resource(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
pe_resource_t **failed);
G_GNUC_INTERNAL
void pcmk__sort_resources(pe_working_set_t *data_set);
G_GNUC_INTERNAL
gint pcmk__cmp_instance(gconstpointer a, gconstpointer b);
G_GNUC_INTERNAL
gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b);
// Functions related to probes (pcmk_sched_probes.c)
G_GNUC_INTERNAL
void pcmk__order_probes(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__schedule_probes(pe_working_set_t *data_set);
// Functions related to node utilization (pcmk_sched_utilization.c)
G_GNUC_INTERNAL
int pcmk__compare_node_capacities(const pe_node_t *node1,
const pe_node_t *node2);
G_GNUC_INTERNAL
void pcmk__consume_node_capacity(GHashTable *current_utilization,
pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__release_node_capacity(GHashTable *current_utilization,
pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__ban_insufficient_capacity(pe_resource_t *rsc, pe_node_t **prefer);
G_GNUC_INTERNAL
void pcmk__create_utilization_constraints(pe_resource_t *rsc,
GList *allowed_nodes);
G_GNUC_INTERNAL
void pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set);
#endif // PCMK__LIBPACEMAKER_PRIVATE__H
diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c
index 648e24c7ca..c05ab5613c 100644
--- a/lib/pacemaker/pcmk_sched_actions.c
+++ b/lib/pacemaker/pcmk_sched_actions.c
@@ -1,2023 +1,2021 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <sys/param.h>
#include <glib.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
extern gboolean DeleteRsc(pe_resource_t *rsc, pe_node_t *node,
gboolean optional, pe_working_set_t *data_set);
/*!
* \internal
* \brief Get the action flags relevant to ordering constraints
*
* \param[in] action Action to check
* \param[in] node Node that *other* action in the ordering is on
* (used only for clone resource actions)
*
* \return Action flags that should be used for orderings
*/
static enum pe_action_flags
action_flags_for_ordering(pe_action_t *action, pe_node_t *node)
{
bool runnable = false;
enum pe_action_flags flags;
// For non-resource actions, return the action flags
if (action->rsc == NULL) {
return action->flags;
}
/* For non-clone resources, or a clone action not assigned to a node,
* return the flags as determined by the resource method without a node
* specified.
*/
flags = action->rsc->cmds->action_flags(action, NULL);
if ((node == NULL) || !pe_rsc_is_clone(action->rsc)) {
return flags;
}
/* Otherwise (i.e., for clone resource actions on a specific node), first
* remember whether the non-node-specific action is runnable.
*/
runnable = pcmk_is_set(flags, pe_action_runnable);
// Then recheck the resource method with the node
flags = action->rsc->cmds->action_flags(action, node);
/* For clones in ordering constraints, the node-specific "runnable" doesn't
* matter, just the non-node-specific setting (i.e., is the action runnable
* anywhere).
*
* This applies only to runnable, and only for ordering constraints. This
* function shouldn't be used for other types of constraints without
* changes. Not very satisfying, but it's logical and appears to work well.
*/
if (runnable && !pcmk_is_set(flags, pe_action_runnable)) {
pe__set_raw_action_flags(flags, action->rsc->id,
pe_action_runnable);
}
return flags;
}
/*!
* \internal
* \brief Get action UUID that should be used with a resource ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the UUID and resource of the first action in an
* ordering, this returns the UUID of the action that should actually be used
* for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0").
*
* \param[in] first_uuid UUID of first action in ordering
* \param[in] first_rsc Resource of first action in ordering
*
* \return Newly allocated copy of UUID to use with ordering
* \note It is the caller's responsibility to free the return value.
*/
static char *
action_uuid_for_ordering(const char *first_uuid, pe_resource_t *first_rsc)
{
guint interval_ms = 0;
char *uuid = NULL;
char *rid = NULL;
char *first_task_str = NULL;
enum action_tasks first_task = no_action;
enum action_tasks remapped_task = no_action;
// Only non-notify actions for collective resources need remapping
if ((strstr(first_uuid, "notify") != NULL)
|| (first_rsc->variant < pe_group)) {
goto done;
}
// Only non-recurring actions need remapping
CRM_ASSERT(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms));
if (interval_ms > 0) {
goto done;
}
first_task = text2task(first_task_str);
switch (first_task) {
case stop_rsc:
case start_rsc:
case action_notify:
case action_promote:
case action_demote:
remapped_task = first_task + 1;
break;
case stopped_rsc:
case started_rsc:
case action_notified:
case action_promoted:
case action_demoted:
remapped_task = first_task;
break;
case monitor_rsc:
case shutdown_crm:
case stonith_node:
break;
default:
crm_err("Unknown action '%s' in ordering", first_task_str);
break;
}
if (remapped_task != no_action) {
/* If a (clone) resource has notifications enabled, we want to order
* relative to when all notifications have been sent for the remapped
* task. Only outermost resources or those in bundles have
* notifications.
*/
if (pcmk_is_set(first_rsc->flags, pe_rsc_notify)
&& ((first_rsc->parent == NULL)
|| (pe_rsc_is_clone(first_rsc)
&& (first_rsc->parent->variant == pe_container)))) {
uuid = pcmk__notify_key(rid, "confirmed-post",
task2text(remapped_task));
} else {
uuid = pcmk__op_key(rid, task2text(remapped_task), 0);
}
pe_rsc_trace(first_rsc,
"Remapped action UUID %s to %s for ordering purposes",
first_uuid, uuid);
}
done:
if (uuid == NULL) {
uuid = strdup(first_uuid);
CRM_ASSERT(uuid != NULL);
}
free(first_task_str);
free(rid);
return uuid;
}
/*!
* \internal
* \brief Get actual action that should be used with an ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the first action in an ordering, this returns the
* the action that should actually be used for ordering (for example, the
* started action instead of the start action).
*
* \param[in] action First action in an ordering
*
* \return Actual action that should be used for the ordering
*/
static pe_action_t *
action_for_ordering(pe_action_t *action)
{
pe_action_t *result = action;
pe_resource_t *rsc = action->rsc;
if ((rsc != NULL) && (rsc->variant >= pe_group) && (action->uuid != NULL)) {
char *uuid = action_uuid_for_ordering(action->uuid, rsc);
result = find_first_action(rsc->actions, uuid, NULL, NULL);
if (result == NULL) {
crm_warn("Not remapping %s to %s because %s does not have "
"remapped action", action->uuid, uuid, rsc->id);
result = action;
}
free(uuid);
}
return result;
}
/*!
* \internal
* \brief Update flags for ordering's actions appropriately for ordering's flags
*
* \param[in] first First action in an ordering
* \param[in] then Then action in an ordering
* \param[in] first_flags Action flags for \p first for ordering purposes
* \param[in] then_flags Action flags for \p then for ordering purposes
* \param[in] order Action wrapper for \p first in ordering
* \param[in] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags
*/
static uint32_t
update_action_for_ordering_flags(pe_action_t *first, pe_action_t *then,
enum pe_action_flags first_flags,
enum pe_action_flags then_flags,
pe_action_wrapper_t *order,
pe_working_set_t *data_set)
{
uint32_t changed = pcmk__updated_none;
/* The node will only be used for clones. If interleaved, node will be NULL,
* otherwise the ordering scope will be limited to the node. Normally, the
* whole 'then' clone should restart if 'first' is restarted, so then->node
* is needed.
*/
pe_node_t *node = then->node;
if (pcmk_is_set(order->type, pe_order_implies_then_on_node)) {
/* For unfencing, only instances of 'then' on the same node as 'first'
* (the unfencing operation) should restart, so reset node to
* first->node, at which point this case is handled like a normal
* pe_order_implies_then.
*/
pe__clear_order_flags(order->type, pe_order_implies_then_on_node);
pe__set_order_flags(order->type, pe_order_implies_then);
node = first->node;
pe_rsc_trace(then->rsc,
"%s then %s: mapped pe_order_implies_then_on_node to "
"pe_order_implies_then on %s",
first->uuid, then->uuid, node->details->uname);
}
if (pcmk_is_set(order->type, pe_order_implies_then)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_ordered_actions(first, then,
node,
first_flags & pe_action_optional,
pe_action_optional,
pe_order_implies_then,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_optional)
&& pcmk_is_set(then->flags, pe_action_optional)) {
pe__clear_action_flags(then, pe_action_optional);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_then",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_restart) && (then->rsc != NULL)) {
enum pe_action_flags restart = pe_action_optional|pe_action_runnable;
changed |= then->rsc->cmds->update_ordered_actions(first, then, node,
first_flags, restart,
pe_order_restart,
data_set);
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_restart",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_implies_first)) {
if (first->rsc != NULL) {
changed |= first->rsc->cmds->update_ordered_actions(first, then,
node,
first_flags,
pe_action_optional,
pe_order_implies_first,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_optional)
&& pcmk_is_set(first->flags, pe_action_runnable)) {
pe__clear_action_flags(first, pe_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_first);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_promoted_implies_first)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_ordered_actions(first, then,
node,
first_flags & pe_action_optional,
pe_action_optional,
pe_order_promoted_implies_first,
data_set);
}
pe_rsc_trace(then->rsc,
"%s then %s: %s after pe_order_promoted_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_one_or_more)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_ordered_actions(first, then,
node,
first_flags,
pe_action_runnable,
pe_order_one_or_more,
data_set);
} else if (pcmk_is_set(first_flags, pe_action_runnable)) {
// We have another runnable instance of "first"
then->runnable_before++;
/* Mark "then" as runnable if it requires a certain number of
* "before" instances to be runnable, and they now are.
*/
if ((then->runnable_before >= then->required_runnable_before)
&& !pcmk_is_set(then->flags, pe_action_runnable)) {
pe__set_action_flags(then, pe_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_one_or_more",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_probe) && (then->rsc != NULL)) {
if (!pcmk_is_set(first_flags, pe_action_runnable)
&& (first->rsc->running_on != NULL)) {
pe_rsc_trace(then->rsc,
"%s then %s: ignoring because first is stopping",
first->uuid, then->uuid);
order->type = pe_order_none;
} else {
changed |= then->rsc->cmds->update_ordered_actions(first, then,
node,
first_flags,
pe_action_runnable,
pe_order_runnable_left,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_probe",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_runnable_left)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_ordered_actions(first, then,
node,
first_flags,
pe_action_runnable,
pe_order_runnable_left,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_runnable)
&& pcmk_is_set(then->flags, pe_action_runnable)) {
pe__clear_action_flags(then, pe_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_runnable_left",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_implies_first_migratable)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_ordered_actions(first, then,
node,
first_flags,
pe_action_optional,
pe_order_implies_first_migratable,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after "
"pe_order_implies_first_migratable",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_pseudo_left)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_ordered_actions(first, then,
node,
first_flags,
pe_action_optional,
pe_order_pseudo_left,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_pseudo_left",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_optional)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_ordered_actions(first, then,
node,
first_flags,
pe_action_runnable,
pe_order_optional,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_optional",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_asymmetrical)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_ordered_actions(first, then,
node,
first_flags,
pe_action_runnable,
pe_order_asymmetrical,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_asymmetrical",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(first->flags, pe_action_runnable)
&& pcmk_is_set(order->type, pe_order_implies_then_printed)
&& !pcmk_is_set(first_flags, pe_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
then->uuid, first->uuid);
pe__set_action_flags(then, pe_action_print_always);
// Don't bother marking 'then' as changed just for this
}
if (pcmk_is_set(order->type, pe_order_implies_first_printed)
&& !pcmk_is_set(then_flags, pe_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
first->uuid, then->uuid);
pe__set_action_flags(first, pe_action_print_always);
// Don't bother marking 'first' as changed just for this
}
if (pcmk_any_flags_set(order->type, pe_order_implies_then
|pe_order_implies_first
|pe_order_restart)
&& (first->rsc != NULL)
&& !pcmk_is_set(first->rsc->flags, pe_rsc_managed)
&& pcmk_is_set(first->rsc->flags, pe_rsc_block)
&& !pcmk_is_set(first->flags, pe_action_runnable)
&& pcmk__str_eq(first->task, RSC_STOP, pcmk__str_casei)) {
if (pcmk_is_set(then->flags, pe_action_runnable)) {
pe__clear_action_flags(then, pe_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after checking whether first "
"is blocked, unmanaged, unrunnable stop",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
return changed;
}
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pe_action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->details->uname)
/*!
* \internal
* \brief Update an action's flags for all orderings where it is "then"
*
* \param[in] then Action to update
* \param[in] data_set Cluster working set
*/
void
pcmk__update_action_for_orderings(pe_action_t *then, pe_working_set_t *data_set)
{
GList *lpc = NULL;
uint32_t changed = pcmk__updated_none;
int last_flags = then->flags;
pe_rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s",
action_type_str(then->flags), then->uuid,
action_optional_str(then->flags),
action_runnable_str(then->flags), action_node_str(then));
if (pcmk_is_set(then->flags, pe_action_requires_any)) {
/* Initialize current known "runnable before" actions. As
* update_action_for_ordering_flags() is called for each of then's
* before actions, this number will increment as runnable 'first'
* actions are encountered.
*/
then->runnable_before = 0;
if (then->required_runnable_before == 0) {
/* @COMPAT This ordering constraint uses the deprecated
* "require-all=false" attribute. Treat it like "clone-min=1".
*/
then->required_runnable_before = 1;
}
/* The pe_order_one_or_more clause of update_action_for_ordering_flags()
* (called below) will reset runnable if appropriate.
*/
pe__clear_action_flags(then, pe_action_runnable);
}
for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pe_action_t *first = other->action;
pe_node_t *then_node = then->node;
pe_node_t *first_node = first->node;
if ((first->rsc != NULL)
&& (first->rsc->variant == pe_group)
&& pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) {
first_node = first->rsc->fns->location(first->rsc, NULL, FALSE);
if (first_node != NULL) {
pe_rsc_trace(first->rsc, "Found node %s for 'first' %s",
first_node->details->uname, first->uuid);
}
}
if ((then->rsc != NULL)
&& (then->rsc->variant == pe_group)
&& pcmk__str_eq(then->task, RSC_START, pcmk__str_casei)) {
then_node = then->rsc->fns->location(then->rsc, NULL, FALSE);
if (then_node != NULL) {
pe_rsc_trace(then->rsc, "Found node %s for 'then' %s",
then_node->details->uname, then->uuid);
}
}
// Disable constraint if it only applies when on same node, but isn't
if (pcmk_is_set(other->type, pe_order_same_node)
&& (first_node != NULL) && (then_node != NULL)
&& (first_node->details != then_node->details)) {
pe_rsc_trace(then->rsc,
"Disabled ordering %s on %s then %s on %s: not same node",
other->action->uuid, first_node->details->uname,
then->uuid, then_node->details->uname);
other->type = pe_order_none;
continue;
}
pcmk__clear_updated_flags(changed, then, pcmk__updated_first);
if ((first->rsc != NULL)
&& pcmk_is_set(other->type, pe_order_then_cancels_first)
&& !pcmk_is_set(then->flags, pe_action_optional)) {
/* 'then' is required, so we must abandon 'first'
* (e.g. a required stop cancels any agent reload).
*/
pe__set_action_flags(other->action, pe_action_optional);
if (!strcmp(first->task, CRMD_ACTION_RELOAD_AGENT)) {
pe__clear_resource_flags(first->rsc, pe_rsc_reload);
}
}
if ((first->rsc != NULL) && (then->rsc != NULL)
&& (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) {
first = action_for_ordering(first);
}
if (first != other->action) {
pe_rsc_trace(then->rsc, "Ordering %s after %s instead of %s",
then->uuid, first->uuid, other->action->uuid);
}
pe_rsc_trace(then->rsc,
"%s (%#.6x) then %s (%#.6x): type=%#.6x node=%s",
first->uuid, first->flags, then->uuid, then->flags,
other->type, action_node_str(first));
if (first == other->action) {
/* 'first' was not remapped (e.g. from 'start' to 'running'), which
* could mean it is a non-resource action, a primitive resource
* action, or already expanded.
*/
enum pe_action_flags first_flags, then_flags;
first_flags = action_flags_for_ordering(first, then_node);
then_flags = action_flags_for_ordering(then, first_node);
changed |= update_action_for_ordering_flags(first, then,
first_flags, then_flags,
other, data_set);
/* 'first' was for a complex resource (clone, group, etc),
* create a new dependency if necessary
*/
} else if (order_actions(first, then, other->type)) {
/* This was the first time 'first' and 'then' were associated,
* start again to get the new actions_before list
*/
pcmk__set_updated_flags(changed, then, pcmk__updated_then);
pe_rsc_trace(then->rsc,
"Disabled ordering %s then %s in favor of %s then %s",
other->action->uuid, then->uuid, first->uuid,
then->uuid);
other->type = pe_order_none;
}
if (pcmk_is_set(changed, pcmk__updated_first)) {
crm_trace("Re-processing %s and its 'after' actions "
"because it changed", first->uuid);
for (GList *lpc2 = first->actions_after; lpc2 != NULL;
lpc2 = lpc2->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc2->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
pcmk__update_action_for_orderings(first, data_set);
}
}
if (pcmk_is_set(then->flags, pe_action_requires_any)) {
if (last_flags == then->flags) {
pcmk__clear_updated_flags(changed, then, pcmk__updated_then);
} else {
pcmk__set_updated_flags(changed, then, pcmk__updated_then);
}
}
if (pcmk_is_set(changed, pcmk__updated_then)) {
crm_trace("Re-processing %s and its 'after' actions because it changed",
then->uuid);
if (pcmk_is_set(last_flags, pe_action_runnable)
&& !pcmk_is_set(then->flags, pe_action_runnable)) {
pcmk__block_colocated_starts(then, data_set);
}
pcmk__update_action_for_orderings(then, data_set);
for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
}
}
static inline bool
is_primitive_action(pe_action_t *action)
{
return action && action->rsc && (action->rsc->variant == pe_native);
}
/*!
* \internal
* \brief Clear a single action flag and set reason text
*
* \param[in] action Action whose flag should be cleared
* \param[in] flag Action flag that should be cleared
* \param[in] reason Action that is the reason why flag is being cleared
*/
#define clear_action_flag_because(action, flag, reason) do { \
if (pcmk_is_set((action)->flags, (flag))) { \
pe__clear_action_flags(action, flag); \
if ((action)->rsc != (reason)->rsc) { \
char *reason_text = pe__action2reason((reason), (flag)); \
pe_action_set_reason((action), reason_text, \
((flag) == pe_action_migrate_runnable)); \
free(reason_text); \
} \
} \
} while (0)
/*!
* \internal
* \brief Update actions in an asymmetric ordering
*
* \param[in] first 'First' action in an asymmetric ordering
* \param[in] then 'Then' action in an asymmetric ordering
*/
static void
handle_asymmetric_ordering(pe_action_t *first, pe_action_t *then)
{
enum rsc_role_e then_rsc_role = RSC_ROLE_UNKNOWN;
GList *then_on = NULL;
if (then->rsc == NULL) {
// Asymmetric orderings only matter if there's a resource involved
return;
}
then_rsc_role = then->rsc->fns->state(then->rsc, TRUE);
then_on = then->rsc->running_on;
if ((then_rsc_role == RSC_ROLE_STOPPED)
&& pcmk__str_eq(then->task, RSC_STOP, pcmk__str_none)) {
/* Nothing needs to be done for asymmetric ordering if 'then' is
* supposed to be stopped after 'first' but is already stopped.
*/
return;
}
if ((then_rsc_role >= RSC_ROLE_STARTED)
&& pcmk_is_set(then->flags, pe_action_optional)
&& (then->node != NULL)
&& pcmk__list_of_1(then_on)
&& (then->node->details == ((pe_node_t *) then_on->data)->details)
&& pcmk__str_eq(then->task, RSC_START, pcmk__str_none)) {
/* Nothing needs to be done for asymmetric ordering if 'then' is
* supposed to be started after 'first' but is already started --
* unless the start is mandatory, which indicates the resource is
* restarting and the ordering is still needed.
*/
return;
}
if (!pcmk_is_set(first->flags, pe_action_runnable)) {
// 'First' can't run, so 'then' can't either
clear_action_flag_because(then, pe_action_optional, first);
clear_action_flag_because(then, pe_action_runnable, first);
}
}
/*!
* \internal
* \brief Set action bits appropriately when pe_restart_order is used
*
* \param[in] first 'First' action in an ordering with pe_restart_order
* \param[in] then 'Then' action in an ordering with pe_restart_order
* \param[in] filter What action flags to care about
*
* \note pe_restart_order is set for "stop resource before starting it" and
* "stop later group member before stopping earlier group member"
*/
static void
handle_restart_ordering(pe_action_t *first, pe_action_t *then, uint32_t filter)
{
const char *reason = NULL;
CRM_ASSERT(is_primitive_action(first));
CRM_ASSERT(is_primitive_action(then));
// We need to update the action in two cases:
// ... if 'then' is required
if (pcmk_is_set(filter, pe_action_optional)
&& !pcmk_is_set(then->flags, pe_action_optional)) {
reason = "restart";
}
/* ... if 'then' is unrunnable action on same resource (if a resource
* should restart but can't start, we still want to stop)
*/
if (pcmk_is_set(filter, pe_action_runnable)
&& !pcmk_is_set(then->flags, pe_action_runnable)
&& pcmk_is_set(then->rsc->flags, pe_rsc_managed)
&& (first->rsc == then->rsc)) {
reason = "stop";
}
if (reason == NULL) {
return;
}
pe_rsc_trace(first->rsc, "Handling %s -> %s for %s",
first->uuid, then->uuid, reason);
// Make 'first' required if it is runnable
if (pcmk_is_set(first->flags, pe_action_runnable)) {
clear_action_flag_because(first, pe_action_optional, then);
}
// Make 'first' required if 'then' is required
if (!pcmk_is_set(then->flags, pe_action_optional)) {
clear_action_flag_because(first, pe_action_optional, then);
}
// Make 'first' unmigratable if 'then' is unmigratable
if (!pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(first, pe_action_migrate_runnable, then);
}
// Make 'then' unrunnable if 'first' is required but unrunnable
if (!pcmk_is_set(first->flags, pe_action_optional)
&& !pcmk_is_set(first->flags, pe_action_runnable)) {
clear_action_flag_because(then, pe_action_runnable, first);
}
}
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions'
* flags (and runnable_before members if appropriate) as appropriate for the
* ordering. In some cases, the ordering could be disabled as well.
*
* \param[in] first 'First' action in an ordering
* \param[in] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t
-native_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node,
- uint32_t flags, uint32_t filter,
- uint32_t type, pe_working_set_t *data_set)
+pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then,
+ pe_node_t *node, uint32_t flags, uint32_t filter,
+ uint32_t type, pe_working_set_t *data_set)
{
uint32_t changed = pcmk__updated_none;
uint32_t then_flags = then->flags;
uint32_t first_flags = first->flags;
if (pcmk_is_set(type, pe_order_asymmetrical)) {
handle_asymmetric_ordering(first, then);
}
if (pcmk_is_set(type, pe_order_implies_first)
&& !pcmk_is_set(then_flags, pe_action_optional)) {
// Then is required, and implies first should be, too
if (pcmk_is_set(filter, pe_action_optional)
&& !pcmk_is_set(flags, pe_action_optional)
&& pcmk_is_set(first_flags, pe_action_optional)) {
clear_action_flag_because(first, pe_action_optional, then);
}
- if (pcmk_is_set(flags, pe_action_migrate_runnable) &&
- !pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
+ if (pcmk_is_set(flags, pe_action_migrate_runnable)
+ && !pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(first, pe_action_migrate_runnable, then);
}
}
- if (pcmk_is_set(type, pe_order_promoted_implies_first)) {
- if (pcmk_is_set(filter, pe_action_optional)
- && !pcmk_is_set(then->flags, pe_action_optional)
- && (then->rsc != NULL) && (then->rsc->role == RSC_ROLE_PROMOTED)) {
+ if (pcmk_is_set(type, pe_order_promoted_implies_first)
+ && (then->rsc != NULL) && (then->rsc->role == RSC_ROLE_PROMOTED)
+ && pcmk_is_set(filter, pe_action_optional)
+ && !pcmk_is_set(then->flags, pe_action_optional)) {
- clear_action_flag_because(first, pe_action_optional, then);
+ clear_action_flag_because(first, pe_action_optional, then);
- if (pcmk_is_set(first->flags, pe_action_migrate_runnable)
- && !pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
- clear_action_flag_because(first, pe_action_migrate_runnable,
- then);
- }
+ if (pcmk_is_set(first->flags, pe_action_migrate_runnable)
+ && !pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
+ clear_action_flag_because(first, pe_action_migrate_runnable,
+ then);
}
}
if (pcmk_is_set(type, pe_order_implies_first_migratable)
&& pcmk_is_set(filter, pe_action_optional)) {
if (!pcmk_all_flags_set(then->flags,
pe_action_migrate_runnable|pe_action_runnable)) {
clear_action_flag_because(first, pe_action_runnable, then);
}
if (!pcmk_is_set(then->flags, pe_action_optional)) {
clear_action_flag_because(first, pe_action_optional, then);
}
}
if (pcmk_is_set(type, pe_order_pseudo_left)
- && pcmk_is_set(filter, pe_action_optional)) {
+ && pcmk_is_set(filter, pe_action_optional)
+ && !pcmk_is_set(first->flags, pe_action_runnable)) {
- if (!pcmk_is_set(first->flags, pe_action_runnable)) {
- clear_action_flag_because(then, pe_action_migrate_runnable, first);
- pe__clear_action_flags(then, pe_action_pseudo);
- }
+ clear_action_flag_because(then, pe_action_migrate_runnable, first);
+ pe__clear_action_flags(then, pe_action_pseudo);
}
if (pcmk_is_set(type, pe_order_runnable_left)
&& pcmk_is_set(filter, pe_action_runnable)
&& pcmk_is_set(then->flags, pe_action_runnable)
&& !pcmk_is_set(flags, pe_action_runnable)) {
clear_action_flag_because(then, pe_action_runnable, first);
clear_action_flag_because(then, pe_action_migrate_runnable, first);
}
if (pcmk_is_set(type, pe_order_implies_then)
&& pcmk_is_set(filter, pe_action_optional)
&& pcmk_is_set(then->flags, pe_action_optional)
&& !pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(first->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(then, pe_action_optional, first);
}
if (pcmk_is_set(type, pe_order_restart)) {
handle_restart_ordering(first, then, filter);
}
if (then_flags != then->flags) {
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
pe_rsc_trace(then->rsc,
"%s on %s: flags are now %#.6x (was %#.6x) "
"because of 'first' %s (%#.6x)",
then->uuid,
then->node? then->node->details->uname : "no node",
then->flags, then_flags, first->uuid, first->flags);
- if(then->rsc && then->rsc->parent) {
- /* "X_stop then X_start" doesn't get handled for cloned groups unless we do this */
+ if ((then->rsc != NULL) && (then->rsc->parent != NULL)) {
+ // Required to handle "X_stop then X_start" for cloned groups
pcmk__update_action_for_orderings(then, data_set);
}
}
if (first_flags != first->flags) {
pcmk__set_updated_flags(changed, first, pcmk__updated_first);
pe_rsc_trace(first->rsc,
"%s on %s: flags are now %#.6x (was %#.6x) "
"because of 'then' %s (%#.6x)",
first->uuid,
first->node? first->node->details->uname : "no node",
first->flags, first_flags, then->uuid, then->flags);
}
return changed;
}
/*!
* \internal
* \brief Trace-log an action (optionally with its dependent actions)
*
* \param[in] pre_text If not NULL, prefix the log with this plus ": "
* \param[in] action Action to log
* \param[in] details If true, recursively log dependent actions
*/
void
pcmk__log_action(const char *pre_text, pe_action_t *action, bool details)
{
const char *node_uname = NULL;
const char *node_uuid = NULL;
const char *desc = NULL;
CRM_CHECK(action != NULL, return);
if (!pcmk_is_set(action->flags, pe_action_pseudo)) {
if (action->node != NULL) {
node_uname = action->node->details->uname;
node_uuid = action->node->details->id;
} else {
node_uname = "<none>";
}
}
switch (text2task(action->task)) {
case stonith_node:
case shutdown_crm:
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
desc = "Pseudo ";
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
desc = "Optional ";
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
desc = "!!Non-Startable!! ";
} else if (pcmk_is_set(action->flags, pe_action_processed)) {
desc = "";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
default:
if (pcmk_is_set(action->flags, pe_action_optional)) {
desc = "Optional ";
} else if (pcmk_is_set(action->flags, pe_action_pseudo)) {
desc = "Pseudo ";
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
desc = "!!Non-Startable!! ";
} else if (pcmk_is_set(action->flags, pe_action_processed)) {
desc = "";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(action->rsc? action->rsc->id : "<none>"),
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
}
if (details) {
GList *iter = NULL;
crm_trace("\t\t====== Preceding Actions");
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== Subsequent Actions");
for (iter = action->actions_after; iter != NULL; iter = iter->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== End");
} else {
crm_trace("\t\t(before=%d, after=%d)",
g_list_length(action->actions_before),
g_list_length(action->actions_after));
}
}
/*!
* \internal
* \brief Create an executor cancel action
*
* \param[in] rsc Resource of action to cancel
* \param[in] task Name of action to cancel
* \param[in] interval_ms Interval of action to cancel
* \param[in] node Node of action to cancel
* \param[in] data_set Working set of cluster
*
* \return Created op
*/
pe_action_t *
pcmk__new_cancel_action(pe_resource_t *rsc, const char *task, guint interval_ms,
pe_node_t *node)
{
pe_action_t *cancel_op = NULL;
char *key = NULL;
char *interval_ms_s = NULL;
CRM_ASSERT((rsc != NULL) && (task != NULL) && (node != NULL));
// @TODO dangerous if possible to schedule another action with this key
key = pcmk__op_key(rsc->id, task, interval_ms);
cancel_op = custom_action(rsc, key, RSC_CANCEL, node, FALSE, TRUE,
rsc->cluster);
pcmk__str_update(&cancel_op->task, RSC_CANCEL);
pcmk__str_update(&cancel_op->cancel_task, task);
interval_ms_s = crm_strdup_printf("%u", interval_ms);
add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task);
add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL_MS, interval_ms_s);
free(interval_ms_s);
return cancel_op;
}
/*!
* \internal
* \brief Create a new shutdown action for a node
*
* \param[in] node Node being shut down
*
* \return Newly created shutdown action for \p node
*/
pe_action_t *
pcmk__new_shutdown_action(pe_node_t *node)
{
char *shutdown_id = NULL;
pe_action_t *shutdown_op = NULL;
CRM_ASSERT(node != NULL);
shutdown_id = crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN,
node->details->uname);
shutdown_op = custom_action(NULL, shutdown_id, CRM_OP_SHUTDOWN, node, FALSE,
TRUE, node->details->data_set);
pcmk__order_stops_before_shutdown(node, shutdown_op);
add_hash_param(shutdown_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
return shutdown_op;
}
/*!
* \internal
* \brief Calculate and add an operation digest to XML
*
* Calculate an operation digest, which enables us to later determine when a
* restart is needed due to the resource's parameters being changed, and add it
* to given XML.
*
* \param[in] op Operation result from executor
* \param[in] update XML to add digest to
*/
static void
add_op_digest_to_xml(lrmd_event_data_t *op, xmlNode *update)
{
char *digest = NULL;
xmlNode *args_xml = NULL;
if (op->params == NULL) {
return;
}
args_xml = create_xml_node(NULL, XML_TAG_PARAMS);
g_hash_table_foreach(op->params, hash2field, args_xml);
pcmk__filter_op_for_digest(args_xml);
digest = calculate_operation_digest(args_xml, NULL);
crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest);
free_xml(args_xml);
free(digest);
}
#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
/*!
* \internal
* \brief Create XML for resource operation history update
*
* \param[in,out] parent Parent XML node to add to
* \param[in,out] op Operation event data
* \param[in] caller_version DC feature set
* \param[in] target_rc Expected result of operation
* \param[in] node Name of node on which operation was performed
* \param[in] origin Arbitrary description of update source
*
* \return Newly created XML node for history update
*/
xmlNode *
pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
const char *caller_version, int target_rc,
const char *node, const char *origin)
{
char *key = NULL;
char *magic = NULL;
char *op_id = NULL;
char *op_id_additional = NULL;
char *local_user_data = NULL;
const char *exit_reason = NULL;
xmlNode *xml_op = NULL;
const char *task = NULL;
CRM_CHECK(op != NULL, return NULL);
crm_trace("Creating history XML for %s-interval %s action for %s on %s "
"(DC version: %s, origin: %s)",
pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
((node == NULL)? "no node" : node), caller_version, origin);
task = op->op_type;
/* Record a successful agent reload as a start, and a failed one as a
* monitor, to make life easier for the scheduler when determining the
* current state.
*
* @COMPAT We should check "reload" here only if the operation was for a
* pre-OCF-1.1 resource agent, but we don't know that here, and we should
* only ever get results for actions scheduled by us, so we can reasonably
* assume any "reload" is actually a pre-1.1 agent reload.
*/
if (pcmk__str_any_of(task, CRMD_ACTION_RELOAD, CRMD_ACTION_RELOAD_AGENT,
NULL)) {
if (op->op_status == PCMK_EXEC_DONE) {
task = CRMD_ACTION_START;
} else {
task = CRMD_ACTION_STATUS;
}
}
key = pcmk__op_key(op->rsc_id, task, op->interval_ms);
if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_none)) {
const char *n_type = crm_meta_value(op->params, "notify_type");
const char *n_task = crm_meta_value(op->params, "notify_operation");
CRM_LOG_ASSERT(n_type != NULL);
CRM_LOG_ASSERT(n_task != NULL);
op_id = pcmk__notify_key(op->rsc_id, n_type, n_task);
if (op->op_status != PCMK_EXEC_PENDING) {
/* Ignore notify errors.
*
* @TODO It might be better to keep the correct result here, and
* ignore it in process_graph_event().
*/
lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
}
/* Migration history is preserved separately, which usually matters for
* multiple nodes and is important for future cluster transitions.
*/
} else if (pcmk__str_any_of(op->op_type, CRMD_ACTION_MIGRATE,
CRMD_ACTION_MIGRATED, NULL)) {
op_id = strdup(key);
} else if (did_rsc_op_fail(op, target_rc)) {
op_id = pcmk__op_key(op->rsc_id, "last_failure", 0);
if (op->interval_ms == 0) {
// Ensure 'last' gets updated, in case record-pending is true
op_id_additional = pcmk__op_key(op->rsc_id, "last", 0);
}
exit_reason = op->exit_reason;
} else if (op->interval_ms > 0) {
op_id = strdup(key);
} else {
op_id = pcmk__op_key(op->rsc_id, "last", 0);
}
again:
xml_op = pcmk__xe_match(parent, XML_LRM_TAG_RSC_OP, XML_ATTR_ID, op_id);
if (xml_op == NULL) {
xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP);
}
if (op->user_data == NULL) {
crm_debug("Generating fake transition key for: " PCMK__OP_FMT
" %d from %s", op->rsc_id, op->op_type, op->interval_ms,
op->call_id, origin);
local_user_data = pcmk__transition_key(-1, op->call_id, target_rc,
FAKE_TE_ID);
op->user_data = local_user_data;
}
if (magic == NULL) {
magic = crm_strdup_printf("%d:%d;%s", op->op_status, op->rc,
(const char *) op->user_data);
}
crm_xml_add(xml_op, XML_ATTR_ID, op_id);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task);
crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin);
crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic);
crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason == NULL ? "" : exit_reason);
crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */
crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc);
crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status);
crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms);
if (compare_version("2.1", caller_version) <= 0) {
if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) {
crm_trace("Timing data (" PCMK__OP_FMT
"): last=%u change=%u exec=%u queue=%u",
op->rsc_id, op->op_type, op->interval_ms,
op->t_run, op->t_rcchange, op->exec_time, op->queue_time);
if ((op->interval_ms != 0) && (op->t_rcchange != 0)) {
// Recurring ops may have changed rc after initial run
crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
(long long) op->t_rcchange);
} else {
crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
(long long) op->t_run);
}
crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time);
crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time);
}
}
if (pcmk__str_any_of(op->op_type, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
/*
* Record migrate_source and migrate_target always for migrate ops.
*/
const char *name = XML_LRM_ATTR_MIGRATE_SOURCE;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
name = XML_LRM_ATTR_MIGRATE_TARGET;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
}
add_op_digest_to_xml(op, xml_op);
if (op_id_additional) {
free(op_id);
op_id = op_id_additional;
op_id_additional = NULL;
goto again;
}
if (local_user_data) {
free(local_user_data);
op->user_data = NULL;
}
free(magic);
free(op_id);
free(key);
return xml_op;
}
/*!
* \internal
* \brief Check whether an action shutdown-locks a resource to a node
*
* If the shutdown-lock cluster property is set, resources will not be recovered
* on a different node if cleanly stopped, and may start only on that same node.
* This function checks whether that applies to a given action, so that the
* transition graph can be marked appropriately.
*
* \param[in] action Action to check
*
* \return true if \p action locks its resource to the action's node,
* otherwise false
*/
bool
pcmk__action_locks_rsc_to_node(const pe_action_t *action)
{
// Only resource actions taking place on resource's lock node are locked
if ((action == NULL) || (action->rsc == NULL)
|| (action->rsc->lock_node == NULL) || (action->node == NULL)
|| (action->node->details != action->rsc->lock_node->details)) {
return false;
}
/* During shutdown, only stops are locked (otherwise, another action such as
* a demote would cause the controller to clear the lock)
*/
if (action->node->details->shutdown && (action->task != NULL)
&& (strcmp(action->task, RSC_STOP) != 0)) {
return false;
}
return true;
}
/* lowest to highest */
static gint
sort_action_id(gconstpointer a, gconstpointer b)
{
const pe_action_wrapper_t *action_wrapper2 = (const pe_action_wrapper_t *)a;
const pe_action_wrapper_t *action_wrapper1 = (const pe_action_wrapper_t *)b;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (action_wrapper1->action->id < action_wrapper2->action->id) {
return 1;
}
if (action_wrapper1->action->id > action_wrapper2->action->id) {
return -1;
}
return 0;
}
/*!
* \internal
* \brief Remove any duplicate action inputs, merging action flags
*
* \param[in] action Action whose inputs should be checked
*/
void
pcmk__deduplicate_action_inputs(pe_action_t *action)
{
GList *item = NULL;
GList *next = NULL;
pe_action_wrapper_t *last_input = NULL;
action->actions_before = g_list_sort(action->actions_before,
sort_action_id);
for (item = action->actions_before; item != NULL; item = next) {
pe_action_wrapper_t *input = (pe_action_wrapper_t *) item->data;
next = item->next;
if ((last_input != NULL)
&& (input->action->id == last_input->action->id)) {
crm_trace("Input %s (%d) duplicate skipped for action %s (%d)",
input->action->uuid, input->action->id,
action->uuid, action->id);
/* For the purposes of scheduling, the ordering flags no longer
* matter, but crm_simulate looks at certain ones when creating a
* dot graph. Combining the flags is sufficient for that purpose.
*/
last_input->type |= input->type;
if (input->state == pe_link_dumped) {
last_input->state = pe_link_dumped;
}
free(item->data);
action->actions_before = g_list_delete_link(action->actions_before,
item);
} else {
last_input = input;
input->state = pe_link_not_dumped;
}
}
}
/*!
* \internal
* \brief Output all scheduled actions
*
* \param[in] data_set Cluster working set
*/
void
pcmk__output_actions(pe_working_set_t *data_set)
{
pcmk__output_t *out = data_set->priv;
// Output node (non-resource) actions
for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) {
char *node_name = NULL;
char *task = NULL;
pe_action_t *action = (pe_action_t *) iter->data;
if (action->rsc != NULL) {
continue; // Resource actions will be output later
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
continue; // This action was not scheduled
}
if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
task = strdup("Shutdown");
} else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
const char *op = g_hash_table_lookup(action->meta, "stonith_action");
task = crm_strdup_printf("Fence (%s)", op);
} else {
continue; // Don't display other node action types
}
if (pe__is_guest_node(action->node)) {
node_name = crm_strdup_printf("%s (resource: %s)",
action->node->details->uname,
action->node->details->remote_rsc->container->id);
} else if (action->node != NULL) {
node_name = crm_strdup_printf("%s", action->node->details->uname);
}
out->message(out, "node-action", task, node_name, action->reason);
free(node_name);
free(task);
}
// Output resource actions
for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
rsc->cmds->output_actions(rsc);
}
}
/*!
* \internal
* \brief Schedule cancellation of a recurring action
*
* \param[in] rsc Resource that action is for
* \param[in] call_id Action's call ID from history
* \param[in] task Action name
* \param[in] interval_ms Action interval
* \param[in] node Node that history entry is for
* \param[in] reason Short description of why action is being cancelled
*/
static void
schedule_cancel(pe_resource_t *rsc, const char *call_id, const char *task,
guint interval_ms, pe_node_t *node, const char *reason)
{
pe_action_t *cancel = NULL;
CRM_CHECK((rsc != NULL) && (task != NULL)
&& (node != NULL) && (reason != NULL),
return);
crm_info("Recurring %s-interval %s for %s will be stopped on %s: %s",
pcmk__readable_interval(interval_ms), task, rsc->id,
pcmk__s(node->details->uname, "unknown node"), reason);
cancel = pcmk__new_cancel_action(rsc, task, interval_ms, node);
add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
// Cancellations happen after stops
pcmk__new_ordering(rsc, stop_key(rsc), NULL, rsc, NULL, cancel,
pe_order_optional, rsc->cluster);
}
/*!
* \internal
* \brief Check whether action from resource history is still in configuration
*
* \param[in] rsc Resource that action is for
* \param[in] task Action's name
* \param[in] interval_ms Action's interval (in milliseconds)
*
* \return true if action is still in resource configuration, otherwise false
*/
static bool
action_in_config(pe_resource_t *rsc, const char *task, guint interval_ms)
{
char *key = pcmk__op_key(rsc->id, task, interval_ms);
bool config = (find_rsc_op_entry(rsc, key) != NULL);
free(key);
return config;
}
/*!
* \internal
* \brief Get action name needed to compare digest for configuration changes
*
* \param[in] task Action name from history
* \param[in] interval_ms Action interval (in milliseconds)
*
* \return Action name whose digest should be compared
*/
static const char *
task_for_digest(const char *task, guint interval_ms)
{
/* Certain actions need to be compared against the parameters used to start
* the resource.
*/
if ((interval_ms == 0)
&& pcmk__str_any_of(task, RSC_STATUS, RSC_MIGRATED, RSC_PROMOTE, NULL)) {
task = RSC_START;
}
return task;
}
/*!
* \internal
* \brief Check whether only sanitized parameters to an action changed
*
* When collecting CIB files for troubleshooting, crm_report will mask
* sensitive resource parameters. If simulations were run using that, affected
* resources would appear to need a restart, which would complicate
* troubleshooting. To avoid that, we save a "secure digest" of non-sensitive
* parameters. This function used that digest to check whether only masked
* parameters are different.
*
* \param[in] xml_op Resource history entry with secure digest
* \param[in] digest_data Operation digest information being compared
* \param[in] data_set Cluster working set
*
* \return true if only sanitized parameters changed, otherwise false
*/
static bool
only_sanitized_changed(xmlNode *xml_op, const op_digest_cache_t *digest_data,
pe_working_set_t *data_set)
{
const char *digest_secure = NULL;
if (!pcmk_is_set(data_set->flags, pe_flag_sanitized)) {
// The scheduler is not being run as a simulation
return false;
}
digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
return (digest_data->rc != RSC_DIGEST_MATCH) && (digest_secure != NULL)
&& (digest_data->digest_secure_calc != NULL)
&& (strcmp(digest_data->digest_secure_calc, digest_secure) == 0);
}
/*!
* \internal
* \brief Force a restart due to a configuration change
*
* \param[in] rsc Resource that action is for
* \param[in] task Name of action whose configuration changed
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] node Node where resource should be restarted
*/
static void
force_restart(pe_resource_t *rsc, const char *task, guint interval_ms,
pe_node_t *node)
{
char *key = pcmk__op_key(rsc->id, task, interval_ms);
pe_action_t *required = custom_action(rsc, key, task, NULL, FALSE, TRUE,
rsc->cluster);
pe_action_set_reason(required, "resource definition change", true);
trigger_unfencing(rsc, node, "Device parameters changed", NULL,
rsc->cluster);
}
/*!
* \internal
* \brief Reschedule a recurring action
*
* \param[in] rsc Resource that action is for
* \param[in] task Name of action being rescheduled
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] node Node where action should be rescheduled
*/
static void
reschedule_recurring(pe_resource_t *rsc, const char *task, guint interval_ms,
pe_node_t *node)
{
pe_action_t *op = NULL;
trigger_unfencing(rsc, node, "Device parameters changed (reschedule)",
NULL, rsc->cluster);
op = custom_action(rsc, pcmk__op_key(rsc->id, task, interval_ms),
task, node, TRUE, TRUE, rsc->cluster);
pe__set_action_flags(op, pe_action_reschedule);
}
/*!
* \internal
* \brief Schedule a reload of a resource on a node
*
* \param[in] rsc Resource to reload
* \param[in] node Where resource should be reloaded
*/
static void
schedule_reload(pe_resource_t *rsc, pe_node_t *node)
{
pe_action_t *reload = NULL;
// For collective resources, just call recursively for children
if (rsc->variant > pe_native) {
g_list_foreach(rsc->children, (GFunc) schedule_reload, node);
return;
}
// Skip the reload in certain situations
if ((node == NULL)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)
|| pcmk_is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "Skip reload of %s:%s%s %s",
rsc->id,
pcmk_is_set(rsc->flags, pe_rsc_managed)? "" : " unmanaged",
pcmk_is_set(rsc->flags, pe_rsc_failed)? " failed" : "",
(node == NULL)? "inactive" : node->details->uname);
return;
}
/* If a resource's configuration changed while a start was pending,
* force a full restart instead of a reload.
*/
if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
pe_rsc_trace(rsc, "%s: preventing agent reload because start pending",
rsc->id);
custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE,
rsc->cluster);
return;
}
// Schedule the reload
pe__set_resource_flags(rsc, pe_rsc_reload);
reload = custom_action(rsc, reload_key(rsc), CRMD_ACTION_RELOAD_AGENT, node,
FALSE, TRUE, rsc->cluster);
pe_action_set_reason(reload, "resource definition change", FALSE);
// Set orderings so that a required stop or demote cancels the reload
pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL,
pe_order_optional|pe_order_then_cancels_first,
rsc->cluster);
pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL,
pe_order_optional|pe_order_then_cancels_first,
rsc->cluster);
}
/*!
* \internal
* \brief Handle any configuration change for an action
*
* Given an action from resource history, if the resource's configuration
* changed since the action was done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, etc.).
*
* \param[in] rsc Resource that action is for
* \param[in] node Node that action was on
* \param[in] xml_op Action XML from resource history
*
* \return true if action configuration changed, otherwise false
*/
bool
pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op)
{
guint interval_ms = 0;
const char *task = NULL;
const op_digest_cache_t *digest_data = NULL;
CRM_CHECK((rsc != NULL) && (node != NULL) && (xml_op != NULL),
return false);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
CRM_CHECK(task != NULL, return false);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
// If this is a recurring action, check whether it has been orphaned
if (interval_ms > 0) {
if (action_in_config(rsc, task, interval_ms)) {
pe_rsc_trace(rsc, "%s-interval %s for %s on %s is in configuration",
pcmk__readable_interval(interval_ms), task, rsc->id,
node->details->uname);
} else if (pcmk_is_set(rsc->cluster->flags,
pe_flag_stop_action_orphans)) {
schedule_cancel(rsc,
crm_element_value(xml_op, XML_LRM_ATTR_CALLID),
task, interval_ms, node, "orphan");
return true;
} else {
pe_rsc_debug(rsc, "%s-interval %s for %s on %s is orphaned",
pcmk__readable_interval(interval_ms), task, rsc->id,
node->details->uname);
return true;
}
}
crm_trace("Checking %s-interval %s for %s on %s for configuration changes",
pcmk__readable_interval(interval_ms), task, rsc->id,
node->details->uname);
task = task_for_digest(task, interval_ms);
digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster);
if (only_sanitized_changed(xml_op, digest_data, rsc->cluster)) {
if (!pcmk__is_daemon && (rsc->cluster->priv != NULL)) {
pcmk__output_t *out = rsc->cluster->priv;
out->info(out,
"Only 'private' parameters to %s-interval %s for %s "
"on %s changed: %s",
pcmk__readable_interval(interval_ms), task, rsc->id,
node->details->uname,
crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
}
return false;
}
switch (digest_data->rc) {
case RSC_DIGEST_RESTART:
crm_log_xml_debug(digest_data->params_restart, "params:restart");
force_restart(rsc, task, interval_ms, node);
return true;
case RSC_DIGEST_ALL:
case RSC_DIGEST_UNKNOWN:
// Changes that can potentially be handled by an agent reload
if (interval_ms > 0) {
/* Recurring actions aren't reloaded per se, they are just
* re-scheduled so the next run uses the new parameters.
* The old instance will be cancelled automatically.
*/
crm_log_xml_debug(digest_data->params_all, "params:reschedule");
reschedule_recurring(rsc, task, interval_ms, node);
} else if (crm_element_value(xml_op,
XML_LRM_ATTR_RESTART_DIGEST) != NULL) {
// Agent supports reload, so use it
trigger_unfencing(rsc, node,
"Device parameters changed (reload)", NULL,
rsc->cluster);
crm_log_xml_debug(digest_data->params_all, "params:reload");
schedule_reload(rsc, node);
} else {
pe_rsc_trace(rsc,
"Restarting %s because agent doesn't support reload",
rsc->id);
crm_log_xml_debug(digest_data->params_restart,
"params:restart");
force_restart(rsc, task, interval_ms, node);
}
return true;
default:
break;
}
return false;
}
/*!
* \internal
* \brief Create a list of resource's action history entries, sorted by call ID
*
* \param[in] rsc Resource whose history should be checked
* \param[in] rsc_entry Resource's <lrm_rsc_op> status XML
* \param[out] start_index Where to store index of start-like action, if any
* \param[out] stop_index Where to store index of stop action, if any
*/
static GList *
rsc_history_as_list(pe_resource_t *rsc, xmlNode *rsc_entry,
int *start_index, int *stop_index)
{
GList *ops = NULL;
for (xmlNode *rsc_op = first_named_child(rsc_entry, XML_LRM_TAG_RSC_OP);
rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
ops = g_list_prepend(ops, rsc_op);
}
ops = g_list_sort(ops, sort_op_by_callid);
calculate_active_ops(ops, start_index, stop_index);
return ops;
}
/*!
* \internal
* \brief Process a resource's action history from the CIB status
*
* Given a resource's action history, if the resource's configuration
* changed since the actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in] rsc_entry Resource's <lrm_rsc_op> status XML
* \param[in] rsc Resource whose history is being processed
* \param[in] node Node whose history is being processed
*/
static void
process_rsc_history(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node)
{
int offset = -1;
int stop_index = 0;
int start_index = 0;
GList *sorted_op_list = NULL;
if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
if (pe_rsc_is_anon_clone(uber_parent(rsc))) {
pe_rsc_trace(rsc,
"Skipping configuration check "
"for orphaned clone instance %s",
rsc->id);
} else {
pe_rsc_trace(rsc,
"Skipping configuration check and scheduling clean-up "
"for orphaned resource %s", rsc->id);
DeleteRsc(rsc, node, FALSE, rsc->cluster);
}
return;
}
if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, false)) {
DeleteRsc(rsc, node, FALSE, rsc->cluster);
}
pe_rsc_trace(rsc,
"Skipping configuration check for %s "
"because no longer active on %s",
rsc->id, node->details->uname);
return;
}
pe_rsc_trace(rsc, "Checking for configuration changes for %s on %s",
rsc->id, node->details->uname);
if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, true)) {
DeleteRsc(rsc, node, FALSE, rsc->cluster);
}
sorted_op_list = rsc_history_as_list(rsc, rsc_entry, &start_index,
&stop_index);
if (start_index < stop_index) {
return; // Resource is stopped
}
for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
xmlNode *rsc_op = (xmlNode *) iter->data;
const char *task = NULL;
guint interval_ms = 0;
if (++offset < start_index) {
// Skip actions that happened before a start
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if ((interval_ms > 0)
&& (pcmk_is_set(rsc->flags, pe_rsc_maintenance)
|| node->details->maintenance)) {
// Maintenance mode cancels recurring operations
schedule_cancel(rsc,
crm_element_value(rsc_op, XML_LRM_ATTR_CALLID),
task, interval_ms, node, "maintenance mode");
} else if ((interval_ms > 0)
|| pcmk__strcase_any_of(task, RSC_STATUS, RSC_START,
RSC_PROMOTE, RSC_MIGRATED, NULL)) {
/* If a resource operation failed, and the operation's definition
* has changed, clear any fail count so they can be retried fresh.
*/
if (pe__bundle_needs_remote_name(rsc, rsc->cluster)) {
/* We haven't allocated resources to nodes yet, so if the
* REMOTE_CONTAINER_HACK is used, we may calculate the digest
* based on the literal "#uname" value rather than the properly
* substituted value. That would mistakenly make the action
* definition appear to have been changed. Defer the check until
* later in this case.
*/
pe__add_param_check(rsc_op, rsc, node, pe_check_active,
rsc->cluster);
} else if (pcmk__check_action_config(rsc, node, rsc_op)
&& (pe_get_failcount(node, rsc, NULL, pe_fc_effective,
NULL, rsc->cluster) != 0)) {
pe__clear_failcount(rsc, node, "action definition changed",
rsc->cluster);
}
}
}
g_list_free(sorted_op_list);
}
/*!
* \internal
* \brief Process a node's action history from the CIB status
*
* Given a node's resource history, if the resource's configuration changed
* since the actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in] node Node whose history is being processed
* \param[in] lrm_rscs Node's <lrm_resources> from CIB status XML
* \param[in] data_set Cluster working set
*/
static void
process_node_history(pe_node_t *node, xmlNode *lrm_rscs, pe_working_set_t *data_set)
{
crm_trace("Processing history for node %s", node->details->uname);
for (xmlNode *rsc_entry = first_named_child(lrm_rscs, XML_LRM_TAG_RESOURCE);
rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
if (xml_has_children(rsc_entry)) {
GList *result = pcmk__rscs_matching_id(ID(rsc_entry), data_set);
for (GList *iter = result; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
if (rsc->variant == pe_native) {
process_rsc_history(rsc_entry, rsc, node);
}
}
g_list_free(result);
}
}
}
// XPath to find a node's resource history
#define XPATH_NODE_HISTORY "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES
/*!
* \internal
* \brief Process any resource configuration changes in the CIB status
*
* Go through all nodes' resource history, and if a resource's configuration
* changed since its actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in] data_set Cluster working set
*/
void
pcmk__handle_rsc_config_changes(pe_working_set_t *data_set)
{
crm_trace("Check resource and action configuration for changes");
/* Rather than iterate through the status section, iterate through the nodes
* and search for the appropriate status subsection for each. This skips
* orphaned nodes and lets us eliminate some cases before searching the XML.
*/
for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
/* Don't bother checking actions for a node that can't run actions ...
* unless it's in maintenance mode, in which case we still need to
* cancel any existing recurring monitors.
*/
if (node->details->maintenance
|| pcmk__node_available(node, false, false)) {
char *xpath = NULL;
xmlNode *history = NULL;
xpath = crm_strdup_printf(XPATH_NODE_HISTORY, node->details->uname);
history = get_xpath_object(xpath, data_set->input, LOG_NEVER);
free(xpath);
process_node_history(node, history, data_set);
}
}
}
diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c
index 88bae54f94..abd8346d36 100644
--- a/lib/pacemaker/pcmk_sched_bundle.c
+++ b/lib/pacemaker/pcmk_sched_bundle.c
@@ -1,1136 +1,1136 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
#define PE__VARIANT_BUNDLE 1
#include <lib/pengine/variant.h>
static bool
is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node)
{
for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (node->details == replica->node->details) {
return TRUE;
}
}
return FALSE;
}
void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set);
static GList *
get_container_list(pe_resource_t *rsc)
{
GList *containers = NULL;
if (rsc->variant == pe_container) {
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, rsc);
for (GList *gIter = data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
containers = g_list_append(containers, replica->container);
}
}
return containers;
}
static inline GList *
get_containers_or_children(pe_resource_t *rsc)
{
return (rsc->variant == pe_container)?
get_container_list(rsc) : rsc->children;
}
pe_node_t *
pcmk__bundle_allocate(pe_resource_t *rsc, pe_node_t *prefer)
{
GList *containers = NULL;
GList *nodes = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return NULL);
get_bundle_variant_data(bundle_data, rsc);
pe__set_resource_flags(rsc, pe_rsc_allocating);
containers = get_container_list(rsc);
pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
nodes = g_hash_table_get_values(rsc->allowed_nodes);
nodes = pcmk__sort_nodes(nodes, NULL);
containers = g_list_sort(containers, pcmk__cmp_instance);
distribute_children(rsc, containers, nodes, bundle_data->nreplicas,
bundle_data->nreplicas_per_host, rsc->cluster);
g_list_free(nodes);
g_list_free(containers);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
pe_node_t *container_host = NULL;
CRM_ASSERT(replica);
if (replica->ip) {
pe_rsc_trace(rsc, "Allocating bundle %s IP %s",
rsc->id, replica->ip->id);
replica->ip->cmds->allocate(replica->ip, prefer);
}
container_host = replica->container->allocated_to;
if (replica->remote && pe__is_guest_or_remote_node(container_host)) {
/* We need 'nested' connection resources to be on the same
* host because pacemaker-remoted only supports a single
* active connection
*/
pcmk__new_colocation("child-remote-with-docker-remote", NULL,
INFINITY, replica->remote,
container_host->details->remote_rsc, NULL,
NULL, true, rsc->cluster);
}
if (replica->remote) {
pe_rsc_trace(rsc, "Allocating bundle %s connection %s",
rsc->id, replica->remote->id);
replica->remote->cmds->allocate(replica->remote, prefer);
}
// Explicitly allocate replicas' children before bundle child
if (replica->child) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, replica->child->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
if (node->details != replica->node->details) {
node->weight = -INFINITY;
} else if (!pcmk__threshold_reached(replica->child, node,
NULL)) {
node->weight = INFINITY;
}
}
pe__set_resource_flags(replica->child->parent, pe_rsc_allocating);
pe_rsc_trace(rsc, "Allocating bundle %s replica child %s",
rsc->id, replica->child->id);
replica->child->cmds->allocate(replica->child, replica->node);
pe__clear_resource_flags(replica->child->parent,
pe_rsc_allocating);
}
}
if (bundle_data->child) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, bundle_data->child->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
if (is_bundle_node(bundle_data, node)) {
node->weight = 0;
} else {
node->weight = -INFINITY;
}
}
pe_rsc_trace(rsc, "Allocating bundle %s child %s",
rsc->id, bundle_data->child->id);
bundle_data->child->cmds->allocate(bundle_data->child, prefer);
}
pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional);
return NULL;
}
void
pcmk__bundle_create_actions(pe_resource_t *rsc)
{
pe_action_t *action = NULL;
GList *containers = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
containers = get_container_list(rsc);
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->ip) {
replica->ip->cmds->create_actions(replica->ip);
}
if (replica->container) {
replica->container->cmds->create_actions(replica->container);
}
if (replica->remote) {
replica->remote->cmds->create_actions(replica->remote);
}
}
clone_create_pseudo_actions(rsc, containers, NULL, NULL);
if (bundle_data->child) {
bundle_data->child->cmds->create_actions(bundle_data->child);
if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) {
/* promote */
pe__new_rsc_pseudo_action(rsc, RSC_PROMOTE, true, true);
action = pe__new_rsc_pseudo_action(rsc, RSC_PROMOTED, true, true);
action->priority = INFINITY;
/* demote */
pe__new_rsc_pseudo_action(rsc, RSC_DEMOTE, true, true);
action = pe__new_rsc_pseudo_action(rsc, RSC_DEMOTED, true, true);
action->priority = INFINITY;
}
}
g_list_free(containers);
}
void
pcmk__bundle_internal_constraints(pe_resource_t *rsc)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
if (bundle_data->child) {
pcmk__order_resource_actions(rsc, RSC_START, bundle_data->child,
RSC_START, pe_order_implies_first_printed);
pcmk__order_resource_actions(rsc, RSC_STOP, bundle_data->child,
RSC_STOP, pe_order_implies_first_printed);
if (bundle_data->child->children) {
pcmk__order_resource_actions(bundle_data->child, RSC_STARTED, rsc,
RSC_STARTED,
pe_order_implies_then_printed);
pcmk__order_resource_actions(bundle_data->child, RSC_STOPPED, rsc,
RSC_STOPPED,
pe_order_implies_then_printed);
} else {
pcmk__order_resource_actions(bundle_data->child, RSC_START, rsc,
RSC_STARTED,
pe_order_implies_then_printed);
pcmk__order_resource_actions(bundle_data->child, RSC_STOP, rsc,
RSC_STOPPED,
pe_order_implies_then_printed);
}
}
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
CRM_ASSERT(replica->container);
replica->container->cmds->internal_constraints(replica->container);
pcmk__order_starts(rsc, replica->container,
pe_order_runnable_left|pe_order_implies_first_printed);
if (replica->child) {
pcmk__order_stops(rsc, replica->child,
pe_order_implies_first_printed);
}
pcmk__order_stops(rsc, replica->container,
pe_order_implies_first_printed);
pcmk__order_resource_actions(replica->container, RSC_START, rsc,
RSC_STARTED,
pe_order_implies_then_printed);
pcmk__order_resource_actions(replica->container, RSC_STOP, rsc,
RSC_STOPPED,
pe_order_implies_then_printed);
if (replica->ip) {
replica->ip->cmds->internal_constraints(replica->ip);
// Start IP then container
pcmk__order_starts(replica->ip, replica->container,
pe_order_runnable_left|pe_order_preserve);
pcmk__order_stops(replica->container, replica->ip,
pe_order_implies_first|pe_order_preserve);
pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip,
replica->container, NULL, NULL, true,
rsc->cluster);
}
if (replica->remote) {
/* This handles ordering and colocating remote relative to container
* (via "resource-with-container"). Since IP is also ordered and
* colocated relative to the container, we don't need to do anything
* explicit here with IP.
*/
replica->remote->cmds->internal_constraints(replica->remote);
}
if (replica->child) {
CRM_ASSERT(replica->remote);
// "Start remote then child" is implicit in scheduler's remote logic
}
}
if (bundle_data->child) {
bundle_data->child->cmds->internal_constraints(bundle_data->child);
if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) {
pcmk__promotable_restart_ordering(rsc);
/* child demoted before global demoted */
pcmk__order_resource_actions(bundle_data->child, RSC_DEMOTED, rsc,
RSC_DEMOTED,
pe_order_implies_then_printed);
/* global demote before child demote */
pcmk__order_resource_actions(rsc, RSC_DEMOTE, bundle_data->child,
RSC_DEMOTE,
pe_order_implies_first_printed);
/* child promoted before global promoted */
pcmk__order_resource_actions(bundle_data->child, RSC_PROMOTED, rsc,
RSC_PROMOTED,
pe_order_implies_then_printed);
/* global promote before child promote */
pcmk__order_resource_actions(rsc, RSC_PROMOTE, bundle_data->child,
RSC_PROMOTE,
pe_order_implies_first_printed);
}
}
}
static pe_resource_t *
compatible_replica_for_node(pe_resource_t *rsc_lh, pe_node_t *candidate,
pe_resource_t *rsc, enum rsc_role_e filter,
gboolean current)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(candidate != NULL, return NULL);
get_bundle_variant_data(bundle_data, rsc);
crm_trace("Looking for compatible child from %s for %s on %s",
rsc_lh->id, rsc->id, candidate->details->uname);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (is_child_compatible(replica->container, candidate, filter, current)) {
crm_trace("Pairing %s with %s on %s",
rsc_lh->id, replica->container->id,
candidate->details->uname);
return replica->container;
}
}
crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id);
return NULL;
}
static pe_resource_t *
compatible_replica(pe_resource_t *rsc_lh, pe_resource_t *rsc,
enum rsc_role_e filter, gboolean current,
pe_working_set_t *data_set)
{
GList *scratch = NULL;
pe_resource_t *pair = NULL;
pe_node_t *active_node_lh = NULL;
active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current);
if (active_node_lh) {
return compatible_replica_for_node(rsc_lh, active_node_lh, rsc, filter,
current);
}
scratch = g_hash_table_get_values(rsc_lh->allowed_nodes);
scratch = pcmk__sort_nodes(scratch, NULL);
for (GList *gIter = scratch; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
pair = compatible_replica_for_node(rsc_lh, node, rsc, filter, current);
if (pair) {
goto done;
}
}
pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? rsc->id : "none"));
done:
g_list_free(scratch);
return pair;
}
int copies_per_node(pe_resource_t * rsc)
{
/* Strictly speaking, there should be a 'copies_per_node' addition
* to the resource function table and each case would be a
* function. However that would be serious overkill to return an
* int. In fact, it seems to me that both function tables
* could/should be replaced by resources.{c,h} full of
* rsc_{some_operation} functions containing a switch as below
* which calls out to functions named {variant}_{some_operation}
* as needed.
*/
switch(rsc->variant) {
case pe_unknown:
return 0;
case pe_native:
case pe_group:
return 1;
case pe_clone:
{
const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX);
if (max_clones_node == NULL) {
return 1;
} else {
int max_i;
pcmk__scan_min_int(max_clones_node, &max_i, 0);
return max_i;
}
}
case pe_container:
{
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, rsc);
return data->nreplicas_per_host;
}
}
return 0;
}
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node weights (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__bundle_apply_coloc_score(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent)
{
GList *allocated_primaries = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
/* This should never be called for the bundle itself as a dependent.
* Instead, we add its colocation constraints to its replicas and call the
* apply_coloc_score() for the replicas as dependents.
*/
CRM_ASSERT(!for_dependent);
CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
return);
CRM_ASSERT(dependent->variant == pe_native);
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if (colocation->dependent->variant > pe_group) {
pe_resource_t *primary_replica = compatible_replica(dependent, primary,
RSC_ROLE_UNKNOWN,
FALSE,
dependent->cluster);
if (primary_replica) {
pe_rsc_debug(primary, "Pairing %s with %s",
dependent->id, primary_replica->id);
dependent->cmds->apply_coloc_score(dependent, primary_replica,
colocation, true);
} else if (colocation->score >= INFINITY) {
crm_notice("Cannot pair %s with instance of %s",
dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true);
} else {
pe_rsc_debug(primary, "Cannot pair %s with instance of %s",
dependent->id, primary->id);
}
return;
}
get_bundle_variant_data(bundle_data, primary);
pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d",
colocation->id, dependent->id, primary->id, colocation->score);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (colocation->score < INFINITY) {
replica->container->cmds->apply_coloc_score(dependent,
replica->container,
colocation, false);
} else {
pe_node_t *chosen = replica->container->fns->location(replica->container,
NULL, FALSE);
if ((chosen == NULL)
|| is_set_recursive(replica->container, pe_rsc_block, TRUE)) {
continue;
}
if ((colocation->primary_role >= RSC_ROLE_PROMOTED)
&& (replica->child == NULL)) {
continue;
}
if ((colocation->primary_role >= RSC_ROLE_PROMOTED)
&& (replica->child->next_role < RSC_ROLE_PROMOTED)) {
continue;
}
pe_rsc_trace(primary, "Allowing %s: %s %d",
colocation->id, chosen->details->uname,
chosen->weight);
allocated_primaries = g_list_prepend(allocated_primaries, chosen);
}
}
if (colocation->score >= INFINITY) {
node_list_exclude(dependent->allowed_nodes, allocated_primaries, FALSE);
}
g_list_free(allocated_primaries);
}
enum pe_action_flags
pcmk__bundle_action_flags(pe_action_t *action, pe_node_t *node)
{
GList *containers = NULL;
enum pe_action_flags flags = 0;
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, action->rsc);
if(data->child) {
enum action_tasks task = get_complex_task(data->child, action->task, TRUE);
switch(task) {
case no_action:
case action_notify:
case action_notified:
case action_promote:
case action_promoted:
case action_demote:
case action_demoted:
return summary_action_flags(action, data->child->children, node);
default:
break;
}
}
containers = get_container_list(action->rsc);
flags = summary_action_flags(action, containers, node);
g_list_free(containers);
return flags;
}
pe_resource_t *
find_compatible_child_by_node(pe_resource_t * local_child, pe_node_t * local_node, pe_resource_t * rsc,
enum rsc_role_e filter, gboolean current)
{
GList *gIter = NULL;
GList *children = NULL;
if (local_node == NULL) {
crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id);
return NULL;
}
crm_trace("Looking for compatible child from %s for %s on %s",
local_child->id, rsc->id, local_node->details->uname);
children = get_containers_or_children(rsc);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if(is_child_compatible(child_rsc, local_node, filter, current)) {
crm_trace("Pairing %s with %s on %s",
local_child->id, child_rsc->id, local_node->details->uname);
return child_rsc;
}
}
crm_trace("Can't pair %s with %s", local_child->id, rsc->id);
if(children != rsc->children) {
g_list_free(children);
}
return NULL;
}
static pe__bundle_replica_t *
replica_for_container(pe_resource_t *rsc, pe_resource_t *container,
pe_node_t *node)
{
if (rsc->variant == pe_container) {
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, rsc);
for (GList *gIter = data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (replica->child
&& (container == replica->container)
&& (node->details == replica->node->details)) {
return replica;
}
}
}
return NULL;
}
static uint32_t
multi_update_interleave_actions(pe_action_t *first, pe_action_t *then,
pe_node_t *node, uint32_t filter, uint32_t type,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
GList *children = NULL;
gboolean current = FALSE;
uint32_t changed = pcmk__updated_none;
/* Fix this - lazy */
if (pcmk__ends_with(first->uuid, "_stopped_0")
|| pcmk__ends_with(first->uuid, "_demoted_0")) {
current = TRUE;
}
children = get_containers_or_children(then->rsc);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *then_child = gIter->data;
pe_resource_t *first_child = find_compatible_child(then_child,
first->rsc,
RSC_ROLE_UNKNOWN,
current);
if (first_child == NULL && current) {
crm_trace("Ignore");
} else if (first_child == NULL) {
crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid, then->uuid);
/* Me no like this hack - but what else can we do?
*
* If there is no-one active or about to be active
* on the same node as then_child, then they must
* not be allowed to start
*/
if (pcmk_any_flags_set(type, pe_order_runnable_left|pe_order_implies_then) /* Mandatory */ ) {
pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id);
if (pcmk__assign_resource(then_child, NULL, true)) {
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
}
} else {
pe_action_t *first_action = NULL;
pe_action_t *then_action = NULL;
enum action_tasks task = clone_child_action(first);
const char *first_task = task2text(task);
pe__bundle_replica_t *first_replica = NULL;
pe__bundle_replica_t *then_replica = NULL;
first_replica = replica_for_container(first->rsc, first_child,
node);
if (strstr(first->task, "stop") && first_replica && first_replica->child) {
/* Except for 'stopped' we should be looking at the
* in-container resource, actions for the child will
* happen later and are therefor more likely to align
* with the user's intent.
*/
first_action = find_first_action(first_replica->child->actions,
NULL, task2text(task), node);
} else {
first_action = find_first_action(first_child->actions, NULL, task2text(task), node);
}
then_replica = replica_for_container(then->rsc, then_child, node);
if (strstr(then->task, "mote")
&& then_replica && then_replica->child) {
/* Promote/demote actions will never be found for the
* container resource, look in the child instead
*
* Alternatively treat:
* 'XXXX then promote YYYY' as 'XXXX then start container for YYYY', and
* 'demote XXXX then stop YYYY' as 'stop container for XXXX then stop YYYY'
*/
then_action = find_first_action(then_replica->child->actions,
NULL, then->task, node);
} else {
then_action = find_first_action(then_child->actions, NULL, then->task, node);
}
if (first_action == NULL) {
if (!pcmk_is_set(first_child->flags, pe_rsc_orphan)
&& !pcmk__str_any_of(first_task, RSC_STOP, RSC_DEMOTE, NULL)) {
crm_err("Internal error: No action found for %s in %s (first)",
first_task, first_child->id);
} else {
crm_trace("No action found for %s in %s%s (first)",
first_task, first_child->id,
pcmk_is_set(first_child->flags, pe_rsc_orphan)? " (ORPHAN)" : "");
}
continue;
}
/* We're only interested if 'then' is neither stopping nor being demoted */
if (then_action == NULL) {
if (!pcmk_is_set(then_child->flags, pe_rsc_orphan)
&& !pcmk__str_any_of(then->task, RSC_STOP, RSC_DEMOTE, NULL)) {
crm_err("Internal error: No action found for %s in %s (then)",
then->task, then_child->id);
} else {
crm_trace("No action found for %s in %s%s (then)",
then->task, then_child->id,
pcmk_is_set(then_child->flags, pe_rsc_orphan)? " (ORPHAN)" : "");
}
continue;
}
if (order_actions(first_action, then_action, type)) {
crm_debug("Created constraint for %s (%d) -> %s (%d) %.6x",
first_action->uuid,
pcmk_is_set(first_action->flags, pe_action_optional),
then_action->uuid,
pcmk_is_set(then_action->flags, pe_action_optional),
type);
pcmk__set_updated_flags(changed, first,
pcmk__updated_first|pcmk__updated_then);
}
if(first_action && then_action) {
changed |= then_child->cmds->update_ordered_actions(first_action,
then_action,
node,
first_child->cmds->action_flags(first_action, node),
filter,
type,
data_set);
} else {
crm_err("Nothing found either for %s (%p) or %s (%p) %s",
first_child->id, first_action,
then_child->id, then_action, task2text(task));
}
}
}
if(children != then->rsc->children) {
g_list_free(children);
}
return changed;
}
static bool
can_interleave_actions(pe_action_t *first, pe_action_t *then)
{
bool interleave = FALSE;
pe_resource_t *rsc = NULL;
const char *interleave_s = NULL;
if(first->rsc == NULL || then->rsc == NULL) {
crm_trace("Not interleaving %s with %s (both must be resources)", first->uuid, then->uuid);
return FALSE;
} else if(first->rsc == then->rsc) {
crm_trace("Not interleaving %s with %s (must belong to different resources)", first->uuid, then->uuid);
return FALSE;
} else if(first->rsc->variant < pe_clone || then->rsc->variant < pe_clone) {
crm_trace("Not interleaving %s with %s (both sides must be clones or bundles)", first->uuid, then->uuid);
return FALSE;
}
if (pcmk__ends_with(then->uuid, "_stop_0")
|| pcmk__ends_with(then->uuid, "_demote_0")) {
rsc = first->rsc;
} else {
rsc = then->rsc;
}
interleave_s = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE);
interleave = crm_is_true(interleave_s);
crm_trace("Interleave %s -> %s: %s (based on %s)",
first->uuid, then->uuid, interleave ? "yes" : "no", rsc->id);
return interleave;
}
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions'
* flags (and runnable_before members if appropriate) as appropriate for the
* ordering. In some cases, the ordering could be disabled as well.
*
* \param[in] first 'First' action in an ordering
* \param[in] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t
pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then,
pe_node_t *node, uint32_t flags, uint32_t filter,
uint32_t type, pe_working_set_t *data_set)
{
uint32_t changed = pcmk__updated_none;
crm_trace("%s -> %s", first->uuid, then->uuid);
if(can_interleave_actions(first, then)) {
changed = multi_update_interleave_actions(first, then, node, filter,
type, data_set);
} else if(then->rsc) {
GList *gIter = NULL;
GList *children = NULL;
// Handle the 'primitive' ordering case
- changed |= native_update_actions(first, then, node, flags, filter,
- type, data_set);
+ changed |= pcmk__update_ordered_actions(first, then, node, flags,
+ filter, type, data_set);
// Now any children (or containers in the case of a bundle)
children = get_containers_or_children(then->rsc);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *then_child = (pe_resource_t *) gIter->data;
uint32_t then_child_changed = pcmk__updated_none;
pe_action_t *then_child_action = find_first_action(then_child->actions, NULL, then->task, node);
if (then_child_action) {
uint32_t then_child_flags = then_child->cmds->action_flags(then_child_action,
node);
if (pcmk_is_set(then_child_flags, pe_action_runnable)) {
then_child_changed |= then_child->cmds->update_ordered_actions(first,
then_child_action,
node,
flags,
filter,
type,
data_set);
}
changed |= then_child_changed;
if (pcmk_is_set(then_child_changed, pcmk__updated_then)) {
for (GList *lpc = then_child_action->actions_after; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *next = (pe_action_wrapper_t *) lpc->data;
pcmk__update_action_for_orderings(next->action,
data_set);
}
}
}
}
if(children != then->rsc->children) {
g_list_free(children);
}
}
return changed;
}
void
pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, rsc);
pcmk__apply_location(constraint, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (replica->container) {
replica->container->cmds->rsc_location(replica->container,
constraint);
}
if (replica->ip) {
replica->ip->cmds->rsc_location(replica->ip, constraint);
}
}
if (bundle_data->child
&& ((constraint->role_filter == RSC_ROLE_UNPROMOTED)
|| (constraint->role_filter == RSC_ROLE_PROMOTED))) {
bundle_data->child->cmds->rsc_location(bundle_data->child, constraint);
bundle_data->child->rsc_location = g_list_prepend(bundle_data->child->rsc_location,
constraint);
}
}
void
pcmk__bundle_expand(pe_resource_t *rsc)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
if (bundle_data->child) {
bundle_data->child->cmds->expand(bundle_data->child);
}
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->remote && replica->container
&& pe__bundle_needs_remote_name(replica->remote, rsc->cluster)) {
/* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that
* run pacemaker-remoted inside, without needing a separate IP for
* the container. This is done by configuring the inner remote's
* connection host as the magic string "#uname", then
* replacing it with the underlying host when needed.
*/
xmlNode *nvpair = get_xpath_object("//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']",
replica->remote->xml, LOG_ERR);
const char *calculated_addr = NULL;
// Replace the value in replica->remote->xml (if appropriate)
calculated_addr = pe__add_bundle_remote_name(replica->remote,
rsc->cluster,
nvpair, "value");
if (calculated_addr) {
/* Since this is for the bundle as a resource, and not any
* particular action, replace the value in the default
* parameters (not evaluated for node). create_graph_action()
* will grab it from there to replace it in node-evaluated
* parameters.
*/
GHashTable *params = pe_rsc_params(replica->remote,
NULL, rsc->cluster);
g_hash_table_replace(params,
strdup(XML_RSC_ATTR_REMOTE_RA_ADDR),
strdup(calculated_addr));
} else {
/* The only way to get here is if the remote connection is
* neither currently running nor scheduled to run. That means we
* won't be doing any operations that require addr (only start
* requires it; we additionally use it to compare digests when
* unpacking status, promote, and migrate_from history, but
* that's already happened by this point).
*/
crm_info("Unable to determine address for bundle %s remote connection",
rsc->id);
}
}
if (replica->ip) {
replica->ip->cmds->expand(replica->ip);
}
if (replica->container) {
replica->container->cmds->expand(replica->container);
}
if (replica->remote) {
replica->remote->cmds->expand(replica->remote);
}
}
}
gboolean
pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *complete, gboolean force)
{
bool any_created = FALSE;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->ip) {
any_created |= replica->ip->cmds->create_probe(replica->ip, node,
complete, force);
}
if (replica->child && (node->details == replica->node->details)) {
any_created |= replica->child->cmds->create_probe(replica->child,
node, complete,
force);
}
if (replica->container) {
bool created = replica->container->cmds->create_probe(replica->container,
node, complete,
force);
if(created) {
any_created = TRUE;
/* If we're limited to one replica per host (due to
* the lack of an IP range probably), then we don't
* want any of our peer containers starting until
* we've established that no other copies are already
* running.
*
* Partly this is to ensure that nreplicas_per_host is
* observed, but also to ensure that the containers
* don't fail to start because the necessary port
* mappings (which won't include an IP for uniqueness)
* are already taken
*/
for (GList *tIter = bundle_data->replicas;
tIter && (bundle_data->nreplicas_per_host == 1);
tIter = tIter->next) {
pe__bundle_replica_t *other = tIter->data;
if ((other != replica) && (other != NULL)
&& (other->container != NULL)) {
pcmk__new_ordering(replica->container,
pcmk__op_key(replica->container->id, RSC_STATUS, 0),
NULL, other->container,
pcmk__op_key(other->container->id, RSC_START, 0),
NULL,
pe_order_optional|pe_order_same_node,
rsc->cluster);
}
}
}
}
if (replica->container && replica->remote
&& replica->remote->cmds->create_probe(replica->remote, node,
complete, force)) {
/* Do not probe the remote resource until we know where the
* container is running. This is required for REMOTE_CONTAINER_HACK
* to correctly probe remote resources.
*/
char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS,
0);
pe_action_t *probe = find_first_action(replica->remote->actions,
probe_uuid, NULL, node);
free(probe_uuid);
if (probe) {
any_created = TRUE;
crm_trace("Ordering %s probe on %s",
replica->remote->id, node->details->uname);
pcmk__new_ordering(replica->container,
pcmk__op_key(replica->container->id, RSC_START, 0),
NULL, replica->remote, NULL, probe,
pe_order_probe, rsc->cluster);
}
}
}
return any_created;
}
void
pcmk__bundle_append_meta(pe_resource_t *rsc, xmlNode *xml)
{
}
void
pcmk__output_bundle_actions(pe_resource_t *rsc)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->ip != NULL) {
replica->ip->cmds->output_actions(replica->ip);
}
if (replica->container != NULL) {
replica->container->cmds->output_actions(replica->container);
}
if (replica->remote != NULL) {
replica->remote->cmds->output_actions(replica->remote);
}
if (replica->child != NULL) {
replica->child->cmds->output_actions(replica->child);
}
}
}
// Bundle implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__bundle_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization)
{
pe__bundle_variant_data_t *bundle_data = NULL;
pe__bundle_replica_t *replica = NULL;
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return;
}
get_bundle_variant_data(bundle_data, rsc);
if (bundle_data->replicas == NULL) {
return;
}
/* All bundle replicas are identical, so using the utilization of the first
* is sufficient for any. Only the implicit container resource can have
* utilization values.
*/
replica = (pe__bundle_replica_t *) bundle_data->replicas->data;
if (replica->container != NULL) {
replica->container->cmds->add_utilization(replica->container, orig_rsc,
all_rscs, utilization);
}
}
// Bundle implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__bundle_shutdown_lock(pe_resource_t *rsc)
{
return; // Bundles currently don't support shutdown locks
}
diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c
index 7046a1f536..c8f85e5763 100644
--- a/lib/pacemaker/pcmk_sched_group.c
+++ b/lib/pacemaker/pcmk_sched_group.c
@@ -1,743 +1,743 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
#define VARIANT_GROUP 1
#include <lib/pengine/variant.h>
/*!
* \internal
* \brief Expand a group's colocations to its members
*
* \param[in,out] rsc Group resource
*/
static void
expand_group_colocations(pe_resource_t *rsc)
{
group_variant_data_t *group_data = NULL;
pe_resource_t *member = NULL;
bool any_unmanaged = false;
get_group_variant_data(group_data, rsc);
// Treat "group with R" colocations as "first member with R"
member = group_data->first_child;
member->rsc_cons = g_list_concat(member->rsc_cons, rsc->rsc_cons);
/* The above works for the whole group because each group member is
* colocated with the previous one.
*
* However, there is a special case when a group has a mandatory colocation
* with a resource that can't start. In that case,
* pcmk__block_colocated_starts() will ensure that dependent resources in
* mandatory colocations (i.e. the first member for groups) can't start
* either. But if any group member is unmanaged and already started, the
* internal group colocations are no longer sufficient to make that apply to
* later members.
*
* To handle that case, add mandatory colocations to each member after the
* first.
*/
any_unmanaged = !pcmk_is_set(member->flags, pe_rsc_managed);
for (GList *item = rsc->children->next; item != NULL; item = item->next) {
member = item->data;
if (any_unmanaged) {
for (GList *cons_iter = rsc->rsc_cons; cons_iter != NULL;
cons_iter = cons_iter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) cons_iter->data;
if (constraint->score == INFINITY) {
member->rsc_cons = g_list_prepend(member->rsc_cons, constraint);
}
}
} else if (!pcmk_is_set(member->flags, pe_rsc_managed)) {
any_unmanaged = true;
}
}
rsc->rsc_cons = NULL;
// Treat "R with group" colocations as "R with last member"
member = group_data->last_child;
member->rsc_cons_lhs = g_list_concat(member->rsc_cons_lhs,
rsc->rsc_cons_lhs);
rsc->rsc_cons_lhs = NULL;
}
pe_node_t *
pcmk__group_allocate(pe_resource_t *rsc, pe_node_t *prefer)
{
pe_node_t *node = NULL;
pe_node_t *group_node = NULL;
GList *gIter = NULL;
group_variant_data_t *group_data = NULL;
get_group_variant_data(group_data, rsc);
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->allocated_to;
}
if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
if (group_data->first_child == NULL) {
// Nothing to allocate
pe__clear_resource_flags(rsc, pe_rsc_provisional);
return NULL;
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
rsc->role = group_data->first_child->role;
expand_group_colocations(rsc);
pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_rsc_trace(rsc, "Allocating group %s member %s",
rsc->id, child_rsc->id);
node = child_rsc->cmds->allocate(child_rsc, prefer);
if (group_node == NULL) {
group_node = node;
}
}
pe__set_next_role(rsc, group_data->first_child->next_role,
"first group member");
pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional);
if (group_data->colocated) {
return group_node;
}
return NULL;
}
void group_update_pseudo_status(pe_resource_t * parent, pe_resource_t * child);
void
group_create_actions(pe_resource_t *rsc)
{
pe_action_t *op = NULL;
const char *value = NULL;
GList *gIter = rsc->children;
pe_rsc_trace(rsc, "Creating actions for %s", rsc->id);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->create_actions(child_rsc);
group_update_pseudo_status(rsc, child_rsc);
}
op = start_action(rsc, NULL, TRUE /* !group_data->child_starting */ );
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
op = custom_action(rsc, started_key(rsc),
RSC_STARTED, NULL,
TRUE /* !group_data->child_starting */ ,
TRUE, rsc->cluster);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
op = stop_action(rsc, NULL, TRUE /* !group_data->child_stopping */ );
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
op = custom_action(rsc, stopped_key(rsc),
RSC_STOPPED, NULL,
TRUE /* !group_data->child_stopping */ ,
TRUE, rsc->cluster);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTABLE);
if (crm_is_true(value)) {
op = custom_action(rsc, demote_key(rsc), RSC_DEMOTE, NULL, TRUE, TRUE,
rsc->cluster);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
op = custom_action(rsc, demoted_key(rsc), RSC_DEMOTED, NULL, TRUE, TRUE,
rsc->cluster);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
op = custom_action(rsc, promote_key(rsc), RSC_PROMOTE, NULL, TRUE, TRUE,
rsc->cluster);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
op = custom_action(rsc, promoted_key(rsc), RSC_PROMOTED, NULL, TRUE,
TRUE, rsc->cluster);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
}
}
void
group_update_pseudo_status(pe_resource_t * parent, pe_resource_t * child)
{
GList *gIter = child->actions;
group_variant_data_t *group_data = NULL;
get_group_variant_data(group_data, parent);
if (group_data->ordered == FALSE) {
/* If this group is not ordered, then leave the meta-actions as optional */
return;
}
if (group_data->child_stopping && group_data->child_starting) {
return;
}
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (pcmk_is_set(action->flags, pe_action_optional)) {
continue;
}
if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)
&& pcmk_is_set(action->flags, pe_action_runnable)) {
group_data->child_stopping = TRUE;
pe_rsc_trace(action->rsc, "Based on %s the group is stopping", action->uuid);
} else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)
&& pcmk_is_set(action->flags, pe_action_runnable)) {
group_data->child_starting = TRUE;
pe_rsc_trace(action->rsc, "Based on %s the group is starting", action->uuid);
}
}
}
void
group_internal_constraints(pe_resource_t *rsc)
{
GList *gIter = rsc->children;
pe_resource_t *last_rsc = NULL;
pe_resource_t *last_active = NULL;
pe_resource_t *top = uber_parent(rsc);
group_variant_data_t *group_data = NULL;
get_group_variant_data(group_data, rsc);
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
pe_order_optional);
pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED,
pe_order_runnable_left);
pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED,
pe_order_runnable_left);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
int stop = pe_order_none;
int stopped = pe_order_implies_then_printed;
int start = pe_order_implies_then | pe_order_runnable_left;
int started =
pe_order_runnable_left | pe_order_implies_then | pe_order_implies_then_printed;
child_rsc->cmds->internal_constraints(child_rsc);
if (last_rsc == NULL) {
if (group_data->ordered) {
pe__set_order_flags(stop, pe_order_optional);
stopped = pe_order_implies_then;
}
} else if (group_data->colocated) {
pcmk__new_colocation("group:internal_colocation", NULL, INFINITY,
child_rsc, last_rsc, NULL, NULL,
pcmk_is_set(child_rsc->flags, pe_rsc_critical),
rsc->cluster);
}
if (pcmk_is_set(top->flags, pe_rsc_promotable)) {
pcmk__order_resource_actions(rsc, RSC_DEMOTE, child_rsc, RSC_DEMOTE,
stop|pe_order_implies_first_printed);
pcmk__order_resource_actions(child_rsc, RSC_DEMOTE, rsc,
RSC_DEMOTED, stopped);
pcmk__order_resource_actions(child_rsc, RSC_PROMOTE, rsc,
RSC_PROMOTED, started);
pcmk__order_resource_actions(rsc, RSC_PROMOTE, child_rsc,
RSC_PROMOTE,
pe_order_implies_first_printed);
}
pcmk__order_starts(rsc, child_rsc, pe_order_implies_first_printed);
pcmk__order_stops(rsc, child_rsc,
stop|pe_order_implies_first_printed);
pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED,
stopped);
pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED,
started);
if (group_data->ordered == FALSE) {
pcmk__order_starts(rsc, child_rsc,
start|pe_order_implies_first_printed);
if (pcmk_is_set(top->flags, pe_rsc_promotable)) {
pcmk__order_resource_actions(rsc, RSC_PROMOTE, child_rsc,
RSC_PROMOTE,
start|pe_order_implies_first_printed);
}
} else if (last_rsc != NULL) {
pcmk__order_starts(last_rsc, child_rsc, start);
pcmk__order_stops(child_rsc, last_rsc,
pe_order_optional|pe_order_restart);
if (pcmk_is_set(top->flags, pe_rsc_promotable)) {
pcmk__order_resource_actions(last_rsc, RSC_PROMOTE, child_rsc,
RSC_PROMOTE, start);
pcmk__order_resource_actions(child_rsc, RSC_DEMOTE, last_rsc,
RSC_DEMOTE, pe_order_optional);
}
} else {
pcmk__order_starts(rsc, child_rsc, pe_order_none);
if (pcmk_is_set(top->flags, pe_rsc_promotable)) {
pcmk__order_resource_actions(rsc, RSC_PROMOTE, child_rsc,
RSC_PROMOTE, pe_order_none);
}
}
/* Look for partially active groups
* Make sure they still shut down in sequence
*/
if (child_rsc->running_on) {
if (group_data->ordered
&& last_rsc
&& last_rsc->running_on == NULL && last_active && last_active->running_on) {
pcmk__order_stops(child_rsc, last_active, pe_order_optional);
}
last_active = child_rsc;
}
last_rsc = child_rsc;
}
if (group_data->ordered && last_rsc != NULL) {
int stop_stop_flags = pe_order_implies_then;
int stop_stopped_flags = pe_order_optional;
pcmk__order_stops(rsc, last_rsc, stop_stop_flags);
pcmk__order_resource_actions(last_rsc, RSC_STOP, rsc, RSC_STOPPED,
stop_stopped_flags);
if (pcmk_is_set(top->flags, pe_rsc_promotable)) {
pcmk__order_resource_actions(rsc, RSC_DEMOTE, last_rsc, RSC_DEMOTE,
stop_stop_flags);
pcmk__order_resource_actions(last_rsc, RSC_DEMOTE, rsc, RSC_DEMOTED,
stop_stopped_flags);
}
}
}
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node weights (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__group_apply_coloc_score(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent)
{
GList *gIter = NULL;
group_variant_data_t *group_data = NULL;
CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
return);
if (!for_dependent) {
goto for_primary;
}
gIter = dependent->children;
pe_rsc_trace(dependent, "Processing constraints from %s", dependent->id);
get_group_variant_data(group_data, dependent);
if (group_data->colocated) {
group_data->first_child->cmds->apply_coloc_score(group_data->first_child,
primary, colocation,
true);
return;
} else if (colocation->score >= INFINITY) {
pcmk__config_err("%s: Cannot perform mandatory colocation "
"between non-colocated group and %s",
dependent->id, primary->id);
return;
}
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->apply_coloc_score(child_rsc, primary, colocation,
true);
}
return;
for_primary:
gIter = primary->children;
get_group_variant_data(group_data, primary);
CRM_CHECK(dependent->variant == pe_native, return);
pe_rsc_trace(primary,
"Processing colocation %s (%s with group %s) for primary",
colocation->id, dependent->id, primary->id);
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
return;
} else if (group_data->colocated && group_data->first_child) {
if (colocation->score >= INFINITY) {
// Dependent can't start until group is fully up
group_data->last_child->cmds->apply_coloc_score(dependent,
group_data->last_child,
colocation, false);
} else {
// Dependent can start as long as group is partially up
group_data->first_child->cmds->apply_coloc_score(dependent,
group_data->first_child,
colocation, false);
}
return;
} else if (colocation->score >= INFINITY) {
pcmk__config_err("%s: Cannot perform mandatory colocation with"
" non-colocated group %s", dependent->id, primary->id);
return;
}
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->apply_coloc_score(dependent, child_rsc, colocation,
false);
}
}
enum pe_action_flags
group_action_flags(pe_action_t * action, pe_node_t * node)
{
GList *gIter = NULL;
enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo);
for (gIter = action->rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
enum action_tasks task = get_complex_task(child, action->task, TRUE);
const char *task_s = task2text(task);
pe_action_t *child_action = find_first_action(child->actions, NULL, task_s, node);
if (child_action) {
enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node);
if (pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(child_flags, pe_action_optional)) {
pe_rsc_trace(action->rsc, "%s is mandatory because of %s", action->uuid,
child_action->uuid);
pe__clear_raw_action_flags(flags, "group action",
pe_action_optional);
pe__clear_action_flags(action, pe_action_optional);
}
if (!pcmk__str_eq(task_s, action->task, pcmk__str_casei)
&& pcmk_is_set(flags, pe_action_runnable)
&& !pcmk_is_set(child_flags, pe_action_runnable)) {
pe_rsc_trace(action->rsc, "%s is not runnable because of %s", action->uuid,
child_action->uuid);
pe__clear_raw_action_flags(flags, "group action",
pe_action_runnable);
pe__clear_action_flags(action, pe_action_runnable);
}
} else if (task != stop_rsc && task != action_demote) {
pe_rsc_trace(action->rsc, "%s is not runnable because of %s (not found in %s)",
action->uuid, task_s, child->id);
pe__clear_raw_action_flags(flags, "group action",
pe_action_runnable);
}
}
return flags;
}
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions'
* flags (and runnable_before members if appropriate) as appropriate for the
* ordering. In some cases, the ordering could be disabled as well.
*
* \param[in] first 'First' action in an ordering
* \param[in] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t
group_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node,
uint32_t flags, uint32_t filter, uint32_t type,
pe_working_set_t *data_set)
{
GList *gIter = then->rsc->children;
uint32_t changed = pcmk__updated_none;
CRM_ASSERT(then->rsc != NULL);
- changed |= native_update_actions(first, then, node, flags, filter, type,
- data_set);
+ changed |= pcmk__update_ordered_actions(first, then, node, flags, filter,
+ type, data_set);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe_action_t *child_action = find_first_action(child->actions, NULL, then->task, node);
if (child_action) {
changed |= child->cmds->update_ordered_actions(first, child_action,
node, flags, filter,
type, data_set);
}
}
return changed;
}
void
group_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
GList *gIter = rsc->children;
GList *saved = constraint->node_list_rh;
GList *zero = pcmk__copy_node_list(constraint->node_list_rh, true);
gboolean reset_scores = TRUE;
group_variant_data_t *group_data = NULL;
get_group_variant_data(group_data, rsc);
pe_rsc_debug(rsc, "Processing rsc_location %s for %s", constraint->id, rsc->id);
pcmk__apply_location(constraint, rsc);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->rsc_location(child_rsc, constraint);
if (group_data->colocated && reset_scores) {
reset_scores = FALSE;
constraint->node_list_rh = zero;
}
}
constraint->node_list_rh = saved;
g_list_free_full(zero, free);
}
void
group_expand(pe_resource_t *rsc)
{
CRM_CHECK(rsc != NULL, return);
pe_rsc_trace(rsc, "Processing actions from %s", rsc->id);
native_expand(rsc);
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->expand(child_rsc);
}
}
/*!
* \internal
* \brief Update nodes with scores of colocated resources' nodes
*
* Given a table of nodes and a resource, update the nodes' scores with the
* scores of the best nodes matching the attribute used for each of the
* resource's relevant colocations.
*
* \param[in,out] rsc Resource to check colocations for
* \param[in] log_id Resource ID to use in log messages
* \param[in,out] nodes Nodes to update
* \param[in] attr Colocation attribute (NULL to use default)
* \param[in] factor Incorporate scores multiplied by this factor
* \param[in] flags Bitmask of enum pcmk__coloc_select values
*
* \note The caller remains responsible for freeing \p *nodes.
*/
void
pcmk__group_add_colocated_node_scores(pe_resource_t *rsc, const char *log_id,
GHashTable **nodes, const char *attr,
float factor, uint32_t flags)
{
GList *gIter = rsc->rsc_cons_lhs;
pe_resource_t *member = NULL;
group_variant_data_t *group_data = NULL;
CRM_CHECK((rsc != NULL) && (nodes != NULL), return);
if (log_id == NULL) {
log_id = rsc->id;
}
get_group_variant_data(group_data, rsc);
if (pcmk_is_set(rsc->flags, pe_rsc_merging)) {
pe_rsc_info(rsc, "Breaking dependency loop with %s at %s",
rsc->id, log_id);
return;
}
pe__set_resource_flags(rsc, pe_rsc_merging);
member = group_data->first_child;
member->cmds->add_colocated_node_scores(member, log_id, nodes, attr,
factor, flags);
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pcmk__add_colocated_node_scores(constraint->dependent, rsc->id, nodes,
constraint->node_attribute,
constraint->score / (float) INFINITY,
flags);
}
pe__clear_resource_flags(rsc, pe_rsc_merging);
}
void
group_append_meta(pe_resource_t * rsc, xmlNode * xml)
{
}
// Group implementation of resource_alloc_functions_t:colocated_resources()
GList *
pcmk__group_colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs)
{
pe_resource_t *child_rsc = NULL;
group_variant_data_t *group_data = NULL;
get_group_variant_data(group_data, rsc);
if (orig_rsc == NULL) {
orig_rsc = rsc;
}
if (group_data->colocated || pe_rsc_is_clone(rsc->parent)) {
/* This group has colocated members and/or is cloned -- either way,
* add every child's colocated resources to the list.
*/
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
child_rsc = (pe_resource_t *) gIter->data;
colocated_rscs = child_rsc->cmds->colocated_resources(child_rsc,
orig_rsc,
colocated_rscs);
}
} else if (group_data->first_child != NULL) {
/* This group's members are not colocated, and the group is not cloned,
* so just add the first child's colocations to the list.
*/
child_rsc = group_data->first_child;
colocated_rscs = child_rsc->cmds->colocated_resources(child_rsc,
orig_rsc,
colocated_rscs);
}
// Now consider colocations where the group itself is specified
colocated_rscs = pcmk__colocated_resources(rsc, orig_rsc, colocated_rscs);
return colocated_rscs;
}
// Group implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__group_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization)
{
group_variant_data_t *group_data = NULL;
pe_resource_t *child = NULL;
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return;
}
pe_rsc_trace(orig_rsc, "%s: Adding group %s as colocated utilization",
orig_rsc->id, rsc->id);
get_group_variant_data(group_data, rsc);
if (group_data->colocated || pe_rsc_is_clone(rsc->parent)) {
// Every group member will be on same node, so sum all members
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
child = (pe_resource_t *) iter->data;
if (pcmk_is_set(child->flags, pe_rsc_provisional)
&& (g_list_find(all_rscs, child) == NULL)) {
child->cmds->add_utilization(child, orig_rsc, all_rscs,
utilization);
}
}
} else {
// Just add first child's utilization
child = group_data->first_child;
if ((child != NULL)
&& pcmk_is_set(child->flags, pe_rsc_provisional)
&& (g_list_find(all_rscs, child) == NULL)) {
child->cmds->add_utilization(child, orig_rsc, all_rscs,
utilization);
}
}
}
// Group implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__group_shutdown_lock(pe_resource_t *rsc)
{
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
child->cmds->shutdown_lock(child);
}
}
diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c
index e82cc2c892..941d34f368 100644
--- a/lib/pacemaker/pcmk_sched_resource.c
+++ b/lib/pacemaker/pcmk_sched_resource.c
@@ -1,1088 +1,1088 @@
/*
* Copyright 2014-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdlib.h>
#include <string.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Resource allocation methods that vary by resource variant
static resource_alloc_functions_t allocation_methods[] = {
{
pcmk__native_allocate,
native_create_actions,
native_create_probe,
native_internal_constraints,
pcmk__primitive_apply_coloc_score,
pcmk__add_colocated_node_scores,
pcmk__colocated_resources,
native_rsc_location,
native_action_flags,
- native_update_actions,
+ pcmk__update_ordered_actions,
pcmk__output_resource_actions,
native_expand,
native_append_meta,
pcmk__primitive_add_utilization,
pcmk__primitive_shutdown_lock,
},
{
pcmk__group_allocate,
group_create_actions,
native_create_probe,
group_internal_constraints,
pcmk__group_apply_coloc_score,
pcmk__group_add_colocated_node_scores,
pcmk__group_colocated_resources,
group_rsc_location,
group_action_flags,
group_update_actions,
pcmk__output_resource_actions,
group_expand,
group_append_meta,
pcmk__group_add_utilization,
pcmk__group_shutdown_lock,
},
{
pcmk__clone_allocate,
clone_create_actions,
clone_create_probe,
clone_internal_constraints,
pcmk__clone_apply_coloc_score,
pcmk__add_colocated_node_scores,
pcmk__colocated_resources,
clone_rsc_location,
clone_action_flags,
pcmk__multi_update_actions,
pcmk__output_resource_actions,
clone_expand,
clone_append_meta,
pcmk__clone_add_utilization,
pcmk__clone_shutdown_lock,
},
{
pcmk__bundle_allocate,
pcmk__bundle_create_actions,
pcmk__bundle_create_probe,
pcmk__bundle_internal_constraints,
pcmk__bundle_apply_coloc_score,
pcmk__add_colocated_node_scores,
pcmk__colocated_resources,
pcmk__bundle_rsc_location,
pcmk__bundle_action_flags,
pcmk__multi_update_actions,
pcmk__output_bundle_actions,
pcmk__bundle_expand,
pcmk__bundle_append_meta,
pcmk__bundle_add_utilization,
pcmk__bundle_shutdown_lock,
}
};
/*!
* \internal
* \brief Check whether a resource's agent standard, provider, or type changed
*
* \param[in] rsc Resource to check
* \param[in] node Node needing unfencing/restart if agent changed
* \param[in] rsc_entry XML with previously known agent information
* \param[in] active_on_node Whether \p rsc is active on \p node
*
* \return true if agent for \p rsc changed, otherwise false
*/
bool
pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *rsc_entry, bool active_on_node)
{
bool changed = false;
const char *attr_list[] = {
XML_ATTR_TYPE,
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER
};
for (int i = 0; i < PCMK__NELEM(attr_list); i++) {
const char *value = crm_element_value(rsc->xml, attr_list[i]);
const char *old_value = crm_element_value(rsc_entry, attr_list[i]);
if (!pcmk__str_eq(value, old_value, pcmk__str_none)) {
changed = true;
trigger_unfencing(rsc, node, "Device definition changed", NULL,
rsc->cluster);
if (active_on_node) {
crm_notice("Forcing restart of %s on %s "
"because %s changed from '%s' to '%s'",
rsc->id, node->details->uname, attr_list[i],
pcmk__s(old_value, ""), pcmk__s(value, ""));
}
}
}
if (changed && active_on_node) {
// Make sure the resource is restarted
custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE,
rsc->cluster);
pe__set_resource_flags(rsc, pe_rsc_start_pending);
}
return changed;
}
/*!
* \internal
* \brief Add resource (and any matching children) to list if it matches ID
*
* \param[in] result List to add resource to
* \param[in] rsc Resource to check
* \param[in] id ID to match
*
* \return (Possibly new) head of list
*/
static GList *
add_rsc_if_matching(GList *result, pe_resource_t *rsc, const char *id)
{
if ((strcmp(rsc->id, id) == 0)
|| ((rsc->clone_name != NULL) && (strcmp(rsc->clone_name, id) == 0))) {
result = g_list_prepend(result, rsc);
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
result = add_rsc_if_matching(result, child, id);
}
return result;
}
/*!
* \internal
* \brief Find all resources matching a given ID by either ID or clone name
*
* \param[in] id Resource ID to check
* \param[in] data_set Cluster working set
*
* \return List of all resources that match \p id
* \note The caller is responsible for freeing the return value with
* g_list_free().
*/
GList *
pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set)
{
GList *result = NULL;
CRM_CHECK((id != NULL) && (data_set != NULL), return NULL);
for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
result = add_rsc_if_matching(result, (pe_resource_t *) iter->data, id);
}
return result;
}
/*!
* \internal
* \brief Set the variant-appropriate allocation methods for a resource
*
* \param[in] rsc Resource to set allocation methods for
* \param[in] ignored Only here so function can be used with g_list_foreach()
*/
static void
set_allocation_methods_for_rsc(pe_resource_t *rsc, void *ignored)
{
rsc->cmds = &allocation_methods[rsc->variant];
g_list_foreach(rsc->children, (GFunc) set_allocation_methods_for_rsc, NULL);
}
/*!
* \internal
* \brief Set the variant-appropriate allocation methods for all resources
*
* \param[in] data_set Cluster working set
*/
void
pcmk__set_allocation_methods(pe_working_set_t *data_set)
{
g_list_foreach(data_set->resources, (GFunc) set_allocation_methods_for_rsc,
NULL);
}
// Shared implementation of resource_alloc_functions_t:colocated_resources()
GList *
pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs)
{
GList *gIter = NULL;
if (orig_rsc == NULL) {
orig_rsc = rsc;
}
if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) {
return colocated_rscs;
}
pe_rsc_trace(orig_rsc, "%s is in colocation chain with %s",
rsc->id, orig_rsc->id);
colocated_rscs = g_list_append(colocated_rscs, rsc);
// Follow colocations where this resource is the dependent resource
for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_resource_t *primary = constraint->primary;
if (primary == orig_rsc) {
continue; // Break colocation loop
}
if ((constraint->score == INFINITY) &&
(pcmk__colocation_affects(rsc, primary, constraint,
true) == pcmk__coloc_affects_location)) {
colocated_rscs = primary->cmds->colocated_resources(primary,
orig_rsc,
colocated_rscs);
}
}
// Follow colocations where this resource is the primary resource
for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_resource_t *dependent = constraint->dependent;
if (dependent == orig_rsc) {
continue; // Break colocation loop
}
if (pe_rsc_is_clone(rsc) && !pe_rsc_is_clone(dependent)) {
continue; // We can't be sure whether dependent will be colocated
}
if ((constraint->score == INFINITY) &&
(pcmk__colocation_affects(dependent, rsc, constraint,
true) == pcmk__coloc_affects_location)) {
colocated_rscs = dependent->cmds->colocated_resources(dependent,
orig_rsc,
colocated_rscs);
}
}
return colocated_rscs;
}
void
pcmk__output_resource_actions(pe_resource_t *rsc)
{
pcmk__output_t *out = rsc->cluster->priv;
pe_node_t *next = NULL;
pe_node_t *current = NULL;
if (rsc->children != NULL) {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
child->cmds->output_actions(child);
}
return;
}
next = rsc->allocated_to;
if (rsc->running_on) {
current = pe__current_node(rsc);
if (rsc->role == RSC_ROLE_STOPPED) {
/*
* This can occur when resources are being recovered
* We fiddle with the current role in native_create_actions()
*/
rsc->role = RSC_ROLE_STARTED;
}
}
if ((current == NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
/* Don't log stopped orphans */
return;
}
out->message(out, "rsc-action", rsc, current, next);
}
/*!
* \internal
* \brief Assign a specified primitive resource to a node
*
* Assign a specified primitive resource to a specified node, if the node can
* run the resource (or unconditionally, if \p force is true). Mark the resource
* as no longer provisional. If the primitive can't be assigned (or \p chosen is
* NULL), unassign any previous assignment for it, set its next role to stopped,
* and update any existing actions scheduled for it. This is not done
* recursively for children, so it should be called only for primitives.
*
* \param[in] rsc Resource to assign
* \param[in] chosen Node to assign \p rsc to
* \param[in] force If true, assign to \p chosen even if unavailable
*
* \return true if \p rsc could be assigned, otherwise false
*
* \note Assigning a resource to the NULL node using this function is different
* from calling pcmk__unassign_resource(), in that it will also update any
* actions created for the resource.
*/
bool
pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force)
{
pcmk__output_t *out = rsc->cluster->priv;
CRM_ASSERT(rsc->variant == pe_native);
if (!force && (chosen != NULL)) {
if ((chosen->weight < 0)
// Allow the graph to assume that guest node connections will come up
|| (!pcmk__node_available(chosen, true, false)
&& !pe__is_guest_node(chosen))) {
crm_debug("All nodes for resource %s are unavailable, unclean or "
"shutting down (%s can%s run resources, with weight %d)",
rsc->id, chosen->details->uname,
(pcmk__node_available(chosen, true, false)? "" : "not"),
chosen->weight);
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability");
chosen = NULL;
}
}
pcmk__unassign_resource(rsc);
pe__clear_resource_flags(rsc, pe_rsc_provisional);
if (chosen == NULL) {
crm_debug("Could not allocate a node for %s", rsc->id);
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate");
for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
pe_action_t *op = (pe_action_t *) iter->data;
crm_debug("Updating %s for allocation failure", op->uuid);
if (pcmk__str_eq(op->task, RSC_STOP, pcmk__str_casei)) {
pe__clear_action_flags(op, pe_action_optional);
} else if (pcmk__str_eq(op->task, RSC_START, pcmk__str_casei)) {
pe__clear_action_flags(op, pe_action_runnable);
//pe__set_resource_flags(rsc, pe_rsc_block);
} else {
// Cancel recurring actions, unless for stopped state
const char *interval_ms_s = NULL;
const char *target_rc_s = NULL;
char *rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING);
interval_ms_s = g_hash_table_lookup(op->meta,
XML_LRM_ATTR_INTERVAL_MS);
target_rc_s = g_hash_table_lookup(op->meta,
XML_ATTR_TE_TARGET_RC);
if ((interval_ms_s != NULL)
&& !pcmk__str_eq(interval_ms_s, "0", pcmk__str_none)
&& !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) {
pe__clear_action_flags(op, pe_action_runnable);
}
free(rc_stopped);
}
}
return false;
}
crm_debug("Assigning %s to %s", rsc->id, chosen->details->uname);
rsc->allocated_to = pe__copy_node(chosen);
chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc,
rsc);
chosen->details->num_resources++;
chosen->count++;
pcmk__consume_node_capacity(chosen->details->utilization, rsc);
if (pcmk_is_set(rsc->cluster->flags, pe_flag_show_utilization)) {
out->message(out, "resource-util", rsc, chosen, __func__);
}
return true;
}
/*!
* \internal
* \brief Assign a specified resource (of any variant) to a node
*
* Assign a specified resource and its children (if any) to a specified node, if
* the node can run the resource (or unconditionally, if \p force is true). Mark
* the resources as no longer provisional. If the resources can't be assigned
* (or \p chosen is NULL), unassign any previous assignments, set next role to
* stopped, and update any existing actions scheduled for them.
*
* \param[in] rsc Resource to assign
* \param[in] chosen Node to assign \p rsc to
* \param[in] force If true, assign to \p chosen even if unavailable
*
* \return true if \p rsc could be assigned, otherwise false
*
* \note Assigning a resource to the NULL node using this function is different
* from calling pcmk__unassign_resource(), in that it will also update any
* actions created for the resource.
*/
bool
pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force)
{
bool changed = false;
if (rsc->children == NULL) {
if (rsc->allocated_to != NULL) {
changed = true;
}
pcmk__assign_primitive(rsc, node, force);
} else {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
changed |= pcmk__assign_resource(child_rsc, node, force);
}
}
return changed;
}
/*!
* \internal
* \brief Remove any assignment of a specified resource to a node
*
* If a specified resource has been assigned to a node, remove that assignment
* and mark the resource as provisional again. This is not done recursively for
* children, so it should be called only for primitives.
*
* \param[in] rsc Resource to unassign
*/
void
pcmk__unassign_resource(pe_resource_t *rsc)
{
pe_node_t *old = rsc->allocated_to;
if (old == NULL) {
return;
}
crm_info("Unassigning %s from %s", rsc->id, old->details->uname);
pe__set_resource_flags(rsc, pe_rsc_provisional);
rsc->allocated_to = NULL;
/* We're going to free the pe_node_t, but its details member is shared and
* will remain, so update that appropriately first.
*/
old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc,
rsc);
old->details->num_resources--;
pcmk__release_node_capacity(old->details->utilization, rsc);
free(old);
}
/*!
* \internal
* \brief Check whether a resource has reached its migration threshold on a node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
* \param[out] failed If the threshold has been reached, this will be set to
* the resource that failed (possibly a parent of \p rsc)
*
* \return true if the migration threshold has been reached, false otherwise
*/
bool
pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
pe_resource_t **failed)
{
int fail_count, remaining_tries;
pe_resource_t *rsc_to_ban = rsc;
// Migration threshold of 0 means never force away
if (rsc->migration_threshold == 0) {
return false;
}
// If we're ignoring failures, also ignore the migration threshold
if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
return false;
}
// If there are no failures, there's no need to force away
fail_count = pe_get_failcount(node, rsc, NULL,
pe_fc_effective|pe_fc_fillers, NULL,
rsc->cluster);
if (fail_count <= 0) {
return false;
}
// If failed resource is anonymous clone instance, we'll force clone away
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
rsc_to_ban = uber_parent(rsc);
}
// How many more times recovery will be tried on this node
remaining_tries = rsc->migration_threshold - fail_count;
if (remaining_tries <= 0) {
crm_warn("%s cannot run on %s due to reaching migration threshold "
"(clean up resource to allow again)"
CRM_XS " failures=%d migration-threshold=%d",
rsc_to_ban->id, node->details->uname, fail_count,
rsc->migration_threshold);
if (failed != NULL) {
*failed = rsc_to_ban;
}
return true;
}
crm_info("%s can fail %d more time%s on "
"%s before reaching migration threshold (%d)",
rsc_to_ban->id, remaining_tries, pcmk__plural_s(remaining_tries),
node->details->uname, rsc->migration_threshold);
return false;
}
static void *
convert_const_pointer(const void *ptr)
{
/* Worst function ever */
return (void *)ptr;
}
/*!
* \internal
* \brief Get a node's weight
*
* \param[in] node Unweighted node to check (for node ID)
* \param[in] nodes List of weighted nodes to look for \p node in
*
* \return Node's weight, or -INFINITY if not found
*/
static int
get_node_weight(pe_node_t *node, GHashTable *nodes)
{
pe_node_t *weighted_node = NULL;
if ((node != NULL) && (nodes != NULL)) {
weighted_node = g_hash_table_lookup(nodes, node->details->id);
}
return (weighted_node == NULL)? -INFINITY : weighted_node->weight;
}
/*!
* \internal
* \brief Compare two resources according to which should be allocated first
*
* \param[in] a First resource to compare
* \param[in] b Second resource to compare
* \param[in] data Sorted list of all nodes in cluster
*
* \return -1 if \p a should be allocated before \b, 0 if they are equal,
* or +1 if \p a should be allocated after \b
*/
static gint
cmp_resources(gconstpointer a, gconstpointer b, gpointer data)
{
const pe_resource_t *resource1 = a;
const pe_resource_t *resource2 = b;
GList *nodes = (GList *) data;
int rc = 0;
int r1_weight = -INFINITY;
int r2_weight = -INFINITY;
pe_node_t *r1_node = NULL;
pe_node_t *r2_node = NULL;
GHashTable *r1_nodes = NULL;
GHashTable *r2_nodes = NULL;
const char *reason = NULL;
// Resources with highest priority should be allocated first
reason = "priority";
r1_weight = resource1->priority;
r2_weight = resource2->priority;
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
// We need nodes to make any other useful comparisons
reason = "no node list";
if (nodes == NULL) {
goto done;
}
// Calculate and log node weights
pcmk__add_colocated_node_scores(convert_const_pointer(resource1),
resource1->id, &r1_nodes, NULL, 1,
pcmk__coloc_select_this_with);
pcmk__add_colocated_node_scores(convert_const_pointer(resource2),
resource2->id, &r2_nodes, NULL, 1,
pcmk__coloc_select_this_with);
pe__show_node_weights(true, NULL, resource1->id, r1_nodes,
resource1->cluster);
pe__show_node_weights(true, NULL, resource2->id, r2_nodes,
resource2->cluster);
// The resource with highest score on its current node goes first
reason = "current location";
if (resource1->running_on != NULL) {
r1_node = pe__current_node(resource1);
}
if (resource2->running_on != NULL) {
r2_node = pe__current_node(resource2);
}
r1_weight = get_node_weight(r1_node, r1_nodes);
r2_weight = get_node_weight(r2_node, r2_nodes);
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
// Otherwise a higher weight on any node will do
reason = "score";
for (GList *iter = nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
r1_weight = get_node_weight(node, r1_nodes);
r2_weight = get_node_weight(node, r2_nodes);
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
}
done:
crm_trace("%s (%d)%s%s %c %s (%d)%s%s: %s",
resource1->id, r1_weight,
((r1_node == NULL)? "" : " on "),
((r1_node == NULL)? "" : r1_node->details->id),
((rc < 0)? '>' : ((rc > 0)? '<' : '=')),
resource2->id, r2_weight,
((r2_node == NULL)? "" : " on "),
((r2_node == NULL)? "" : r2_node->details->id),
reason);
if (r1_nodes != NULL) {
g_hash_table_destroy(r1_nodes);
}
if (r2_nodes != NULL) {
g_hash_table_destroy(r2_nodes);
}
return rc;
}
/*!
* \internal
* \brief Sort resources in the order they should be allocated to nodes
*
* \param[in] data_set Cluster working set
*/
void
pcmk__sort_resources(pe_working_set_t *data_set)
{
GList *nodes = g_list_copy(data_set->nodes);
nodes = pcmk__sort_nodes(nodes, NULL);
data_set->resources = g_list_sort_with_data(data_set->resources,
cmp_resources, nodes);
g_list_free(nodes);
}
/*!
* \internal
* \brief Create a hash table with a single node in it
*
* \param[in] node Node to copy into new table
*
* \return Newly created hash table containing a copy of \p node
* \note The caller is responsible for freeing the result with
* g_hash_table_destroy().
*/
static GHashTable *
new_node_table(pe_node_t *node)
{
GHashTable *table = pcmk__strkey_table(NULL, free);
node = pe__copy_node(node);
g_hash_table_insert(table, (gpointer) node->details->id, node);
return table;
}
/*!
* \internal
* \brief Apply a resource's parent's colocation scores to a node table
*
* \param[in] rsc Resource whose colocations should be applied
* \param[in,out] nodes Node table to apply colocations to
*/
static void
apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes)
{
GList *iter = NULL;
pcmk__colocation_t *colocation = NULL;
for (iter = rsc->parent->rsc_cons; iter != NULL; iter = iter->next) {
colocation = (pcmk__colocation_t *) iter->data;
pcmk__add_colocated_node_scores(colocation->primary, rsc->id, nodes,
colocation->node_attribute,
colocation->score / (float) INFINITY,
pcmk__coloc_select_default);
}
for (iter = rsc->parent->rsc_cons_lhs; iter != NULL; iter = iter->next) {
colocation = (pcmk__colocation_t *) iter->data;
if (!pcmk__colocation_has_influence(colocation, rsc)) {
continue;
}
pcmk__add_colocated_node_scores(colocation->dependent, rsc->id, nodes,
colocation->node_attribute,
colocation->score / (float) INFINITY,
pcmk__coloc_select_nonnegative);
}
}
/*!
* \internal
* \brief Compare clone or bundle instances based on colocation scores
*
* Determine the relative order in which two clone or bundle instances should be
* assigned to nodes, considering the scores of colocation constraints directly
* or indirectly involving them.
*
* \param[in] instance1 First instance to compare
* \param[in] instance2 Second instance to compare
*
* \return A negative number if \p instance1 should be assigned first,
* a positive number if \p instance2 should be assigned first,
* or 0 if assignment order doesn't matter
*/
static int
cmp_instance_by_colocation(const pe_resource_t *instance1,
const pe_resource_t *instance2)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
pe_node_t *current_node1 = pe__current_node(instance1);
pe_node_t *current_node2 = pe__current_node(instance2);
GHashTable *colocated_scores1 = NULL;
GHashTable *colocated_scores2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL)
&& (instance2 != NULL) && (instance2->parent != NULL)
&& (current_node1 != NULL) && (current_node2 != NULL));
// Create node tables initialized with each node
colocated_scores1 = new_node_table(current_node1);
colocated_scores2 = new_node_table(current_node2);
// Apply parental colocations
apply_parent_colocations(instance1, &colocated_scores1);
apply_parent_colocations(instance2, &colocated_scores2);
// Find original nodes again, with scores updated for colocations
node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
// Compare nodes by updated scores
if (node1->weight < node2->weight) {
crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
instance1->id, node1->weight, node1->details->uname,
instance2->id, node2->weight, node2->details->uname);
rc = 1;
} else if (node1->weight > node2->weight) {
crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
instance1->id, node1->weight, node1->details->uname,
instance2->id, node2->weight, node2->details->uname);
rc = -1;
}
g_hash_table_destroy(colocated_scores1);
g_hash_table_destroy(colocated_scores2);
return rc;
}
/*!
* \internal
* \brief Check whether a resource or any of its children are failed
*
* \param[in] rsc Resource to check
*
* \return true if \p rsc or any of its children are failed, otherwise false
*/
static bool
did_fail(const pe_resource_t * rsc)
{
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
return true;
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
if (did_fail((pe_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether a node is allowed to run a resource
*
* \param[in] rsc Resource to check
* \param[in,out] node Node to check (will be set NULL if not allowed)
*
* \return true if *node is either NULL or allowed for \p rsc, otherwise false
*/
static bool
node_is_allowed(const pe_resource_t *rsc, pe_node_t **node)
{
if (*node != NULL) {
pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes,
(*node)->details->id);
if ((allowed == NULL) || (allowed->weight < 0)) {
pe_rsc_trace(rsc, "%s: current location (%s) is unavailable",
rsc->id, (*node)->details->uname);
*node = NULL;
return false;
}
}
return true;
}
/*!
* \internal
* \brief Compare two clone or bundle instances' instance numbers
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a's instance number is lower,
* a positive number if \p b's instance number is lower,
* or 0 if their instance numbers are the same
*/
gint
pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
{
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
char *div1 = NULL;
char *div2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
// Clone numbers are after a colon, bundle numbers after a dash
div1 = strrchr(instance1->id, ':');
if (div1 == NULL) {
div1 = strrchr(instance1->id, '-');
}
div2 = strrchr(instance2->id, ':');
if (div2 == NULL) {
div2 = strrchr(instance2->id, '-');
}
CRM_ASSERT((div1 != NULL) && (div2 != NULL));
return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
}
/*!
* \internal
* \brief Compare clone or bundle instances according to assignment order
*
* Compare two clone or bundle instances according to the order they should be
* assigned to nodes, preferring (in order):
*
* - Active instance that is less multiply active
* - Instance that is not active on a disallowed node
* - Instance with higher configured priority
* - Active instance whose current node can run resources
* - Active instance whose parent is allowed on current node
* - Active instance whose current node has fewer other instances
* - Active instance
* - Failed instance
* - Instance whose colocations result in higher score on current node
* - Instance with lower ID in lexicographic order
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a should be assigned first,
* a positive number if \p b should be assigned first,
* or 0 if assignment order doesn't matter
*/
gint
pcmk__cmp_instance(gconstpointer a, gconstpointer b)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
unsigned int nnodes1 = 0;
unsigned int nnodes2 = 0;
bool can1 = true;
bool can2 = true;
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
node1 = pe__find_active_on(instance1, &nnodes1, NULL);
node2 = pe__find_active_on(instance2, &nnodes2, NULL);
/* If both instances are running and at least one is multiply
* active, prefer instance that's running on fewer nodes.
*/
if ((nnodes1 > 0) && (nnodes2 > 0)) {
if (nnodes1 < nnodes2) {
crm_trace("Assign %s (active on %d) before %s (active on %d): "
"less multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return -1;
} else if (nnodes1 > nnodes2) {
crm_trace("Assign %s (active on %d) after %s (active on %d): "
"more multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return 1;
}
}
/* An instance that is either inactive or active on an allowed node is
* preferred over an instance that is active on a no-longer-allowed node.
*/
can1 = node_is_allowed(instance1, &node1);
can2 = node_is_allowed(instance2, &node2);
if (can1 && !can2) {
crm_trace("Assign %s before %s: not active on a disallowed node",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: active on a disallowed node",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher configured priority
if (instance1->priority > instance2->priority) {
crm_trace("Assign %s before %s: priority (%d > %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return -1;
} else if (instance1->priority < instance2->priority) {
crm_trace("Assign %s after %s: priority (%d < %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return 1;
}
// Prefer active instance
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: inactive",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node can run resources
can1 = pcmk__node_available(node1, false, false);
can2 = pcmk__node_available(node2, false, false);
if (can1 && !can2) {
crm_trace("Assign %s before %s: current node can run resources",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: current node can't run resources",
instance1->id, instance2->id);
return 1;
}
// Prefer instance whose parent is allowed to run on instance's current node
node1 = pcmk__top_allowed_node(instance1, node1);
node2 = pcmk__top_allowed_node(instance2, node2);
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: "
"parent not allowed on either instance's current node",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: parent not allowed on current node",
instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: parent allowed on current node",
instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node is running fewer other instances
if (node1->count < node2->count) {
crm_trace("Assign %s before %s: fewer active instances on current node",
instance1->id, instance2->id);
return -1;
} else if (node1->count > node2->count) {
crm_trace("Assign %s after %s: more active instances on current node",
instance1->id, instance2->id);
return 1;
}
// Prefer failed instance
can1 = did_fail(instance1);
can2 = did_fail(instance2);
if (!can1 && can2) {
crm_trace("Assign %s before %s: failed", instance1->id, instance2->id);
return -1;
} else if (can1 && !can2) {
crm_trace("Assign %s after %s: not failed",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher cumulative colocation score on current node
rc = cmp_instance_by_colocation(instance1, instance2);
if (rc != 0) {
return rc;
}
// Prefer instance with lower instance number
rc = pcmk__cmp_instance_number(instance1, instance2);
if (rc < 0) {
crm_trace("Assign %s before %s: instance number",
instance1->id, instance2->id);
} else if (rc > 0) {
crm_trace("Assign %s after %s: instance number",
instance1->id, instance2->id);
} else {
crm_trace("No assignment preference for %s vs. %s",
instance1->id, instance2->id);
}
return rc;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jan 25, 11:01 AM (1 d, 7 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1322241
Default Alt Text
(220 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment