Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h
index 410cf0c9de..58ddc0ceaf 100644
--- a/include/crm/pengine/pe_types.h
+++ b/include/crm/pengine/pe_types.h
@@ -1,468 +1,467 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_PENGINE_PE_TYPES__H
# define PCMK__CRM_PENGINE_PE_TYPES__H
# include <stdbool.h> // bool
# include <sys/types.h> // time_t
# include <libxml/tree.h> // xmlNode
# include <glib.h> // gboolean, guint, GList, GHashTable
# include <crm/common/iso8601.h>
# include <crm/common/scheduler.h>
# include <crm/pengine/common.h>
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \file
* \brief Data types for cluster status
* \ingroup pengine
*/
typedef struct pe_node_s pe_node_t;
typedef struct pe_action_s pe_action_t;
typedef struct pe_resource_s pe_resource_t;
typedef struct pe_working_set_s pe_working_set_t;
typedef struct resource_object_functions_s {
gboolean (*unpack) (pe_resource_t*, pe_working_set_t*);
pe_resource_t *(*find_rsc) (pe_resource_t *parent, const char *search,
const pe_node_t *node, int flags);
/* parameter result must be free'd */
char *(*parameter) (pe_resource_t*, pe_node_t*, gboolean, const char*,
pe_working_set_t*);
//! \deprecated will be removed in a future release
void (*print) (pe_resource_t*, const char*, long, void*);
gboolean (*active) (pe_resource_t*, gboolean);
enum rsc_role_e (*state) (const pe_resource_t*, gboolean);
pe_node_t *(*location) (const pe_resource_t*, GList**, int);
void (*free) (pe_resource_t*);
void (*count) (pe_resource_t*);
gboolean (*is_filtered) (const pe_resource_t*, GList *, gboolean);
/*!
* \brief Find a node (and optionally count all) where resource is active
*
* \param[in] rsc Resource to check
* \param[out] count_all If not NULL, set this to count of active nodes
* \param[out] count_clean If not NULL, set this to count of clean nodes
*
* \return A node where the resource is active, preferring the source node
* if the resource is involved in a partial migration or a clean,
* online node if the resource's "requires" is "quorum" or
* "nothing", or NULL if the resource is inactive.
*/
pe_node_t *(*active_node)(const pe_resource_t *rsc, unsigned int *count_all,
unsigned int *count_clean);
/*!
* \brief Get maximum resource instances per node
*
* \param[in] rsc Resource to check
*
* \return Maximum number of \p rsc instances that can be active on one node
*/
unsigned int (*max_per_node)(const pe_resource_t *rsc);
} resource_object_functions_t;
typedef struct resource_alloc_functions_s resource_alloc_functions_t;
struct pe_working_set_s {
xmlNode *input;
crm_time_t *now;
/* options extracted from the input */
char *dc_uuid;
pe_node_t *dc_node;
const char *stonith_action;
const char *placement_strategy;
unsigned long long flags;
int stonith_timeout;
enum pe_quorum_policy no_quorum_policy;
GHashTable *config_hash;
GHashTable *tickets;
// Actions for which there can be only one (e.g. fence nodeX)
GHashTable *singletons;
GList *nodes;
GList *resources;
GList *placement_constraints;
GList *ordering_constraints;
GList *colocation_constraints;
GList *ticket_constraints;
GList *actions;
xmlNode *failed;
xmlNode *op_defaults;
xmlNode *rsc_defaults;
/* stats */
int num_synapse;
int max_valid_nodes; //! Deprecated (will be removed in a future release)
int order_id;
int action_id;
/* final output */
xmlNode *graph;
GHashTable *template_rsc_sets;
const char *localhost;
GHashTable *tags;
int blocked_resources;
int disabled_resources;
GList *param_check; // History entries that need to be checked
GList *stop_needed; // Containers that need stop actions
time_t recheck_by; // Hint to controller to re-run scheduler by this time
int ninstances; // Total number of resource instances
guint shutdown_lock;// How long (seconds) to lock resources to shutdown node
int priority_fencing_delay; // Priority fencing delay
void *priv;
guint node_pending_timeout; // Node pending timeout
};
struct pe_node_shared_s {
const char *id;
const char *uname;
enum node_type type;
/* @TODO convert these flags into a bitfield */
gboolean online;
gboolean standby;
gboolean standby_onfail;
gboolean pending;
gboolean unclean;
gboolean unseen;
gboolean shutdown;
gboolean expected_up;
gboolean is_dc;
gboolean maintenance;
gboolean rsc_discovery_enabled;
gboolean remote_requires_reset;
gboolean remote_was_fenced;
gboolean remote_maintenance; /* what the remote-rsc is thinking */
gboolean unpacked;
int num_resources;
pe_resource_t *remote_rsc;
GList *running_rsc; /* pe_resource_t* */
GList *allocated_rsc; /* pe_resource_t* */
GHashTable *attrs; /* char* => char* */
GHashTable *utilization;
GHashTable *digest_cache; //!< cache of calculated resource digests
int priority; // calculated based on the priority of resources running on the node
pe_working_set_t *data_set; //!< Cluster that this node is part of
};
struct pe_node_s {
int weight;
gboolean fixed; //!< \deprecated Will be removed in a future release
int count;
struct pe_node_shared_s *details;
int rsc_discover_mode;
};
-# define pe_rsc_provisional 0x00000100ULL
# define pe_rsc_allocating 0x00000200ULL
# define pe_rsc_merging 0x00000400ULL
# define pe_rsc_restarting 0x00000800ULL
# define pe_rsc_stop 0x00001000ULL
# define pe_rsc_reload 0x00002000ULL
# define pe_rsc_allow_remote_remotes 0x00004000ULL
# define pe_rsc_critical 0x00008000ULL
# define pe_rsc_failed 0x00010000ULL
# define pe_rsc_detect_loop 0x00020000ULL
# define pe_rsc_runnable 0x00040000ULL
# define pe_rsc_start_pending 0x00080000ULL
//!< \deprecated Do not use
# define pe_rsc_starting 0x00100000ULL
//!< \deprecated Do not use
# define pe_rsc_stopping 0x00200000ULL
# define pe_rsc_stop_unexpected 0x00400000ULL
# define pe_rsc_allow_migrate 0x00800000ULL
# define pe_rsc_failure_ignored 0x01000000ULL
# define pe_rsc_replica_container 0x02000000ULL
# define pe_rsc_maintenance 0x04000000ULL
# define pe_rsc_is_container 0x08000000ULL
# define pe_rsc_needs_quorum 0x10000000ULL
# define pe_rsc_needs_fencing 0x20000000ULL
# define pe_rsc_needs_unfencing 0x40000000ULL
/* *INDENT-OFF* */
enum pe_action_flags {
pe_action_pseudo = 0x00001,
pe_action_runnable = 0x00002,
pe_action_optional = 0x00004,
pe_action_print_always = 0x00008,
pe_action_have_node_attrs = 0x00010,
pe_action_implied_by_stonith = 0x00040,
pe_action_migrate_runnable = 0x00080,
pe_action_dumped = 0x00100,
pe_action_processed = 0x00200,
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
pe_action_clear = 0x00400, //! \deprecated Unused
#endif
pe_action_dangle = 0x00800,
/* This action requires one or more of its dependencies to be runnable.
* We use this to clear the runnable flag before checking dependencies.
*/
pe_action_requires_any = 0x01000,
pe_action_reschedule = 0x02000,
pe_action_tracking = 0x04000,
pe_action_dedup = 0x08000, //! Internal state tracking when creating graph
pe_action_dc = 0x10000, //! Action may run on DC instead of target
};
/* *INDENT-ON* */
struct pe_resource_s {
char *id;
char *clone_name;
xmlNode *xml;
xmlNode *orig_xml;
xmlNode *ops_xml;
pe_working_set_t *cluster;
pe_resource_t *parent;
enum pe_obj_types variant;
void *variant_opaque;
resource_object_functions_t *fns;
resource_alloc_functions_t *cmds;
enum rsc_recovery_type recovery_type;
enum pe_restart restart_type; //!< \deprecated will be removed in future release
int priority;
int stickiness;
int sort_index;
int failure_timeout;
int migration_threshold;
guint remote_reconnect_ms;
char *pending_task;
unsigned long long flags;
// @TODO merge these into flags
gboolean is_remote_node;
gboolean exclusive_discover;
/* Pay special attention to whether you want to use rsc_cons_lhs and
* rsc_cons directly, which include only colocations explicitly involving
* this resource, or call libpacemaker's pcmk__with_this_colocations() and
* pcmk__this_with_colocations() functions, which may return relevant
* colocations involving the resource's ancestors as well.
*/
//!@{
//! This field should be treated as internal to Pacemaker
GList *rsc_cons_lhs; // List of pcmk__colocation_t*
GList *rsc_cons; // List of pcmk__colocation_t*
GList *rsc_location; // List of pe__location_t*
GList *actions; // List of pe_action_t*
GList *rsc_tickets; // List of rsc_ticket*
//!@}
pe_node_t *allocated_to;
pe_node_t *partial_migration_target;
pe_node_t *partial_migration_source;
GList *running_on; /* pe_node_t* */
GHashTable *known_on; /* pe_node_t* */
GHashTable *allowed_nodes; /* pe_node_t* */
enum rsc_role_e role;
enum rsc_role_e next_role;
GHashTable *meta;
GHashTable *parameters; //! \deprecated Use pe_rsc_params() instead
GHashTable *utilization;
GList *children; /* pe_resource_t* */
GList *dangling_migrations; /* pe_node_t* */
pe_resource_t *container;
GList *fillers;
// @COMPAT These should be made const at next API compatibility break
pe_node_t *pending_node; // Node on which pending_task is happening
pe_node_t *lock_node; // Resource is shutdown-locked to this node
time_t lock_time; // When shutdown lock started
/* Resource parameters may have node-attribute-based rules, which means the
* values can vary by node. This table is a cache of parameter name/value
* tables for each node (as needed). Use pe_rsc_params() to get the table
* for a given node.
*/
GHashTable *parameter_cache; // Key = node name, value = parameters table
};
struct pe_action_s {
int id;
int priority;
pe_resource_t *rsc;
pe_node_t *node;
xmlNode *op_entry;
char *task;
char *uuid;
char *cancel_task;
char *reason;
enum pe_action_flags flags;
enum rsc_start_requirement needs;
enum action_fail_response on_fail;
enum rsc_role_e fail_role;
GHashTable *meta;
GHashTable *extra;
/*
* These two varables are associated with the constraint logic
* that involves first having one or more actions runnable before
* then allowing this action to execute.
*
* These varables are used with features such as 'clone-min' which
* requires at minimum X number of cloned instances to be running
* before an order dependency can run. Another option that uses
* this is 'require-all=false' in ordering constrants. This option
* says "only require one instance of a resource to start before
* allowing dependencies to start" -- basically, require-all=false is
* the same as clone-min=1.
*/
/* current number of known runnable actions in the before list. */
int runnable_before;
/* the number of "before" runnable actions required for this action
* to be considered runnable */
int required_runnable_before;
GList *actions_before; /* pe_action_wrapper_t* */
GList *actions_after; /* pe_action_wrapper_t* */
/* Some of the above fields could be moved to the details,
* except for API backward compatibility.
*/
void *action_details; // varies by type of action
};
typedef struct pe_ticket_s {
char *id;
gboolean granted;
time_t last_granted;
gboolean standby;
GHashTable *state;
} pe_ticket_t;
typedef struct pe_tag_s {
char *id;
GList *refs;
} pe_tag_t;
//! Internal tracking for transition graph creation
enum pe_link_state {
pe_link_not_dumped, //! Internal tracking for transition graph creation
pe_link_dumped, //! Internal tracking for transition graph creation
pe_link_dup, //! \deprecated No longer used by Pacemaker
};
enum pe_discover_e {
pe_discover_always = 0,
pe_discover_never,
pe_discover_exclusive,
};
/* *INDENT-OFF* */
enum pe_ordering {
pe_order_none = 0x0, /* deleted */
pe_order_optional = 0x1, /* pure ordering, nothing implied */
pe_order_apply_first_non_migratable = 0x2, /* Only apply this constraint's ordering if first is not migratable. */
pe_order_implies_first = 0x10, /* If 'then' is required, ensure 'first' is too */
pe_order_implies_then = 0x20, /* If 'first' is required, ensure 'then' is too */
pe_order_promoted_implies_first = 0x40, /* If 'then' is required and then's rsc is promoted, ensure 'first' becomes required too */
/* first requires then to be both runnable and migrate runnable. */
pe_order_implies_first_migratable = 0x80,
pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */
pe_order_pseudo_left = 0x200, /* 'then' can only be pseudo if 'first' is runnable */
pe_order_implies_then_on_node = 0x400, /* If 'first' is required on 'nodeX',
* ensure instances of 'then' on 'nodeX' are too.
* Only really useful if 'then' is a clone and 'first' is not
*/
pe_order_probe = 0x800, /* If 'first->rsc' is
* - running but about to stop, ignore the constraint
* - otherwise, behave as runnable_left
*/
pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */
pe_order_stonith_stop = 0x2000, //<! \deprecated Will be removed in future release
pe_order_serialize_only = 0x4000, /* serialize */
pe_order_same_node = 0x8000, /* applies only if 'first' and 'then' are on same node */
pe_order_implies_first_printed = 0x10000, /* Like ..implies_first but only ensures 'first' is printed, not mandatory */
pe_order_implies_then_printed = 0x20000, /* Like ..implies_then but only ensures 'then' is printed, not mandatory */
pe_order_asymmetrical = 0x100000, /* Indicates asymmetrical one way ordering constraint. */
pe_order_load = 0x200000, /* Only relevant if... */
pe_order_one_or_more = 0x400000, /* 'then' is runnable only if one or more of its dependencies are too */
pe_order_anti_colocation = 0x800000,
pe_order_preserve = 0x1000000, /* Hack for breaking user ordering constraints with container resources */
pe_order_then_cancels_first = 0x2000000, // if 'then' becomes required, 'first' becomes optional
pe_order_trace = 0x4000000, /* test marker */
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
// \deprecated Use pe_order_promoted_implies_first instead
pe_order_implies_first_master = pe_order_promoted_implies_first,
#endif
};
/* *INDENT-ON* */
typedef struct pe_action_wrapper_s {
enum pe_ordering type;
enum pe_link_state state;
pe_action_t *action;
} pe_action_wrapper_t;
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/pengine/pe_types_compat.h>
#endif
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_PENGINE_PE_TYPES__H
diff --git a/include/crm/pengine/pe_types_compat.h b/include/crm/pengine/pe_types_compat.h
index bcd1fa8e38..ff98db85a6 100644
--- a/include/crm/pengine/pe_types_compat.h
+++ b/include/crm/pengine/pe_types_compat.h
@@ -1,170 +1,173 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_PENGINE_PE_TYPES_COMPAT__H
# define PCMK__CRM_PENGINE_PE_TYPES_COMPAT__H
#include <crm/pengine/pe_types.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief Deprecated Pacemaker scheduler API
* \ingroup pengine
* \deprecated Do not include this header directly. The scheduler APIs in this
* header, and the header itself, will be removed in a future
* release.
*/
//! \deprecated Use pcmk_rsc_removed instead
#define pe_rsc_orphan pcmk_rsc_removed
//! \deprecated Use pcmk_rsc_managed instead
#define pe_rsc_managed pcmk_rsc_managed
//! \deprecated Use pcmk_rsc_blocked instead
#define pe_rsc_block pcmk_rsc_blocked
//! \deprecated Use pcmk_rsc_removed_filler instead
#define pe_rsc_orphan_container_filler pcmk_rsc_removed_filler
//! \deprecated Use pcmk_rsc_notify instead
#define pe_rsc_notify pcmk_rsc_notify
//! \deprecated Use pcmk_rsc_unique instead
#define pe_rsc_unique pcmk_rsc_unique
//! \deprecated Use pcmk_rsc_fence_device instead
#define pe_rsc_fence_device pcmk_rsc_fence_device
//! \deprecated Use pcmk_rsc_promotable instead
#define pe_rsc_promotable pcmk_rsc_promotable
+//! \deprecated Use pcmk_rsc_unassigned instead
+#define pe_rsc_provisional pcmk_rsc_unassigned
+
//! \deprecated Use pcmk_sched_quorate instead
#define pe_flag_have_quorum pcmk_sched_quorate
//! \deprecated Use pcmk_sched_symmetric_cluster instead
#define pe_flag_symmetric_cluster pcmk_sched_symmetric_cluster
//! \deprecated Use pcmk_sched_in_maintenance instead
#define pe_flag_maintenance_mode pcmk_sched_in_maintenance
//! \deprecated Use pcmk_sched_fencing_enabled instead
#define pe_flag_stonith_enabled pcmk_sched_fencing_enabled
//! \deprecated Use pcmk_sched_have_fencing instead
#define pe_flag_have_stonith_resource pcmk_sched_have_fencing
//! \deprecated Use pcmk_sched_enable_unfencing instead
#define pe_flag_enable_unfencing pcmk_sched_enable_unfencing
//! \deprecated Use pcmk_sched_concurrent_fencing instead
#define pe_flag_concurrent_fencing pcmk_sched_concurrent_fencing
//! \deprecated Use pcmk_sched_stop_removed_resources instead
#define pe_flag_stop_rsc_orphans pcmk_sched_stop_removed_resources
//! \deprecated Use pcmk_sched_cancel_removed_actions instead
#define pe_flag_stop_action_orphans pcmk_sched_cancel_removed_actions
//! \deprecated Use pcmk_sched_stop_all instead
#define pe_flag_stop_everything pcmk_sched_stop_all
//! \deprecated Use pcmk_sched_start_failure_fatal instead
#define pe_flag_start_failure_fatal pcmk_sched_start_failure_fatal
//! \deprecated Do not use
#define pe_flag_remove_after_stop pcmk_sched_remove_after_stop
//! \deprecated Use pcmk_sched_startup_fencing instead
#define pe_flag_startup_fencing pcmk_sched_startup_fencing
//! \deprecated Use pcmk_sched_shutdown_lock instead
#define pe_flag_shutdown_lock pcmk_sched_shutdown_lock
//! \deprecated Use pcmk_sched_probe_resources instead
#define pe_flag_startup_probes pcmk_sched_probe_resources
//! \deprecated Use pcmk_sched_have_status instead
#define pe_flag_have_status pcmk_sched_have_status
//! \deprecated Use pcmk_sched_have_remote_nodes instead
#define pe_flag_have_remote_nodes pcmk_sched_have_remote_nodes
//! \deprecated Use pcmk_sched_location_only instead
#define pe_flag_quick_location pcmk_sched_location_only
//! \deprecated Use pcmk_sched_sanitized instead
#define pe_flag_sanitized pcmk_sched_sanitized
//! \deprecated Do not use
#define pe_flag_stdout (1ULL << 22)
//! \deprecated Use pcmk_sched_no_counts instead
#define pe_flag_no_counts pcmk_sched_no_counts
//! \deprecated Use pcmk_sched_no_compat instead
#define pe_flag_no_compat pcmk_sched_no_compat
//! \deprecated Use pcmk_sched_output_scores instead
#define pe_flag_show_scores pcmk_sched_output_scores
//! \deprecated Use pcmk_sched_show_utilization instead
#define pe_flag_show_utilization pcmk_sched_show_utilization
//! \deprecated Use pcmk_sched_validate_only instead
#define pe_flag_check_config pcmk_sched_validate_only
//!@{
//! \deprecated Do not use (unused by Pacemaker)
enum pe_graph_flags {
pe_graph_none = 0x00000,
pe_graph_updated_first = 0x00001,
pe_graph_updated_then = 0x00002,
pe_graph_disable = 0x00004,
};
//!@}
//!@{
//! \deprecated Do not use
enum pe_check_parameters {
pe_check_last_failure,
pe_check_active,
};
//!@}
//!< \deprecated Use pe_action_t instead
typedef struct pe_action_s action_t;
//!< \deprecated Use pe_action_wrapper_t instead
typedef struct pe_action_wrapper_s action_wrapper_t;
//!< \deprecated Use pe_node_t instead
typedef struct pe_node_s node_t;
//!< \deprecated Use enum pe_quorum_policy instead
typedef enum pe_quorum_policy no_quorum_policy_t;
//!< \deprecated use pe_resource_t instead
typedef struct pe_resource_s resource_t;
//!< \deprecated Use pe_tag_t instead
typedef struct pe_tag_s tag_t;
//!< \deprecated Use pe_ticket_t instead
typedef struct pe_ticket_s ticket_t;
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_PENGINE_PE_TYPES_COMPAT__H
diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c
index 60e323bb95..4d7c65ba25 100644
--- a/lib/pacemaker/pcmk_sched_bundle.c
+++ b/lib/pacemaker/pcmk_sched_bundle.c
@@ -1,1053 +1,1053 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
struct assign_data {
const pe_node_t *prefer;
bool stop_if_fail;
};
/*!
* \internal
* \brief Assign a single bundle replica's resources (other than container)
*
* \param[in,out] replica Replica to assign
* \param[in] user_data Preferred node, if any
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
assign_replica(pe__bundle_replica_t *replica, void *user_data)
{
pe_node_t *container_host = NULL;
struct assign_data *assign_data = user_data;
const pe_node_t *prefer = assign_data->prefer;
bool stop_if_fail = assign_data->stop_if_fail;
const pe_resource_t *bundle = pe__const_top_resource(replica->container,
true);
if (replica->ip != NULL) {
pe_rsc_trace(bundle, "Assigning bundle %s IP %s",
bundle->id, replica->ip->id);
replica->ip->cmds->assign(replica->ip, prefer, stop_if_fail);
}
container_host = replica->container->allocated_to;
if (replica->remote != NULL) {
if (pe__is_guest_or_remote_node(container_host)) {
/* REMOTE_CONTAINER_HACK: "Nested" connection resources must be on
* the same host because Pacemaker Remote only supports a single
* active connection.
*/
pcmk__new_colocation("#replica-remote-with-host-remote", NULL,
INFINITY, replica->remote,
container_host->details->remote_rsc, NULL,
NULL, pcmk__coloc_influence);
}
pe_rsc_trace(bundle, "Assigning bundle %s connection %s",
bundle->id, replica->remote->id);
replica->remote->cmds->assign(replica->remote, prefer, stop_if_fail);
}
if (replica->child != NULL) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, replica->child->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (!pe__same_node(node, replica->node)) {
node->weight = -INFINITY;
} else if (!pcmk__threshold_reached(replica->child, node, NULL)) {
node->weight = INFINITY;
}
}
pe__set_resource_flags(replica->child->parent, pe_rsc_allocating);
pe_rsc_trace(bundle, "Assigning bundle %s replica child %s",
bundle->id, replica->child->id);
replica->child->cmds->assign(replica->child, replica->node,
stop_if_fail);
pe__clear_resource_flags(replica->child->parent, pe_rsc_allocating);
}
return true;
}
/*!
* \internal
* \brief Assign a bundle resource to a node
*
* \param[in,out] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
* \param[in] stop_if_fail If \c true and a primitive descendant of \p rsc
* can't be assigned to a node, set the
* descendant's next role to stopped and update
* existing actions
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
pe_node_t *
pcmk__bundle_assign(pe_resource_t *rsc, const pe_node_t *prefer,
bool stop_if_fail)
{
GList *containers = NULL;
pe_resource_t *bundled_resource = NULL;
struct assign_data assign_data = { prefer, stop_if_fail };
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
pe_rsc_trace(rsc, "Assigning bundle %s", rsc->id);
pe__set_resource_flags(rsc, pe_rsc_allocating);
pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags,
pcmk_sched_output_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
// Assign all containers first, so we know what nodes the bundle will be on
containers = g_list_sort(pe__bundle_containers(rsc), pcmk__cmp_instance);
pcmk__assign_instances(rsc, containers, pe__bundle_max(rsc),
rsc->fns->max_per_node(rsc));
g_list_free(containers);
// Then assign remaining replica resources
pe__foreach_bundle_replica(rsc, assign_replica, (void *) &assign_data);
// Finally, assign the bundled resources to each bundle node
bundled_resource = pe__bundled_resource(rsc);
if (bundled_resource != NULL) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, bundled_resource->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
if (pe__node_is_bundle_instance(rsc, node)) {
node->weight = 0;
} else {
node->weight = -INFINITY;
}
}
bundled_resource->cmds->assign(bundled_resource, prefer, stop_if_fail);
}
- pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional);
+ pe__clear_resource_flags(rsc, pe_rsc_allocating|pcmk_rsc_unassigned);
return NULL;
}
/*!
* \internal
* \brief Create actions for a bundle replica's resources (other than child)
*
* \param[in,out] replica Replica to create actions for
* \param[in] user_data Unused
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
create_replica_actions(pe__bundle_replica_t *replica, void *user_data)
{
if (replica->ip != NULL) {
replica->ip->cmds->create_actions(replica->ip);
}
if (replica->container != NULL) {
replica->container->cmds->create_actions(replica->container);
}
if (replica->remote != NULL) {
replica->remote->cmds->create_actions(replica->remote);
}
return true;
}
/*!
* \internal
* \brief Create all actions needed for a given bundle resource
*
* \param[in,out] rsc Bundle resource to create actions for
*/
void
pcmk__bundle_create_actions(pe_resource_t *rsc)
{
pe_action_t *action = NULL;
GList *containers = NULL;
pe_resource_t *bundled_resource = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
pe__foreach_bundle_replica(rsc, create_replica_actions, NULL);
containers = pe__bundle_containers(rsc);
pcmk__create_instance_actions(rsc, containers);
g_list_free(containers);
bundled_resource = pe__bundled_resource(rsc);
if (bundled_resource != NULL) {
bundled_resource->cmds->create_actions(bundled_resource);
if (pcmk_is_set(bundled_resource->flags, pcmk_rsc_promotable)) {
pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_PROMOTE, true, true);
action = pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_PROMOTED,
true, true);
action->priority = INFINITY;
pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_DEMOTE, true, true);
action = pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_DEMOTED,
true, true);
action->priority = INFINITY;
}
}
}
/*!
* \internal
* \brief Create internal constraints for a bundle replica's resources
*
* \param[in,out] replica Replica to create internal constraints for
* \param[in,out] user_data Replica's parent bundle
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
replica_internal_constraints(pe__bundle_replica_t *replica, void *user_data)
{
pe_resource_t *bundle = user_data;
replica->container->cmds->internal_constraints(replica->container);
// Start bundle -> start replica container
pcmk__order_starts(bundle, replica->container,
pe_order_runnable_left|pe_order_implies_first_printed);
// Stop bundle -> stop replica child and container
if (replica->child != NULL) {
pcmk__order_stops(bundle, replica->child,
pe_order_implies_first_printed);
}
pcmk__order_stops(bundle, replica->container,
pe_order_implies_first_printed);
// Start replica container -> bundle is started
pcmk__order_resource_actions(replica->container, PCMK_ACTION_START, bundle,
PCMK_ACTION_RUNNING,
pe_order_implies_then_printed);
// Stop replica container -> bundle is stopped
pcmk__order_resource_actions(replica->container, PCMK_ACTION_STOP, bundle,
PCMK_ACTION_STOPPED,
pe_order_implies_then_printed);
if (replica->ip != NULL) {
replica->ip->cmds->internal_constraints(replica->ip);
// Replica IP address -> replica container (symmetric)
pcmk__order_starts(replica->ip, replica->container,
pe_order_runnable_left|pe_order_preserve);
pcmk__order_stops(replica->container, replica->ip,
pe_order_implies_first|pe_order_preserve);
pcmk__new_colocation("#ip-with-container", NULL, INFINITY, replica->ip,
replica->container, NULL, NULL,
pcmk__coloc_influence);
}
if (replica->remote != NULL) {
/* This handles ordering and colocating remote relative to container
* (via "#resource-with-container"). Since IP is also ordered and
* colocated relative to the container, we don't need to do anything
* explicit here with IP.
*/
replica->remote->cmds->internal_constraints(replica->remote);
}
if (replica->child != NULL) {
CRM_ASSERT(replica->remote != NULL);
// "Start remote then child" is implicit in scheduler's remote logic
}
return true;
}
/*!
* \internal
* \brief Create implicit constraints needed for a bundle resource
*
* \param[in,out] rsc Bundle resource to create implicit constraints for
*/
void
pcmk__bundle_internal_constraints(pe_resource_t *rsc)
{
pe_resource_t *bundled_resource = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
pe__foreach_bundle_replica(rsc, replica_internal_constraints, rsc);
bundled_resource = pe__bundled_resource(rsc);
if (bundled_resource == NULL) {
return;
}
// Start bundle -> start bundled clone
pcmk__order_resource_actions(rsc, PCMK_ACTION_START, bundled_resource,
PCMK_ACTION_START,
pe_order_implies_first_printed);
// Bundled clone is started -> bundle is started
pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_RUNNING,
rsc, PCMK_ACTION_RUNNING,
pe_order_implies_then_printed);
// Stop bundle -> stop bundled clone
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP, bundled_resource,
PCMK_ACTION_STOP,
pe_order_implies_first_printed);
// Bundled clone is stopped -> bundle is stopped
pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_STOPPED,
rsc, PCMK_ACTION_STOPPED,
pe_order_implies_then_printed);
bundled_resource->cmds->internal_constraints(bundled_resource);
if (!pcmk_is_set(bundled_resource->flags, pcmk_rsc_promotable)) {
return;
}
pcmk__promotable_restart_ordering(rsc);
// Demote bundle -> demote bundled clone
pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTE, bundled_resource,
PCMK_ACTION_DEMOTE,
pe_order_implies_first_printed);
// Bundled clone is demoted -> bundle is demoted
pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_DEMOTED,
rsc, PCMK_ACTION_DEMOTED,
pe_order_implies_then_printed);
// Promote bundle -> promote bundled clone
pcmk__order_resource_actions(rsc, PCMK_ACTION_PROMOTE,
bundled_resource, PCMK_ACTION_PROMOTE,
pe_order_implies_first_printed);
// Bundled clone is promoted -> bundle is promoted
pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_PROMOTED,
rsc, PCMK_ACTION_PROMOTED,
pe_order_implies_then_printed);
}
struct match_data {
const pe_node_t *node; // Node to compare against replica
pe_resource_t *container; // Replica container corresponding to node
};
/*!
* \internal
* \brief Check whether a replica container is assigned to a given node
*
* \param[in] replica Replica to check
* \param[in,out] user_data struct match_data with node to compare against
*
* \return true if the replica does not match (to indicate further replicas
* should be processed), otherwise false
*/
static bool
match_replica_container(const pe__bundle_replica_t *replica, void *user_data)
{
struct match_data *match_data = user_data;
if (pcmk__instance_matches(replica->container, match_data->node,
pcmk_role_unknown, false)) {
match_data->container = replica->container;
return false; // Match found, don't bother searching further replicas
}
return true; // No match, keep searching
}
/*!
* \internal
* \brief Get the host to which a bundle node is assigned
*
* \param[in] node Possible bundle node to check
*
* \return Node to which the container for \p node is assigned if \p node is a
* bundle node, otherwise \p node itself
*/
static const pe_node_t *
get_bundle_node_host(const pe_node_t *node)
{
if (pe__is_bundle_node(node)) {
const pe_resource_t *container = node->details->remote_rsc->container;
return container->fns->location(container, NULL, 0);
}
return node;
}
/*!
* \internal
* \brief Find a bundle container compatible with a dependent resource
*
* \param[in] dependent Dependent resource in colocation with bundle
* \param[in] bundle Bundle that \p dependent is colocated with
*
* \return A container from \p bundle assigned to the same node as \p dependent
* if assigned, otherwise assigned to any of dependent's allowed nodes,
* otherwise NULL.
*/
static pe_resource_t *
compatible_container(const pe_resource_t *dependent,
const pe_resource_t *bundle)
{
GList *scratch = NULL;
struct match_data match_data = { NULL, NULL };
// If dependent is assigned, only check there
match_data.node = dependent->fns->location(dependent, NULL, 0);
match_data.node = get_bundle_node_host(match_data.node);
if (match_data.node != NULL) {
pe__foreach_const_bundle_replica(bundle, match_replica_container,
&match_data);
return match_data.container;
}
// Otherwise, check for any of the dependent's allowed nodes
scratch = g_hash_table_get_values(dependent->allowed_nodes);
scratch = pcmk__sort_nodes(scratch, NULL);
for (const GList *iter = scratch; iter != NULL; iter = iter->next) {
match_data.node = iter->data;
match_data.node = get_bundle_node_host(match_data.node);
if (match_data.node == NULL) {
continue;
}
pe__foreach_const_bundle_replica(bundle, match_replica_container,
&match_data);
if (match_data.container != NULL) {
break;
}
}
g_list_free(scratch);
return match_data.container;
}
struct coloc_data {
const pcmk__colocation_t *colocation;
pe_resource_t *dependent;
GList *container_hosts;
};
/*!
* \internal
* \brief Apply a colocation score to replica node scores or resource priority
*
* \param[in] replica Replica of primary bundle resource in colocation
* \param[in,out] user_data struct coloc_data for colocation being applied
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
replica_apply_coloc_score(const pe__bundle_replica_t *replica, void *user_data)
{
struct coloc_data *coloc_data = user_data;
pe_node_t *chosen = NULL;
if (coloc_data->colocation->score < INFINITY) {
replica->container->cmds->apply_coloc_score(coloc_data->dependent,
replica->container,
coloc_data->colocation,
false);
return true;
}
chosen = replica->container->fns->location(replica->container, NULL, 0);
if ((chosen == NULL)
|| is_set_recursive(replica->container, pcmk_rsc_blocked, true)) {
return true;
}
if ((coloc_data->colocation->primary_role >= pcmk_role_promoted)
&& ((replica->child == NULL)
|| (replica->child->next_role < pcmk_role_promoted))) {
return true;
}
pe_rsc_trace(pe__const_top_resource(replica->container, true),
"Allowing mandatory colocation %s using %s @%d",
coloc_data->colocation->id, pe__node_name(chosen),
chosen->weight);
coloc_data->container_hosts = g_list_prepend(coloc_data->container_hosts,
chosen);
return true;
}
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node scores (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__bundle_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
struct coloc_data coloc_data = { colocation, dependent, NULL };
/* This should never be called for the bundle itself as a dependent.
* Instead, we add its colocation constraints to its containers and bundled
* primitive and call the apply_coloc_score() method for them as dependents.
*/
CRM_ASSERT((primary != NULL)
&& (primary->variant == pcmk_rsc_variant_bundle)
&& (dependent != NULL)
&& (dependent->variant == pcmk_rsc_variant_primitive)
&& (colocation != NULL) && !for_dependent);
- if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+ if (pcmk_is_set(primary->flags, pcmk_rsc_unassigned)) {
pe_rsc_trace(primary,
"Skipping applying colocation %s "
"because %s is still provisional",
colocation->id, primary->id);
return;
}
pe_rsc_trace(primary, "Applying colocation %s (%s with %s at %s)",
colocation->id, dependent->id, primary->id,
pcmk_readable_score(colocation->score));
/* If the constraint dependent is a clone or bundle, "dependent" here is one
* of its instances. Look for a compatible instance of this bundle.
*/
if (colocation->dependent->variant > pcmk_rsc_variant_group) {
const pe_resource_t *primary_container = compatible_container(dependent,
primary);
if (primary_container != NULL) { // Success, we found one
pe_rsc_debug(primary, "Pairing %s with %s",
dependent->id, primary_container->id);
dependent->cmds->apply_coloc_score(dependent, primary_container,
colocation, true);
} else if (colocation->score >= INFINITY) { // Failure, and it's fatal
crm_notice("%s cannot run because there is no compatible "
"instance of %s to colocate with",
dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true, true);
} else { // Failure, but we can ignore it
pe_rsc_debug(primary,
"%s cannot be colocated with any instance of %s",
dependent->id, primary->id);
}
return;
}
pe__foreach_const_bundle_replica(primary, replica_apply_coloc_score,
&coloc_data);
if (colocation->score >= INFINITY) {
pcmk__colocation_intersect_nodes(dependent, primary, colocation,
coloc_data.container_hosts, false);
}
g_list_free(coloc_data.container_hosts);
}
// Bundle implementation of resource_alloc_functions_t:with_this_colocations()
void
pcmk__with_bundle_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
const pe_resource_t *bundled_rsc = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle)
&& (orig_rsc != NULL) && (list != NULL));
// The bundle itself and its containers always get its colocations
if ((orig_rsc == rsc)
|| pcmk_is_set(orig_rsc->flags, pe_rsc_replica_container)) {
pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
return;
}
/* The bundled resource gets the colocations if it's promotable and we've
* begun choosing roles
*/
bundled_rsc = pe__bundled_resource(rsc);
if ((bundled_rsc == NULL)
|| !pcmk_is_set(bundled_rsc->flags, pcmk_rsc_promotable)
|| (pe__const_top_resource(orig_rsc, false) != bundled_rsc)) {
return;
}
if (orig_rsc == bundled_rsc) {
if (pe__clone_flag_is_set(orig_rsc, pe__clone_promotion_constrained)) {
/* orig_rsc is the clone and we're setting roles (or have already
* done so)
*/
pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
}
- } else if (!pcmk_is_set(orig_rsc->flags, pe_rsc_provisional)) {
+ } else if (!pcmk_is_set(orig_rsc->flags, pcmk_rsc_unassigned)) {
/* orig_rsc is an instance and is already assigned. If something
* requests colocations for orig_rsc now, it's for setting roles.
*/
pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
}
}
// Bundle implementation of resource_alloc_functions_t:this_with_colocations()
void
pcmk__bundle_with_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
const pe_resource_t *bundled_rsc = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle)
&& (orig_rsc != NULL) && (list != NULL));
// The bundle itself and its containers always get its colocations
if ((orig_rsc == rsc)
|| pcmk_is_set(orig_rsc->flags, pe_rsc_replica_container)) {
pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
return;
}
/* The bundled resource gets the colocations if it's promotable and we've
* begun choosing roles
*/
bundled_rsc = pe__bundled_resource(rsc);
if ((bundled_rsc == NULL)
|| !pcmk_is_set(bundled_rsc->flags, pcmk_rsc_promotable)
|| (pe__const_top_resource(orig_rsc, false) != bundled_rsc)) {
return;
}
if (orig_rsc == bundled_rsc) {
if (pe__clone_flag_is_set(orig_rsc, pe__clone_promotion_constrained)) {
/* orig_rsc is the clone and we're setting roles (or have already
* done so)
*/
pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
}
- } else if (!pcmk_is_set(orig_rsc->flags, pe_rsc_provisional)) {
+ } else if (!pcmk_is_set(orig_rsc->flags, pcmk_rsc_unassigned)) {
/* orig_rsc is an instance and is already assigned. If something
* requests colocations for orig_rsc now, it's for setting roles.
*/
pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
}
}
/*!
* \internal
* \brief Return action flags for a given bundle resource action
*
* \param[in,out] action Bundle resource action to get flags for
* \param[in] node If not NULL, limit effects to this node
*
* \return Flags appropriate to \p action on \p node
*/
uint32_t
pcmk__bundle_action_flags(pe_action_t *action, const pe_node_t *node)
{
GList *containers = NULL;
uint32_t flags = 0;
pe_resource_t *bundled_resource = NULL;
CRM_ASSERT((action != NULL) && (action->rsc != NULL)
&& (action->rsc->variant == pcmk_rsc_variant_bundle));
bundled_resource = pe__bundled_resource(action->rsc);
if (bundled_resource != NULL) {
// Clone actions are done on the bundled clone resource, not container
switch (get_complex_task(bundled_resource, action->task)) {
case pcmk_action_unspecified:
case pcmk_action_notify:
case pcmk_action_notified:
case pcmk_action_promote:
case pcmk_action_promoted:
case pcmk_action_demote:
case pcmk_action_demoted:
return pcmk__collective_action_flags(action,
bundled_resource->children,
node);
default:
break;
}
}
containers = pe__bundle_containers(action->rsc);
flags = pcmk__collective_action_flags(action, containers, node);
g_list_free(containers);
return flags;
}
/*!
* \internal
* \brief Apply a location constraint to a bundle replica
*
* \param[in,out] replica Replica to apply constraint to
* \param[in,out] user_data Location constraint to apply
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
apply_location_to_replica(pe__bundle_replica_t *replica, void *user_data)
{
pe__location_t *location = user_data;
if (replica->container != NULL) {
replica->container->cmds->apply_location(replica->container, location);
}
if (replica->ip != NULL) {
replica->ip->cmds->apply_location(replica->ip, location);
}
return true;
}
/*!
* \internal
* \brief Apply a location constraint to a bundle resource's allowed node scores
*
* \param[in,out] rsc Bundle resource to apply constraint to
* \param[in,out] location Location constraint to apply
*/
void
pcmk__bundle_apply_location(pe_resource_t *rsc, pe__location_t *location)
{
pe_resource_t *bundled_resource = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle)
&& (location != NULL));
pcmk__apply_location(rsc, location);
pe__foreach_bundle_replica(rsc, apply_location_to_replica, location);
bundled_resource = pe__bundled_resource(rsc);
if ((bundled_resource != NULL)
&& ((location->role_filter == pcmk_role_unpromoted)
|| (location->role_filter == pcmk_role_promoted))) {
bundled_resource->cmds->apply_location(bundled_resource, location);
bundled_resource->rsc_location = g_list_prepend(
bundled_resource->rsc_location, location);
}
}
#define XPATH_REMOTE "//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']"
/*!
* \internal
* \brief Add a bundle replica's actions to transition graph
*
* \param[in,out] replica Replica to add to graph
* \param[in] user_data Bundle that replica belongs to (for logging only)
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
add_replica_actions_to_graph(pe__bundle_replica_t *replica, void *user_data)
{
if ((replica->remote != NULL) && (replica->container != NULL)
&& pe__bundle_needs_remote_name(replica->remote)) {
/* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that
* run pacemaker-remoted inside, without needing a separate IP for
* the container. This is done by configuring the inner remote's
* connection host as the magic string "#uname", then
* replacing it with the underlying host when needed.
*/
xmlNode *nvpair = get_xpath_object(XPATH_REMOTE, replica->remote->xml,
LOG_ERR);
const char *calculated_addr = NULL;
// Replace the value in replica->remote->xml (if appropriate)
calculated_addr = pe__add_bundle_remote_name(replica->remote,
replica->remote->cluster,
nvpair, "value");
if (calculated_addr != NULL) {
/* Since this is for the bundle as a resource, and not any
* particular action, replace the value in the default
* parameters (not evaluated for node). create_graph_action()
* will grab it from there to replace it in node-evaluated
* parameters.
*/
GHashTable *params = pe_rsc_params(replica->remote,
NULL, replica->remote->cluster);
g_hash_table_replace(params,
strdup(XML_RSC_ATTR_REMOTE_RA_ADDR),
strdup(calculated_addr));
} else {
pe_resource_t *bundle = user_data;
/* The only way to get here is if the remote connection is
* neither currently running nor scheduled to run. That means we
* won't be doing any operations that require addr (only start
* requires it; we additionally use it to compare digests when
* unpacking status, promote, and migrate_from history, but
* that's already happened by this point).
*/
pe_rsc_info(bundle,
"Unable to determine address for bundle %s "
"remote connection", bundle->id);
}
}
if (replica->ip != NULL) {
replica->ip->cmds->add_actions_to_graph(replica->ip);
}
if (replica->container != NULL) {
replica->container->cmds->add_actions_to_graph(replica->container);
}
if (replica->remote != NULL) {
replica->remote->cmds->add_actions_to_graph(replica->remote);
}
return true;
}
/*!
* \internal
* \brief Add a bundle resource's actions to the transition graph
*
* \param[in,out] rsc Bundle resource whose actions should be added
*/
void
pcmk__bundle_add_actions_to_graph(pe_resource_t *rsc)
{
pe_resource_t *bundled_resource = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
bundled_resource = pe__bundled_resource(rsc);
if (bundled_resource != NULL) {
bundled_resource->cmds->add_actions_to_graph(bundled_resource);
}
pe__foreach_bundle_replica(rsc, add_replica_actions_to_graph, rsc);
}
struct probe_data {
pe_resource_t *bundle; // Bundle being probed
pe_node_t *node; // Node to create probes on
bool any_created; // Whether any probes have been created
};
/*!
* \internal
* \brief Order a bundle replica's start after another replica's probe
*
* \param[in,out] replica Replica to order start for
* \param[in,out] user_data Replica with probe to order after
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
order_replica_start_after(pe__bundle_replica_t *replica, void *user_data)
{
pe__bundle_replica_t *probed_replica = user_data;
if ((replica == probed_replica) || (replica->container == NULL)) {
return true;
}
pcmk__new_ordering(probed_replica->container,
pcmk__op_key(probed_replica->container->id,
PCMK_ACTION_MONITOR, 0),
NULL, replica->container,
pcmk__op_key(replica->container->id, PCMK_ACTION_START,
0),
NULL, pe_order_optional|pe_order_same_node,
replica->container->cluster);
return true;
}
/*!
* \internal
* \brief Create probes for a bundle replica's resources
*
* \param[in,out] replica Replica to create probes for
* \param[in,out] user_data struct probe_data
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
create_replica_probes(pe__bundle_replica_t *replica, void *user_data)
{
struct probe_data *probe_data = user_data;
if ((replica->ip != NULL)
&& replica->ip->cmds->create_probe(replica->ip, probe_data->node)) {
probe_data->any_created = true;
}
if ((replica->child != NULL)
&& pe__same_node(probe_data->node, replica->node)
&& replica->child->cmds->create_probe(replica->child,
probe_data->node)) {
probe_data->any_created = true;
}
if ((replica->container != NULL)
&& replica->container->cmds->create_probe(replica->container,
probe_data->node)) {
probe_data->any_created = true;
/* If we're limited to one replica per host (due to
* the lack of an IP range probably), then we don't
* want any of our peer containers starting until
* we've established that no other copies are already
* running.
*
* Partly this is to ensure that the maximum replicas per host is
* observed, but also to ensure that the containers
* don't fail to start because the necessary port
* mappings (which won't include an IP for uniqueness)
* are already taken
*/
if (probe_data->bundle->fns->max_per_node(probe_data->bundle) == 1) {
pe__foreach_bundle_replica(probe_data->bundle,
order_replica_start_after, replica);
}
}
if ((replica->container != NULL) && (replica->remote != NULL)
&& replica->remote->cmds->create_probe(replica->remote,
probe_data->node)) {
/* Do not probe the remote resource until we know where the container is
* running. This is required for REMOTE_CONTAINER_HACK to correctly
* probe remote resources.
*/
char *probe_uuid = pcmk__op_key(replica->remote->id,
PCMK_ACTION_MONITOR, 0);
pe_action_t *probe = find_first_action(replica->remote->actions,
probe_uuid, NULL,
probe_data->node);
free(probe_uuid);
if (probe != NULL) {
probe_data->any_created = true;
pe_rsc_trace(probe_data->bundle, "Ordering %s probe on %s",
replica->remote->id, pe__node_name(probe_data->node));
pcmk__new_ordering(replica->container,
pcmk__op_key(replica->container->id,
PCMK_ACTION_START, 0),
NULL, replica->remote, NULL, probe,
pe_order_probe, probe_data->bundle->cluster);
}
}
return true;
}
/*!
* \internal
*
* \brief Schedule any probes needed for a bundle resource on a node
*
* \param[in,out] rsc Bundle resource to create probes for
* \param[in,out] node Node to create probe on
*
* \return true if any probe was created, otherwise false
*/
bool
pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node)
{
struct probe_data probe_data = { rsc, node, false };
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
pe__foreach_bundle_replica(rsc, create_replica_probes, &probe_data);
return probe_data.any_created;
}
/*!
* \internal
* \brief Output actions for one bundle replica
*
* \param[in,out] replica Replica to output actions for
* \param[in] user_data Unused
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
output_replica_actions(pe__bundle_replica_t *replica, void *user_data)
{
if (replica->ip != NULL) {
replica->ip->cmds->output_actions(replica->ip);
}
if (replica->container != NULL) {
replica->container->cmds->output_actions(replica->container);
}
if (replica->remote != NULL) {
replica->remote->cmds->output_actions(replica->remote);
}
if (replica->child != NULL) {
replica->child->cmds->output_actions(replica->child);
}
return true;
}
/*!
* \internal
* \brief Output a summary of scheduled actions for a bundle resource
*
* \param[in,out] rsc Bundle resource to output actions for
*/
void
pcmk__output_bundle_actions(pe_resource_t *rsc)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
pe__foreach_bundle_replica(rsc, output_replica_actions, NULL);
}
// Bundle implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__bundle_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization)
{
pe_resource_t *container = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
- if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
return;
}
/* All bundle replicas are identical, so using the utilization of the first
* is sufficient for any. Only the implicit container resource can have
* utilization values.
*/
container = pe__first_container(rsc);
if (container != NULL) {
container->cmds->add_utilization(container, orig_rsc, all_rscs,
utilization);
}
}
// Bundle implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__bundle_shutdown_lock(pe_resource_t *rsc)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
// Bundles currently don't support shutdown locks
}
diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c
index 1adaac4a74..ab9ded60fc 100644
--- a/lib/pacemaker/pcmk_sched_clone.c
+++ b/lib/pacemaker/pcmk_sched_clone.c
@@ -1,709 +1,709 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Assign a clone resource's instances to nodes
*
* \param[in,out] rsc Clone resource to assign
* \param[in] prefer Node to prefer, if all else is equal
* \param[in] stop_if_fail If \c true and a primitive descendant of \p rsc
* can't be assigned to a node, set the
* descendant's next role to stopped and update
* existing actions
*
* \return NULL (clones are not assigned to a single node)
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
pe_node_t *
pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer,
bool stop_if_fail)
{
GList *colocations = NULL;
CRM_ASSERT(pe_rsc_is_clone(rsc));
- if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
return NULL; // Assignment has already been done
}
// Detect assignment loops
if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
return NULL;
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
// If this clone is promotable, consider nodes' promotion scores
if (pcmk_is_set(rsc->flags, pcmk_rsc_promotable)) {
pcmk__add_promotion_scores(rsc);
}
// If this clone is colocated with any other resources, assign those first
colocations = pcmk__this_with_colocations(rsc);
for (GList *iter = colocations; iter != NULL; iter = iter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) iter->data;
pe_rsc_trace(rsc, "%s: Assigning colocation %s primary %s first",
rsc->id, constraint->id, constraint->primary->id);
constraint->primary->cmds->assign(constraint->primary, prefer,
stop_if_fail);
}
g_list_free(colocations);
// If any resources are colocated with this one, consider their preferences
colocations = pcmk__with_this_colocations(rsc);
g_list_foreach(colocations, pcmk__add_dependent_scores, rsc);
g_list_free(colocations);
pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags,
pcmk_sched_output_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance);
pcmk__assign_instances(rsc, rsc->children, pe__clone_max(rsc),
pe__clone_node_max(rsc));
if (pcmk_is_set(rsc->flags, pcmk_rsc_promotable)) {
pcmk__set_instance_roles(rsc);
}
- pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating);
+ pe__clear_resource_flags(rsc, pcmk_rsc_unassigned|pe_rsc_allocating);
pe_rsc_trace(rsc, "Assigned clone %s", rsc->id);
return NULL;
}
/*!
* \internal
* \brief Create all actions needed for a given clone resource
*
* \param[in,out] rsc Clone resource to create actions for
*/
void
pcmk__clone_create_actions(pe_resource_t *rsc)
{
CRM_ASSERT(pe_rsc_is_clone(rsc));
pe_rsc_trace(rsc, "Creating actions for clone %s", rsc->id);
pcmk__create_instance_actions(rsc, rsc->children);
if (pcmk_is_set(rsc->flags, pcmk_rsc_promotable)) {
pcmk__create_promotable_actions(rsc);
}
}
/*!
* \internal
* \brief Create implicit constraints needed for a clone resource
*
* \param[in,out] rsc Clone resource to create implicit constraints for
*/
void
pcmk__clone_internal_constraints(pe_resource_t *rsc)
{
bool ordered = false;
CRM_ASSERT(pe_rsc_is_clone(rsc));
pe_rsc_trace(rsc, "Creating internal constraints for clone %s", rsc->id);
// Restart ordering: Stop -> stopped -> start -> started
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOPPED,
rsc, PCMK_ACTION_START,
pe_order_optional);
pcmk__order_resource_actions(rsc, PCMK_ACTION_START,
rsc, PCMK_ACTION_RUNNING,
pe_order_runnable_left);
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP,
rsc, PCMK_ACTION_STOPPED,
pe_order_runnable_left);
// Demoted -> stop and started -> promote
if (pcmk_is_set(rsc->flags, pcmk_rsc_promotable)) {
pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTED,
rsc, PCMK_ACTION_STOP,
pe_order_optional);
pcmk__order_resource_actions(rsc, PCMK_ACTION_RUNNING,
rsc, PCMK_ACTION_PROMOTE,
pe_order_runnable_left);
}
ordered = pe__clone_is_ordered(rsc);
if (ordered) {
/* Ordered clone instances must start and stop by instance number. The
* instances might have been previously shuffled for assignment or
* promotion purposes, so re-sort them.
*/
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
instance->cmds->internal_constraints(instance);
// Start clone -> start instance -> clone started
pcmk__order_starts(rsc, instance, pe_order_runnable_left
|pe_order_implies_first_printed);
pcmk__order_resource_actions(instance, PCMK_ACTION_START,
rsc, PCMK_ACTION_RUNNING,
pe_order_implies_then_printed);
// Stop clone -> stop instance -> clone stopped
pcmk__order_stops(rsc, instance, pe_order_implies_first_printed);
pcmk__order_resource_actions(instance, PCMK_ACTION_STOP,
rsc, PCMK_ACTION_STOPPED,
pe_order_implies_then_printed);
/* Instances of ordered clones must be started and stopped by instance
* number. Since only some instances may be starting or stopping, order
* each instance relative to every later instance.
*/
if (ordered) {
for (GList *later = iter->next;
later != NULL; later = later->next) {
pcmk__order_starts(instance, (pe_resource_t *) later->data,
pe_order_optional);
pcmk__order_stops((pe_resource_t *) later->data, instance,
pe_order_optional);
}
}
}
if (pcmk_is_set(rsc->flags, pcmk_rsc_promotable)) {
pcmk__order_promotable_instances(rsc);
}
}
/*!
* \internal
* \brief Check whether colocated resources can be interleaved
*
* \param[in] colocation Colocation constraint with clone as primary
*
* \return true if colocated resources can be interleaved, otherwise false
*/
static bool
can_interleave(const pcmk__colocation_t *colocation)
{
const pe_resource_t *dependent = colocation->dependent;
// Only colocations between clone or bundle resources use interleaving
if (dependent->variant <= pcmk_rsc_variant_group) {
return false;
}
// Only the dependent needs to be marked for interleaving
if (!crm_is_true(g_hash_table_lookup(dependent->meta,
XML_RSC_ATTR_INTERLEAVE))) {
return false;
}
/* @TODO Do we actually care about multiple primary instances sharing a
* dependent instance?
*/
if (dependent->fns->max_per_node(dependent)
!= colocation->primary->fns->max_per_node(colocation->primary)) {
pcmk__config_err("Cannot interleave %s and %s because they do not "
"support the same number of instances per node",
dependent->id, colocation->primary->id);
return false;
}
return true;
}
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node scores (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__clone_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
const GList *iter = NULL;
/* This should never be called for the clone itself as a dependent. Instead,
* we add its colocation constraints to its instances and call the
* apply_coloc_score() method for the instances as dependents.
*/
CRM_ASSERT(!for_dependent);
CRM_ASSERT((colocation != NULL) && pe_rsc_is_clone(primary)
&& (dependent != NULL)
&& (dependent->variant == pcmk_rsc_variant_primitive));
- if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+ if (pcmk_is_set(primary->flags, pcmk_rsc_unassigned)) {
pe_rsc_trace(primary,
"Delaying processing colocation %s "
"because cloned primary %s is still provisional",
colocation->id, primary->id);
return;
}
pe_rsc_trace(primary, "Processing colocation %s (%s with clone %s @%s)",
colocation->id, dependent->id, primary->id,
pcmk_readable_score(colocation->score));
// Apply role-specific colocations
if (pcmk_is_set(primary->flags, pcmk_rsc_promotable)
&& (colocation->primary_role != pcmk_role_unknown)) {
- if (pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
+ if (pcmk_is_set(dependent->flags, pcmk_rsc_unassigned)) {
// We're assigning the dependent to a node
pcmk__update_dependent_with_promotable(primary, dependent,
colocation);
return;
}
if (colocation->dependent_role == pcmk_role_promoted) {
// We're choosing a role for the dependent
pcmk__update_promotable_dependent_priority(primary, dependent,
colocation);
return;
}
}
// Apply interleaved colocations
if (can_interleave(colocation)) {
const pe_resource_t *primary_instance = NULL;
primary_instance = pcmk__find_compatible_instance(dependent, primary,
pcmk_role_unknown,
false);
if (primary_instance != NULL) {
pe_rsc_debug(primary, "Interleaving %s with %s",
dependent->id, primary_instance->id);
dependent->cmds->apply_coloc_score(dependent, primary_instance,
colocation, true);
} else if (colocation->score >= INFINITY) {
crm_notice("%s cannot run because it cannot interleave with "
"any instance of %s", dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true, true);
} else {
pe_rsc_debug(primary,
"%s will not colocate with %s "
"because no instance can interleave with it",
dependent->id, primary->id);
}
return;
}
// Apply mandatory colocations
if (colocation->score >= INFINITY) {
GList *primary_nodes = NULL;
// Dependent can run only where primary will have unblocked instances
for (iter = primary->children; iter != NULL; iter = iter->next) {
const pe_resource_t *instance = iter->data;
pe_node_t *chosen = instance->fns->location(instance, NULL, 0);
if ((chosen != NULL)
&& !is_set_recursive(instance, pcmk_rsc_blocked, TRUE)) {
pe_rsc_trace(primary, "Allowing %s: %s %d",
colocation->id, pe__node_name(chosen),
chosen->weight);
primary_nodes = g_list_prepend(primary_nodes, chosen);
}
}
pcmk__colocation_intersect_nodes(dependent, primary, colocation,
primary_nodes, false);
g_list_free(primary_nodes);
return;
}
// Apply optional colocations
for (iter = primary->children; iter != NULL; iter = iter->next) {
const pe_resource_t *instance = iter->data;
instance->cmds->apply_coloc_score(dependent, instance, colocation,
false);
}
}
// Clone implementation of resource_alloc_functions_t:with_this_colocations()
void
pcmk__with_clone_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return);
pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
if (rsc->parent != NULL) {
rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc, list);
}
}
// Clone implementation of resource_alloc_functions_t:this_with_colocations()
void
pcmk__clone_with_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return);
pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
if (rsc->parent != NULL) {
rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc, list);
}
}
/*!
* \internal
* \brief Return action flags for a given clone resource action
*
* \param[in,out] action Action to get flags for
* \param[in] node If not NULL, limit effects to this node
*
* \return Flags appropriate to \p action on \p node
*/
uint32_t
pcmk__clone_action_flags(pe_action_t *action, const pe_node_t *node)
{
CRM_ASSERT((action != NULL) && pe_rsc_is_clone(action->rsc));
return pcmk__collective_action_flags(action, action->rsc->children, node);
}
/*!
* \internal
* \brief Apply a location constraint to a clone resource's allowed node scores
*
* \param[in,out] rsc Clone resource to apply constraint to
* \param[in,out] location Location constraint to apply
*/
void
pcmk__clone_apply_location(pe_resource_t *rsc, pe__location_t *location)
{
CRM_CHECK((location != NULL) && pe_rsc_is_clone(rsc), return);
pcmk__apply_location(rsc, location);
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
instance->cmds->apply_location(instance, location);
}
}
// GFunc wrapper for calling the action_flags() resource method
static void
call_action_flags(gpointer data, gpointer user_data)
{
pe_resource_t *rsc = user_data;
rsc->cmds->action_flags((pe_action_t *) data, NULL);
}
/*!
* \internal
* \brief Add a clone resource's actions to the transition graph
*
* \param[in,out] rsc Resource whose actions should be added
*/
void
pcmk__clone_add_actions_to_graph(pe_resource_t *rsc)
{
CRM_ASSERT(pe_rsc_is_clone(rsc));
g_list_foreach(rsc->actions, call_action_flags, rsc);
pe__create_clone_notifications(rsc);
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
child_rsc->cmds->add_actions_to_graph(child_rsc);
}
pcmk__add_rsc_actions_to_graph(rsc);
pe__free_clone_notification_data(rsc);
}
/*!
* \internal
* \brief Check whether a resource or any children have been probed on a node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p node is in the known_on table of \p rsc or any of its
* children, otherwise false
*/
static bool
rsc_probed_on(const pe_resource_t *rsc, const pe_node_t *node)
{
if (rsc->children != NULL) {
for (GList *child_iter = rsc->children; child_iter != NULL;
child_iter = child_iter->next) {
pe_resource_t *child = (pe_resource_t *) child_iter->data;
if (rsc_probed_on(child, node)) {
return true;
}
}
return false;
}
if (rsc->known_on != NULL) {
GHashTableIter iter;
pe_node_t *known_node = NULL;
g_hash_table_iter_init(&iter, rsc->known_on);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) {
if (pe__same_node(node, known_node)) {
return true;
}
}
}
return false;
}
/*!
* \internal
* \brief Find clone instance that has been probed on given node
*
* \param[in] clone Clone resource to check
* \param[in] node Node to check
*
* \return Instance of \p clone that has been probed on \p node if any,
* otherwise NULL
*/
static pe_resource_t *
find_probed_instance_on(const pe_resource_t *clone, const pe_node_t *node)
{
for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
if (rsc_probed_on(instance, node)) {
return instance;
}
}
return NULL;
}
/*!
* \internal
* \brief Probe an anonymous clone on a node
*
* \param[in,out] clone Anonymous clone to probe
* \param[in,out] node Node to probe \p clone on
*/
static bool
probe_anonymous_clone(pe_resource_t *clone, pe_node_t *node)
{
// Check whether we already probed an instance on this node
pe_resource_t *child = find_probed_instance_on(clone, node);
// Otherwise, check if we plan to start an instance on this node
for (GList *iter = clone->children; (iter != NULL) && (child == NULL);
iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
const pe_node_t *instance_node = NULL;
instance_node = instance->fns->location(instance, NULL, 0);
if (pe__same_node(instance_node, node)) {
child = instance;
}
}
// Otherwise, use the first clone instance
if (child == NULL) {
child = clone->children->data;
}
// Anonymous clones only need to probe a single instance
return child->cmds->create_probe(child, node);
}
/*!
* \internal
* \brief Schedule any probes needed for a resource on a node
*
* \param[in,out] rsc Resource to create probe for
* \param[in,out] node Node to create probe on
*
* \return true if any probe was created, otherwise false
*/
bool
pcmk__clone_create_probe(pe_resource_t *rsc, pe_node_t *node)
{
CRM_ASSERT((node != NULL) && pe_rsc_is_clone(rsc));
if (rsc->exclusive_discover) {
/* The clone is configured to be probed only where a location constraint
* exists with resource-discovery set to exclusive.
*
* This check is not strictly necessary here since the instance's
* create_probe() method would also check, but doing it here is more
* efficient (especially for unique clones with a large number of
* instances), and affects the CRM_meta_notify_available_uname variable
* passed with notify actions.
*/
pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes,
node->details->id);
if ((allowed == NULL)
|| (allowed->rsc_discover_mode != pe_discover_exclusive)) {
/* This node is not marked for resource discovery. Remove it from
* allowed_nodes so that notifications contain only nodes that the
* clone can possibly run on.
*/
pe_rsc_trace(rsc,
"Skipping probe for %s on %s because resource has "
"exclusive discovery but is not allowed on node",
rsc->id, pe__node_name(node));
g_hash_table_remove(rsc->allowed_nodes, node->details->id);
return false;
}
}
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
if (pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
return pcmk__probe_resource_list(rsc->children, node);
} else {
return probe_anonymous_clone(rsc, node);
}
}
/*!
* \internal
* \brief Add meta-attributes relevant to transition graph actions to XML
*
* Add clone-specific meta-attributes needed for transition graph actions.
*
* \param[in] rsc Clone resource whose meta-attributes should be added
* \param[in,out] xml Transition graph action attributes XML to add to
*/
void
pcmk__clone_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml)
{
char *name = NULL;
CRM_ASSERT(pe_rsc_is_clone(rsc) && (xml != NULL));
name = crm_meta_name(XML_RSC_ATTR_UNIQUE);
crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pcmk_rsc_unique));
free(name);
name = crm_meta_name(XML_RSC_ATTR_NOTIFY);
crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pcmk_rsc_notify));
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX);
crm_xml_add_int(xml, name, pe__clone_max(rsc));
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX);
crm_xml_add_int(xml, name, pe__clone_node_max(rsc));
free(name);
if (pcmk_is_set(rsc->flags, pcmk_rsc_promotable)) {
int promoted_max = pe__clone_promoted_max(rsc);
int promoted_node_max = pe__clone_promoted_node_max(rsc);
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX);
crm_xml_add_int(xml, name, promoted_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX);
crm_xml_add_int(xml, name, promoted_node_max);
free(name);
/* @COMPAT Maintain backward compatibility with resource agents that
* expect the old names (deprecated since 2.0.0).
*/
name = crm_meta_name(PCMK_XA_PROMOTED_MAX_LEGACY);
crm_xml_add_int(xml, name, promoted_max);
free(name);
name = crm_meta_name(PCMK_XA_PROMOTED_NODE_MAX_LEGACY);
crm_xml_add_int(xml, name, promoted_node_max);
free(name);
}
}
// Clone implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__clone_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization)
{
bool existing = false;
pe_resource_t *child = NULL;
CRM_ASSERT(pe_rsc_is_clone(rsc) && (orig_rsc != NULL)
&& (utilization != NULL));
- if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
return;
}
// Look for any child already existing in the list
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
child = (pe_resource_t *) iter->data;
if (g_list_find(all_rscs, child)) {
existing = true; // Keep checking remaining children
} else {
// If this is a clone of a group, look for group's members
for (GList *member_iter = child->children; member_iter != NULL;
member_iter = member_iter->next) {
pe_resource_t *member = (pe_resource_t *) member_iter->data;
if (g_list_find(all_rscs, member) != NULL) {
// Add *child's* utilization, not group member's
child->cmds->add_utilization(child, orig_rsc, all_rscs,
utilization);
existing = true;
break;
}
}
}
}
if (!existing && (rsc->children != NULL)) {
// If nothing was found, still add first child's utilization
child = (pe_resource_t *) rsc->children->data;
child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization);
}
}
// Clone implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__clone_shutdown_lock(pe_resource_t *rsc)
{
CRM_ASSERT(pe_rsc_is_clone(rsc));
return; // Clones currently don't support shutdown locks
}
diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c
index cee11b0127..e1f7318d0b 100644
--- a/lib/pacemaker/pcmk_sched_colocation.c
+++ b/lib/pacemaker/pcmk_sched_colocation.c
@@ -1,1906 +1,1906 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/common/scheduler_internal.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include "crm/common/util.h"
#include "crm/common/xml_internal.h"
#include "crm/msg_xml.h"
#include "libpacemaker_private.h"
// Used to temporarily mark a node as unusable
#define INFINITY_HACK (INFINITY * -100)
/*!
* \internal
* \brief Compare two colocations according to priority
*
* Compare two colocations according to the order in which they should be
* considered, based on either their dependent resources or their primary
* resources -- preferring (in order):
* * Colocation that is not \c NULL
* * Colocation whose resource has higher priority
* * Colocation whose resource is of a higher-level variant
* (bundle > clone > group > primitive)
* * Colocation whose resource is promotable, if both are clones
* * Colocation whose resource has lower ID in lexicographic order
*
* \param[in] colocation1 First colocation to compare
* \param[in] colocation2 Second colocation to compare
* \param[in] dependent If \c true, compare colocations by dependent
* priority; otherwise compare them by primary priority
*
* \return A negative number if \p colocation1 should be considered first,
* a positive number if \p colocation2 should be considered first,
* or 0 if order doesn't matter
*/
static gint
cmp_colocation_priority(const pcmk__colocation_t *colocation1,
const pcmk__colocation_t *colocation2, bool dependent)
{
const pe_resource_t *rsc1 = NULL;
const pe_resource_t *rsc2 = NULL;
if (colocation1 == NULL) {
return 1;
}
if (colocation2 == NULL) {
return -1;
}
if (dependent) {
rsc1 = colocation1->dependent;
rsc2 = colocation2->dependent;
CRM_ASSERT(colocation1->primary != NULL);
} else {
rsc1 = colocation1->primary;
rsc2 = colocation2->primary;
CRM_ASSERT(colocation1->dependent != NULL);
}
CRM_ASSERT((rsc1 != NULL) && (rsc2 != NULL));
if (rsc1->priority > rsc2->priority) {
return -1;
}
if (rsc1->priority < rsc2->priority) {
return 1;
}
// Process clones before primitives and groups
if (rsc1->variant > rsc2->variant) {
return -1;
}
if (rsc1->variant < rsc2->variant) {
return 1;
}
/* @COMPAT scheduler <2.0.0: Process promotable clones before nonpromotable
* clones (probably unnecessary, but avoids having to update regression
* tests)
*/
if (rsc1->variant == pcmk_rsc_variant_clone) {
if (pcmk_is_set(rsc1->flags, pcmk_rsc_promotable)
&& !pcmk_is_set(rsc2->flags, pcmk_rsc_promotable)) {
return -1;
}
if (!pcmk_is_set(rsc1->flags, pcmk_rsc_promotable)
&& pcmk_is_set(rsc2->flags, pcmk_rsc_promotable)) {
return 1;
}
}
return strcmp(rsc1->id, rsc2->id);
}
/*!
* \internal
* \brief Compare two colocations according to priority based on dependents
*
* Compare two colocations according to the order in which they should be
* considered, based on their dependent resources -- preferring (in order):
* * Colocation that is not \c NULL
* * Colocation whose resource has higher priority
* * Colocation whose resource is of a higher-level variant
* (bundle > clone > group > primitive)
* * Colocation whose resource is promotable, if both are clones
* * Colocation whose resource has lower ID in lexicographic order
*
* \param[in] a First colocation to compare
* \param[in] b Second colocation to compare
*
* \return A negative number if \p a should be considered first,
* a positive number if \p b should be considered first,
* or 0 if order doesn't matter
*/
static gint
cmp_dependent_priority(gconstpointer a, gconstpointer b)
{
return cmp_colocation_priority(a, b, true);
}
/*!
* \internal
* \brief Compare two colocations according to priority based on primaries
*
* Compare two colocations according to the order in which they should be
* considered, based on their primary resources -- preferring (in order):
* * Colocation that is not \c NULL
* * Colocation whose primary has higher priority
* * Colocation whose primary is of a higher-level variant
* (bundle > clone > group > primitive)
* * Colocation whose primary is promotable, if both are clones
* * Colocation whose primary has lower ID in lexicographic order
*
* \param[in] a First colocation to compare
* \param[in] b Second colocation to compare
*
* \return A negative number if \p a should be considered first,
* a positive number if \p b should be considered first,
* or 0 if order doesn't matter
*/
static gint
cmp_primary_priority(gconstpointer a, gconstpointer b)
{
return cmp_colocation_priority(a, b, false);
}
/*!
* \internal
* \brief Add a "this with" colocation constraint to a sorted list
*
* \param[in,out] list List of constraints to add \p colocation to
* \param[in] colocation Colocation constraint to add to \p list
* \param[in] rsc Resource whose colocations we're getting (for
* logging only)
*
* \note The list will be sorted using cmp_primary_priority().
*/
void
pcmk__add_this_with(GList **list, const pcmk__colocation_t *colocation,
const pe_resource_t *rsc)
{
CRM_ASSERT((list != NULL) && (colocation != NULL) && (rsc != NULL));
pe_rsc_trace(rsc,
"Adding colocation %s (%s with %s using %s @%s) to "
"'this with' list for %s",
colocation->id, colocation->dependent->id,
colocation->primary->id, colocation->node_attribute,
pcmk_readable_score(colocation->score), rsc->id);
*list = g_list_insert_sorted(*list, (gpointer) colocation,
cmp_primary_priority);
}
/*!
* \internal
* \brief Add a list of "this with" colocation constraints to a list
*
* \param[in,out] list List of constraints to add \p addition to
* \param[in] addition List of colocation constraints to add to \p list
* \param[in] rsc Resource whose colocations we're getting (for
* logging only)
*
* \note The lists must be pre-sorted by cmp_primary_priority().
*/
void
pcmk__add_this_with_list(GList **list, GList *addition,
const pe_resource_t *rsc)
{
CRM_ASSERT((list != NULL) && (rsc != NULL));
pcmk__if_tracing(
{}, // Always add each colocation individually if tracing
{
if (*list == NULL) {
// Trivial case for efficiency if not tracing
*list = g_list_copy(addition);
return;
}
}
);
for (const GList *iter = addition; iter != NULL; iter = iter->next) {
pcmk__add_this_with(list, addition->data, rsc);
}
}
/*!
* \internal
* \brief Add a "with this" colocation constraint to a sorted list
*
* \param[in,out] list List of constraints to add \p colocation to
* \param[in] colocation Colocation constraint to add to \p list
* \param[in] rsc Resource whose colocations we're getting (for
* logging only)
*
* \note The list will be sorted using cmp_dependent_priority().
*/
void
pcmk__add_with_this(GList **list, const pcmk__colocation_t *colocation,
const pe_resource_t *rsc)
{
CRM_ASSERT((list != NULL) && (colocation != NULL) && (rsc != NULL));
pe_rsc_trace(rsc,
"Adding colocation %s (%s with %s using %s @%s) to "
"'with this' list for %s",
colocation->id, colocation->dependent->id,
colocation->primary->id, colocation->node_attribute,
pcmk_readable_score(colocation->score), rsc->id);
*list = g_list_insert_sorted(*list, (gpointer) colocation,
cmp_dependent_priority);
}
/*!
* \internal
* \brief Add a list of "with this" colocation constraints to a list
*
* \param[in,out] list List of constraints to add \p addition to
* \param[in] addition List of colocation constraints to add to \p list
* \param[in] rsc Resource whose colocations we're getting (for
* logging only)
*
* \note The lists must be pre-sorted by cmp_dependent_priority().
*/
void
pcmk__add_with_this_list(GList **list, GList *addition,
const pe_resource_t *rsc)
{
CRM_ASSERT((list != NULL) && (rsc != NULL));
pcmk__if_tracing(
{}, // Always add each colocation individually if tracing
{
if (*list == NULL) {
// Trivial case for efficiency if not tracing
*list = g_list_copy(addition);
return;
}
}
);
for (const GList *iter = addition; iter != NULL; iter = iter->next) {
pcmk__add_with_this(list, addition->data, rsc);
}
}
/*!
* \internal
* \brief Add orderings necessary for an anti-colocation constraint
*
* \param[in,out] first_rsc One resource in an anti-colocation
* \param[in] first_role Anti-colocation role of \p first_rsc
* \param[in] then_rsc Other resource in the anti-colocation
* \param[in] then_role Anti-colocation role of \p then_rsc
*/
static void
anti_colocation_order(pe_resource_t *first_rsc, int first_role,
pe_resource_t *then_rsc, int then_role)
{
const char *first_tasks[] = { NULL, NULL };
const char *then_tasks[] = { NULL, NULL };
/* Actions to make first_rsc lose first_role */
if (first_role == pcmk_role_promoted) {
first_tasks[0] = PCMK_ACTION_DEMOTE;
} else {
first_tasks[0] = PCMK_ACTION_STOP;
if (first_role == pcmk_role_unpromoted) {
first_tasks[1] = PCMK_ACTION_PROMOTE;
}
}
/* Actions to make then_rsc gain then_role */
if (then_role == pcmk_role_promoted) {
then_tasks[0] = PCMK_ACTION_PROMOTE;
} else {
then_tasks[0] = PCMK_ACTION_START;
if (then_role == pcmk_role_unpromoted) {
then_tasks[1] = PCMK_ACTION_DEMOTE;
}
}
for (int first_lpc = 0;
(first_lpc <= 1) && (first_tasks[first_lpc] != NULL); first_lpc++) {
for (int then_lpc = 0;
(then_lpc <= 1) && (then_tasks[then_lpc] != NULL); then_lpc++) {
pcmk__order_resource_actions(first_rsc, first_tasks[first_lpc],
then_rsc, then_tasks[then_lpc],
pe_order_anti_colocation);
}
}
}
/*!
* \internal
* \brief Add a new colocation constraint to a cluster working set
*
* \param[in] id XML ID for this constraint
* \param[in] node_attr Colocate by this attribute (NULL for #uname)
* \param[in] score Constraint score
* \param[in,out] dependent Resource to be colocated
* \param[in,out] primary Resource to colocate \p dependent with
* \param[in] dependent_role Current role of \p dependent
* \param[in] primary_role Current role of \p primary
* \param[in] flags Group of enum pcmk__coloc_flags
*/
void
pcmk__new_colocation(const char *id, const char *node_attr, int score,
pe_resource_t *dependent, pe_resource_t *primary,
const char *dependent_role, const char *primary_role,
uint32_t flags)
{
pcmk__colocation_t *new_con = NULL;
CRM_CHECK(id != NULL, return);
if ((dependent == NULL) || (primary == NULL)) {
pcmk__config_err("Ignoring colocation '%s' because resource "
"does not exist", id);
return;
}
if (score == 0) {
pe_rsc_trace(dependent,
"Ignoring colocation '%s' (%s with %s) because score is 0",
id, dependent->id, primary->id);
return;
}
new_con = calloc(1, sizeof(pcmk__colocation_t));
CRM_ASSERT(new_con != NULL);
if (pcmk__str_eq(dependent_role, PCMK__ROLE_STARTED,
pcmk__str_null_matches|pcmk__str_casei)) {
dependent_role = PCMK__ROLE_UNKNOWN;
}
if (pcmk__str_eq(primary_role, PCMK__ROLE_STARTED,
pcmk__str_null_matches|pcmk__str_casei)) {
primary_role = PCMK__ROLE_UNKNOWN;
}
new_con->id = id;
new_con->dependent = dependent;
new_con->primary = primary;
new_con->score = score;
new_con->dependent_role = text2role(dependent_role);
new_con->primary_role = text2role(primary_role);
new_con->node_attribute = pcmk__s(node_attr, CRM_ATTR_UNAME);
new_con->flags = flags;
pcmk__add_this_with(&(dependent->rsc_cons), new_con, dependent);
pcmk__add_with_this(&(primary->rsc_cons_lhs), new_con, primary);
dependent->cluster->colocation_constraints = g_list_prepend(
dependent->cluster->colocation_constraints, new_con);
if (score <= -INFINITY) {
anti_colocation_order(dependent, new_con->dependent_role, primary,
new_con->primary_role);
anti_colocation_order(primary, new_con->primary_role, dependent,
new_con->dependent_role);
}
}
/*!
* \internal
* \brief Return the boolean influence corresponding to configuration
*
* \param[in] coloc_id Colocation XML ID (for error logging)
* \param[in] rsc Resource involved in constraint (for default)
* \param[in] influence_s String value of influence option
*
* \return pcmk__coloc_influence if string evaluates true, or string is NULL or
* invalid and resource's critical option evaluates true, otherwise
* pcmk__coloc_none
*/
static uint32_t
unpack_influence(const char *coloc_id, const pe_resource_t *rsc,
const char *influence_s)
{
if (influence_s != NULL) {
int influence_i = 0;
if (crm_str_to_boolean(influence_s, &influence_i) < 0) {
pcmk__config_err("Constraint '%s' has invalid value for "
XML_COLOC_ATTR_INFLUENCE " (using default)",
coloc_id);
} else {
return (influence_i == 0)? pcmk__coloc_none : pcmk__coloc_influence;
}
}
if (pcmk_is_set(rsc->flags, pe_rsc_critical)) {
return pcmk__coloc_influence;
}
return pcmk__coloc_none;
}
static void
unpack_colocation_set(xmlNode *set, int score, const char *coloc_id,
const char *influence_s, pe_working_set_t *data_set)
{
xmlNode *xml_rsc = NULL;
pe_resource_t *other = NULL;
pe_resource_t *resource = NULL;
const char *set_id = ID(set);
const char *role = crm_element_value(set, "role");
bool with_previous = false;
int local_score = score;
bool sequential = false;
uint32_t flags = pcmk__coloc_none;
const char *xml_rsc_id = NULL;
const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE);
if (score_s) {
local_score = char2score(score_s);
}
if (local_score == 0) {
crm_trace("Ignoring colocation '%s' for set '%s' because score is 0",
coloc_id, set_id);
return;
}
/* @COMPAT The deprecated "ordering" attribute specifies whether resources
* in a positive-score set are colocated with the previous or next resource.
*/
if (pcmk__str_eq(crm_element_value(set, "ordering"), "group",
pcmk__str_null_matches|pcmk__str_casei)) {
with_previous = true;
} else {
pe_warn_once(pe_wo_set_ordering,
"Support for 'ordering' other than 'group' in "
XML_CONS_TAG_RSC_SET " (such as %s) is deprecated and "
"will be removed in a future release", set_id);
}
if ((pcmk__xe_get_bool_attr(set, "sequential", &sequential) == pcmk_rc_ok)
&& !sequential) {
return;
}
if (local_score > 0) {
for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
xml_rsc_id = ID(xml_rsc);
resource = pcmk__find_constraint_resource(data_set->resources,
xml_rsc_id);
if (resource == NULL) {
// Should be possible only with validation disabled
pcmk__config_err("Ignoring %s and later resources in set %s: "
"No such resource", xml_rsc_id, set_id);
return;
}
if (other != NULL) {
flags = pcmk__coloc_explicit
| unpack_influence(coloc_id, resource, influence_s);
if (with_previous) {
pe_rsc_trace(resource, "Colocating %s with %s in set %s",
resource->id, other->id, set_id);
pcmk__new_colocation(set_id, NULL, local_score, resource,
other, role, role, flags);
} else {
pe_rsc_trace(resource, "Colocating %s with %s in set %s",
other->id, resource->id, set_id);
pcmk__new_colocation(set_id, NULL, local_score, other,
resource, role, role, flags);
}
}
other = resource;
}
} else {
/* Anti-colocating with every prior resource is
* the only way to ensure the intuitive result
* (i.e. that no one in the set can run with anyone else in the set)
*/
for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
xmlNode *xml_rsc_with = NULL;
xml_rsc_id = ID(xml_rsc);
resource = pcmk__find_constraint_resource(data_set->resources,
xml_rsc_id);
if (resource == NULL) {
// Should be possible only with validation disabled
pcmk__config_err("Ignoring %s and later resources in set %s: "
"No such resource", xml_rsc_id, set_id);
return;
}
flags = pcmk__coloc_explicit
| unpack_influence(coloc_id, resource, influence_s);
for (xml_rsc_with = first_named_child(set, XML_TAG_RESOURCE_REF);
xml_rsc_with != NULL;
xml_rsc_with = crm_next_same_xml(xml_rsc_with)) {
xml_rsc_id = ID(xml_rsc_with);
if (pcmk__str_eq(resource->id, xml_rsc_id, pcmk__str_none)) {
break;
}
other = pcmk__find_constraint_resource(data_set->resources,
xml_rsc_id);
CRM_ASSERT(other != NULL); // We already processed it
pcmk__new_colocation(set_id, NULL, local_score,
resource, other, role, role, flags);
}
}
}
}
/*!
* \internal
* \brief Colocate two resource sets relative to each other
*
* \param[in] id Colocation XML ID
* \param[in] set1 Dependent set
* \param[in] set2 Primary set
* \param[in] score Colocation score
* \param[in] influence_s Value of colocation's "influence" attribute
* \param[in,out] data_set Cluster working set
*/
static void
colocate_rsc_sets(const char *id, const xmlNode *set1, const xmlNode *set2,
int score, const char *influence_s,
pe_working_set_t *data_set)
{
xmlNode *xml_rsc = NULL;
pe_resource_t *rsc_1 = NULL;
pe_resource_t *rsc_2 = NULL;
const char *xml_rsc_id = NULL;
const char *role_1 = crm_element_value(set1, "role");
const char *role_2 = crm_element_value(set2, "role");
int rc = pcmk_rc_ok;
bool sequential = false;
uint32_t flags = pcmk__coloc_none;
if (score == 0) {
crm_trace("Ignoring colocation '%s' between sets %s and %s "
"because score is 0", id, ID(set1), ID(set2));
return;
}
rc = pcmk__xe_get_bool_attr(set1, "sequential", &sequential);
if ((rc != pcmk_rc_ok) || sequential) {
// Get the first one
xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
if (xml_rsc != NULL) {
xml_rsc_id = ID(xml_rsc);
rsc_1 = pcmk__find_constraint_resource(data_set->resources,
xml_rsc_id);
if (rsc_1 == NULL) {
// Should be possible only with validation disabled
pcmk__config_err("Ignoring colocation of set %s with set %s "
"because first resource %s not found",
ID(set1), ID(set2), xml_rsc_id);
return;
}
}
}
rc = pcmk__xe_get_bool_attr(set2, "sequential", &sequential);
if ((rc != pcmk_rc_ok) || sequential) {
// Get the last one
for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
xml_rsc_id = ID(xml_rsc);
}
rsc_2 = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id);
if (rsc_2 == NULL) {
// Should be possible only with validation disabled
pcmk__config_err("Ignoring colocation of set %s with set %s "
"because last resource %s not found",
ID(set1), ID(set2), xml_rsc_id);
return;
}
}
if ((rsc_1 != NULL) && (rsc_2 != NULL)) { // Both sets are sequential
flags = pcmk__coloc_explicit | unpack_influence(id, rsc_1, influence_s);
pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2,
flags);
} else if (rsc_1 != NULL) { // Only set1 is sequential
flags = pcmk__coloc_explicit | unpack_influence(id, rsc_1, influence_s);
for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
xml_rsc_id = ID(xml_rsc);
rsc_2 = pcmk__find_constraint_resource(data_set->resources,
xml_rsc_id);
if (rsc_2 == NULL) {
// Should be possible only with validation disabled
pcmk__config_err("Ignoring set %s colocation with resource %s "
"in set %s: No such resource",
ID(set1), xml_rsc_id, ID(set2));
continue;
}
pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1,
role_2, flags);
}
} else if (rsc_2 != NULL) { // Only set2 is sequential
for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
xml_rsc_id = ID(xml_rsc);
rsc_1 = pcmk__find_constraint_resource(data_set->resources,
xml_rsc_id);
if (rsc_1 == NULL) {
// Should be possible only with validation disabled
pcmk__config_err("Ignoring colocation of set %s resource %s "
"with set %s: No such resource",
ID(set1), xml_rsc_id, ID(set2));
continue;
}
flags = pcmk__coloc_explicit
| unpack_influence(id, rsc_1, influence_s);
pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1,
role_2, flags);
}
} else { // Neither set is sequential
for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF);
xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) {
xmlNode *xml_rsc_2 = NULL;
xml_rsc_id = ID(xml_rsc);
rsc_1 = pcmk__find_constraint_resource(data_set->resources,
xml_rsc_id);
if (rsc_1 == NULL) {
// Should be possible only with validation disabled
pcmk__config_err("Ignoring colocation of set %s resource %s "
"with set %s: No such resource",
ID(set1), xml_rsc_id, ID(set2));
continue;
}
flags = pcmk__coloc_explicit
| unpack_influence(id, rsc_1, influence_s);
for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF);
xml_rsc_2 != NULL;
xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) {
xml_rsc_id = ID(xml_rsc_2);
rsc_2 = pcmk__find_constraint_resource(data_set->resources,
xml_rsc_id);
if (rsc_2 == NULL) {
// Should be possible only with validation disabled
pcmk__config_err("Ignoring colocation of set %s resource "
"%s with set %s resource %s: No such "
"resource", ID(set1), ID(xml_rsc),
ID(set2), xml_rsc_id);
continue;
}
pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2,
role_1, role_2, flags);
}
}
}
}
static void
unpack_simple_colocation(xmlNode *xml_obj, const char *id,
const char *influence_s, pe_working_set_t *data_set)
{
int score_i = 0;
uint32_t flags = pcmk__coloc_none;
const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
const char *dependent_id = crm_element_value(xml_obj,
XML_COLOC_ATTR_SOURCE);
const char *primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET);
const char *dependent_role = crm_element_value(xml_obj,
XML_COLOC_ATTR_SOURCE_ROLE);
const char *primary_role = crm_element_value(xml_obj,
XML_COLOC_ATTR_TARGET_ROLE);
const char *attr = crm_element_value(xml_obj, XML_COLOC_ATTR_NODE_ATTR);
const char *primary_instance = NULL;
const char *dependent_instance = NULL;
pe_resource_t *primary = NULL;
pe_resource_t *dependent = NULL;
primary = pcmk__find_constraint_resource(data_set->resources, primary_id);
dependent = pcmk__find_constraint_resource(data_set->resources,
dependent_id);
// @COMPAT: Deprecated since 2.1.5
primary_instance = crm_element_value(xml_obj,
XML_COLOC_ATTR_TARGET_INSTANCE);
dependent_instance = crm_element_value(xml_obj,
XML_COLOC_ATTR_SOURCE_INSTANCE);
if (dependent_instance != NULL) {
pe_warn_once(pe_wo_coloc_inst,
"Support for " XML_COLOC_ATTR_SOURCE_INSTANCE " is "
"deprecated and will be removed in a future release.");
}
if (primary_instance != NULL) {
pe_warn_once(pe_wo_coloc_inst,
"Support for " XML_COLOC_ATTR_TARGET_INSTANCE " is "
"deprecated and will be removed in a future release.");
}
if (dependent == NULL) {
pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
"does not exist", id, dependent_id);
return;
} else if (primary == NULL) {
pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
"does not exist", id, primary_id);
return;
} else if ((dependent_instance != NULL) && !pe_rsc_is_clone(dependent)) {
pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
"is not a clone but instance '%s' was requested",
id, dependent_id, dependent_instance);
return;
} else if ((primary_instance != NULL) && !pe_rsc_is_clone(primary)) {
pcmk__config_err("Ignoring constraint '%s' because resource '%s' "
"is not a clone but instance '%s' was requested",
id, primary_id, primary_instance);
return;
}
if (dependent_instance != NULL) {
dependent = find_clone_instance(dependent, dependent_instance);
if (dependent == NULL) {
pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
"does not have an instance '%s'",
id, dependent_id, dependent_instance);
return;
}
}
if (primary_instance != NULL) {
primary = find_clone_instance(primary, primary_instance);
if (primary == NULL) {
pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
"does not have an instance '%s'",
"'%s'", id, primary_id, primary_instance);
return;
}
}
if (pcmk__xe_attr_is_true(xml_obj, XML_CONS_ATTR_SYMMETRICAL)) {
pcmk__config_warn("The colocation constraint '"
XML_CONS_ATTR_SYMMETRICAL
"' attribute has been removed");
}
if (score) {
score_i = char2score(score);
}
flags = pcmk__coloc_explicit | unpack_influence(id, dependent, influence_s);
pcmk__new_colocation(id, attr, score_i, dependent, primary,
dependent_role, primary_role, flags);
}
// \return Standard Pacemaker return code
static int
unpack_colocation_tags(xmlNode *xml_obj, xmlNode **expanded_xml,
pe_working_set_t *data_set)
{
const char *id = NULL;
const char *dependent_id = NULL;
const char *primary_id = NULL;
const char *dependent_role = NULL;
const char *primary_role = NULL;
pe_resource_t *dependent = NULL;
pe_resource_t *primary = NULL;
pe_tag_t *dependent_tag = NULL;
pe_tag_t *primary_tag = NULL;
xmlNode *dependent_set = NULL;
xmlNode *primary_set = NULL;
bool any_sets = false;
*expanded_xml = NULL;
CRM_CHECK(xml_obj != NULL, return EINVAL);
id = ID(xml_obj);
if (id == NULL) {
pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID,
xml_obj->name);
return pcmk_rc_unpack_error;
}
// Check whether there are any resource sets with template or tag references
*expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set);
if (*expanded_xml != NULL) {
crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation");
return pcmk_rc_ok;
}
dependent_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE);
primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET);
if ((dependent_id == NULL) || (primary_id == NULL)) {
return pcmk_rc_ok;
}
if (!pcmk__valid_resource_or_tag(data_set, dependent_id, &dependent,
&dependent_tag)) {
pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
"valid resource or tag", id, dependent_id);
return pcmk_rc_unpack_error;
}
if (!pcmk__valid_resource_or_tag(data_set, primary_id, &primary,
&primary_tag)) {
pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
"valid resource or tag", id, primary_id);
return pcmk_rc_unpack_error;
}
if ((dependent != NULL) && (primary != NULL)) {
/* Neither side references any template/tag. */
return pcmk_rc_ok;
}
if ((dependent_tag != NULL) && (primary_tag != NULL)) {
// A colocation constraint between two templates/tags makes no sense
pcmk__config_err("Ignoring constraint '%s' because two templates or "
"tags cannot be colocated", id);
return pcmk_rc_unpack_error;
}
dependent_role = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE);
primary_role = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE);
*expanded_xml = copy_xml(xml_obj);
// Convert dependent's template/tag reference into constraint resource_set
if (!pcmk__tag_to_set(*expanded_xml, &dependent_set, XML_COLOC_ATTR_SOURCE,
true, data_set)) {
free_xml(*expanded_xml);
*expanded_xml = NULL;
return pcmk_rc_unpack_error;
}
if (dependent_set != NULL) {
if (dependent_role != NULL) {
// Move "rsc-role" into converted resource_set as "role"
crm_xml_add(dependent_set, "role", dependent_role);
xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_SOURCE_ROLE);
}
any_sets = true;
}
// Convert primary's template/tag reference into constraint resource_set
if (!pcmk__tag_to_set(*expanded_xml, &primary_set, XML_COLOC_ATTR_TARGET,
true, data_set)) {
free_xml(*expanded_xml);
*expanded_xml = NULL;
return pcmk_rc_unpack_error;
}
if (primary_set != NULL) {
if (primary_role != NULL) {
// Move "with-rsc-role" into converted resource_set as "role"
crm_xml_add(primary_set, "role", primary_role);
xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_TARGET_ROLE);
}
any_sets = true;
}
if (any_sets) {
crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation");
} else {
free_xml(*expanded_xml);
*expanded_xml = NULL;
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Parse a colocation constraint from XML into a cluster working set
*
* \param[in,out] xml_obj Colocation constraint XML to unpack
* \param[in,out] data_set Cluster working set to add constraint to
*/
void
pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set)
{
int score_i = 0;
xmlNode *set = NULL;
xmlNode *last = NULL;
xmlNode *orig_xml = NULL;
xmlNode *expanded_xml = NULL;
const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
const char *score = NULL;
const char *influence_s = NULL;
if (pcmk__str_empty(id)) {
pcmk__config_err("Ignoring " XML_CONS_TAG_RSC_DEPEND
" without " CRM_ATTR_ID);
return;
}
if (unpack_colocation_tags(xml_obj, &expanded_xml,
data_set) != pcmk_rc_ok) {
return;
}
if (expanded_xml != NULL) {
orig_xml = xml_obj;
xml_obj = expanded_xml;
}
score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
if (score != NULL) {
score_i = char2score(score);
}
influence_s = crm_element_value(xml_obj, XML_COLOC_ATTR_INFLUENCE);
for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL;
set = crm_next_same_xml(set)) {
set = expand_idref(set, data_set->input);
if (set == NULL) { // Configuration error, message already logged
if (expanded_xml != NULL) {
free_xml(expanded_xml);
}
return;
}
if (pcmk__str_empty(ID(set))) {
pcmk__config_err("Ignoring " XML_CONS_TAG_RSC_SET
" without " CRM_ATTR_ID);
continue;
}
unpack_colocation_set(set, score_i, id, influence_s, data_set);
if (last != NULL) {
colocate_rsc_sets(id, last, set, score_i, influence_s, data_set);
}
last = set;
}
if (expanded_xml) {
free_xml(expanded_xml);
xml_obj = orig_xml;
}
if (last == NULL) {
unpack_simple_colocation(xml_obj, id, influence_s, data_set);
}
}
/*!
* \internal
* \brief Make actions of a given type unrunnable for a given resource
*
* \param[in,out] rsc Resource whose actions should be blocked
* \param[in] task Name of action to block
* \param[in] reason Unrunnable start action causing the block
*/
static void
mark_action_blocked(pe_resource_t *rsc, const char *task,
const pe_resource_t *reason)
{
GList *iter = NULL;
char *reason_text = crm_strdup_printf("colocation with %s", reason->id);
for (iter = rsc->actions; iter != NULL; iter = iter->next) {
pe_action_t *action = iter->data;
if (pcmk_is_set(action->flags, pe_action_runnable)
&& pcmk__str_eq(action->task, task, pcmk__str_none)) {
pe__clear_action_flags(action, pe_action_runnable);
pe_action_set_reason(action, reason_text, false);
pcmk__block_colocation_dependents(action);
pcmk__update_action_for_orderings(action, rsc->cluster);
}
}
// If parent resource can't perform an action, neither can any children
for (iter = rsc->children; iter != NULL; iter = iter->next) {
mark_action_blocked((pe_resource_t *) (iter->data), task, reason);
}
free(reason_text);
}
/*!
* \internal
* \brief If an action is unrunnable, block any relevant dependent actions
*
* If a given action is an unrunnable start or promote, block the start or
* promote actions of resources colocated with it, as appropriate to the
* colocations' configured roles.
*
* \param[in,out] action Action to check
*/
void
pcmk__block_colocation_dependents(pe_action_t *action)
{
GList *iter = NULL;
GList *colocations = NULL;
pe_resource_t *rsc = NULL;
bool is_start = false;
if (pcmk_is_set(action->flags, pe_action_runnable)) {
return; // Only unrunnable actions block dependents
}
is_start = pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_none);
if (!is_start
&& !pcmk__str_eq(action->task, PCMK_ACTION_PROMOTE, pcmk__str_none)) {
return; // Only unrunnable starts and promotes block dependents
}
CRM_ASSERT(action->rsc != NULL); // Start and promote are resource actions
/* If this resource is part of a collective resource, dependents are blocked
* only if all instances of the collective are unrunnable, so check the
* collective resource.
*/
rsc = uber_parent(action->rsc);
if (rsc->parent != NULL) {
rsc = rsc->parent; // Bundle
}
// Colocation fails only if entire primary can't reach desired role
for (iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = iter->data;
pe_action_t *child_action = find_first_action(child->actions, NULL,
action->task, NULL);
if ((child_action == NULL)
|| pcmk_is_set(child_action->flags, pe_action_runnable)) {
crm_trace("Not blocking %s colocation dependents because "
"at least %s has runnable %s",
rsc->id, child->id, action->task);
return; // At least one child can reach desired role
}
}
crm_trace("Blocking %s colocation dependents due to unrunnable %s %s",
rsc->id, action->rsc->id, action->task);
// Check each colocation where this resource is primary
colocations = pcmk__with_this_colocations(rsc);
for (iter = colocations; iter != NULL; iter = iter->next) {
pcmk__colocation_t *colocation = iter->data;
if (colocation->score < INFINITY) {
continue; // Only mandatory colocations block dependent
}
/* If the primary can't start, the dependent can't reach its colocated
* role, regardless of what the primary or dependent colocation role is.
*
* If the primary can't be promoted, the dependent can't reach its
* colocated role if the primary's colocation role is promoted.
*/
if (!is_start && (colocation->primary_role != pcmk_role_promoted)) {
continue;
}
// Block the dependent from reaching its colocated role
if (colocation->dependent_role == pcmk_role_promoted) {
mark_action_blocked(colocation->dependent, PCMK_ACTION_PROMOTE,
action->rsc);
} else {
mark_action_blocked(colocation->dependent, PCMK_ACTION_START,
action->rsc);
}
}
g_list_free(colocations);
}
/*!
* \internal
* \brief Get the resource to use for role comparisons
*
* A bundle replica includes a container and possibly an instance of the bundled
* resource. The dependent in a "with bundle" colocation is colocated with a
* particular bundle container. However, if the colocation includes a role, then
* the role must be checked on the bundled resource instance inside the
* container. The container itself will never be promoted; the bundled resource
* may be.
*
* If the given resource is a bundle replica container, return the resource
* inside it, if any. Otherwise, return the resource itself.
*
* \param[in] rsc Resource to check
*
* \return Resource to use for role comparisons
*/
static const pe_resource_t *
get_resource_for_role(const pe_resource_t *rsc)
{
if (pcmk_is_set(rsc->flags, pe_rsc_replica_container)) {
const pe_resource_t *child = pe__get_rsc_in_container(rsc);
if (child != NULL) {
return child;
}
}
return rsc;
}
/*!
* \internal
* \brief Determine how a colocation constraint should affect a resource
*
* Colocation constraints have different effects at different points in the
* scheduler sequence. Initially, they affect a resource's location; once that
* is determined, then for promotable clones they can affect a resource
* instance's role; after both are determined, the constraints no longer matter.
* Given a specific colocation constraint, check what has been done so far to
* determine what should be affected at the current point in the scheduler.
*
* \param[in] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint
* \param[in] preview If true, pretend resources have already been assigned
*
* \return How colocation constraint should be applied at this point
*/
enum pcmk__coloc_affects
pcmk__colocation_affects(const pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation, bool preview)
{
const pe_resource_t *dependent_role_rsc = NULL;
const pe_resource_t *primary_role_rsc = NULL;
CRM_ASSERT((dependent != NULL) && (primary != NULL)
&& (colocation != NULL));
- if (!preview && pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+ if (!preview && pcmk_is_set(primary->flags, pcmk_rsc_unassigned)) {
// Primary resource has not been assigned yet, so we can't do anything
return pcmk__coloc_affects_nothing;
}
dependent_role_rsc = get_resource_for_role(dependent);
primary_role_rsc = get_resource_for_role(primary);
if ((colocation->dependent_role >= pcmk_role_unpromoted)
&& (dependent_role_rsc->parent != NULL)
&& pcmk_is_set(dependent_role_rsc->parent->flags, pcmk_rsc_promotable)
- && !pcmk_is_set(dependent_role_rsc->flags, pe_rsc_provisional)) {
+ && !pcmk_is_set(dependent_role_rsc->flags, pcmk_rsc_unassigned)) {
/* This is a colocation by role, and the dependent is a promotable clone
* that has already been assigned, so the colocation should now affect
* the role.
*/
return pcmk__coloc_affects_role;
}
- if (!preview && !pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
+ if (!preview && !pcmk_is_set(dependent->flags, pcmk_rsc_unassigned)) {
/* The dependent resource has already been through assignment, so the
* constraint no longer has any effect. Log an error if a mandatory
* colocation constraint has been violated.
*/
const pe_node_t *primary_node = primary->allocated_to;
if (dependent->allocated_to == NULL) {
crm_trace("Skipping colocation '%s': %s will not run anywhere",
colocation->id, dependent->id);
} else if (colocation->score >= INFINITY) {
// Dependent resource must colocate with primary resource
if (!pe__same_node(primary_node, dependent->allocated_to)) {
crm_err("%s must be colocated with %s but is not (%s vs. %s)",
dependent->id, primary->id,
pe__node_name(dependent->allocated_to),
pe__node_name(primary_node));
}
} else if (colocation->score <= -CRM_SCORE_INFINITY) {
// Dependent resource must anti-colocate with primary resource
if (pe__same_node(dependent->allocated_to, primary_node)) {
crm_err("%s and %s must be anti-colocated but are assigned "
"to the same node (%s)",
dependent->id, primary->id,
pe__node_name(primary_node));
}
}
return pcmk__coloc_affects_nothing;
}
if ((colocation->dependent_role != pcmk_role_unknown)
&& (colocation->dependent_role != dependent_role_rsc->next_role)) {
crm_trace("Skipping %scolocation '%s': dependent limited to %s role "
"but %s next role is %s",
((colocation->score < 0)? "anti-" : ""),
colocation->id, role2text(colocation->dependent_role),
dependent_role_rsc->id,
role2text(dependent_role_rsc->next_role));
return pcmk__coloc_affects_nothing;
}
if ((colocation->primary_role != pcmk_role_unknown)
&& (colocation->primary_role != primary_role_rsc->next_role)) {
crm_trace("Skipping %scolocation '%s': primary limited to %s role "
"but %s next role is %s",
((colocation->score < 0)? "anti-" : ""),
colocation->id, role2text(colocation->primary_role),
primary_role_rsc->id, role2text(primary_role_rsc->next_role));
return pcmk__coloc_affects_nothing;
}
return pcmk__coloc_affects_location;
}
/*!
* \internal
* \brief Apply colocation to dependent for assignment purposes
*
* Update the allowed node scores of the dependent resource in a colocation,
* for the purposes of assigning it to a node.
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint
*/
void
pcmk__apply_coloc_to_scores(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation)
{
const char *attr = colocation->node_attribute;
const char *value = NULL;
GHashTable *work = NULL;
GHashTableIter iter;
pe_node_t *node = NULL;
if (primary->allocated_to != NULL) {
value = pcmk__colocation_node_attr(primary->allocated_to, attr,
primary);
} else if (colocation->score < 0) {
// Nothing to do (anti-colocation with something that is not running)
return;
}
work = pcmk__copy_node_table(dependent->allowed_nodes);
g_hash_table_iter_init(&iter, work);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (primary->allocated_to == NULL) {
node->weight = pcmk__add_scores(-colocation->score, node->weight);
pe_rsc_trace(dependent,
"Applied %s to %s score on %s (now %s after "
"subtracting %s because primary %s inactive)",
colocation->id, dependent->id, pe__node_name(node),
pcmk_readable_score(node->weight),
pcmk_readable_score(colocation->score), primary->id);
continue;
}
if (pcmk__str_eq(pcmk__colocation_node_attr(node, attr, dependent),
value, pcmk__str_casei)) {
/* Add colocation score only if optional (or minus infinity). A
* mandatory colocation is a requirement rather than a preference,
* so we don't need to consider it for relative assignment purposes.
* The resource will simply be forbidden from running on the node if
* the primary isn't active there (via the condition above).
*/
if (colocation->score < CRM_SCORE_INFINITY) {
node->weight = pcmk__add_scores(colocation->score,
node->weight);
pe_rsc_trace(dependent,
"Applied %s to %s score on %s (now %s after "
"adding %s)",
colocation->id, dependent->id, pe__node_name(node),
pcmk_readable_score(node->weight),
pcmk_readable_score(colocation->score));
}
continue;
}
if (colocation->score >= CRM_SCORE_INFINITY) {
/* Only mandatory colocations are relevant when the colocation
* attribute doesn't match, because an attribute not matching is not
* a negative preference -- the colocation is simply relevant only
* where it matches.
*/
node->weight = -CRM_SCORE_INFINITY;
pe_rsc_trace(dependent,
"Banned %s from %s because colocation %s attribute %s "
"does not match",
dependent->id, pe__node_name(node), colocation->id,
attr);
}
}
if ((colocation->score <= -INFINITY) || (colocation->score >= INFINITY)
|| pcmk__any_node_available(work)) {
g_hash_table_destroy(dependent->allowed_nodes);
dependent->allowed_nodes = work;
work = NULL;
} else {
pe_rsc_info(dependent,
"%s: Rolling back scores from %s (no available nodes)",
dependent->id, primary->id);
}
if (work != NULL) {
g_hash_table_destroy(work);
}
}
/*!
* \internal
* \brief Apply colocation to dependent for role purposes
*
* Update the priority of the dependent resource in a colocation, for the
* purposes of selecting its role
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint
*/
void
pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation)
{
const char *dependent_value = NULL;
const char *primary_value = NULL;
const char *attr = colocation->node_attribute;
int score_multiplier = 1;
const pe_resource_t *primary_role_rsc = NULL;
CRM_ASSERT((dependent != NULL) && (primary != NULL) &&
(colocation != NULL));
if ((primary->allocated_to == NULL) || (dependent->allocated_to == NULL)) {
return;
}
dependent_value = pcmk__colocation_node_attr(dependent->allocated_to, attr,
dependent);
primary_value = pcmk__colocation_node_attr(primary->allocated_to, attr,
primary);
primary_role_rsc = get_resource_for_role(primary);
if (!pcmk__str_eq(dependent_value, primary_value, pcmk__str_casei)) {
if ((colocation->score == INFINITY)
&& (colocation->dependent_role == pcmk_role_promoted)) {
dependent->priority = -INFINITY;
}
return;
}
if ((colocation->primary_role != pcmk_role_unknown)
&& (colocation->primary_role != primary_role_rsc->next_role)) {
return;
}
if (colocation->dependent_role == pcmk_role_unpromoted) {
score_multiplier = -1;
}
dependent->priority = pcmk__add_scores(score_multiplier * colocation->score,
dependent->priority);
pe_rsc_trace(dependent,
"Applied %s to %s promotion priority (now %s after %s %s)",
colocation->id, dependent->id,
pcmk_readable_score(dependent->priority),
((score_multiplier == 1)? "adding" : "subtracting"),
pcmk_readable_score(colocation->score));
}
/*!
* \internal
* \brief Find score of highest-scored node that matches colocation attribute
*
* \param[in] rsc Resource whose allowed nodes should be searched
* \param[in] attr Colocation attribute name (must not be NULL)
* \param[in] value Colocation attribute value to require
*/
static int
best_node_score_matching_attr(const pe_resource_t *rsc, const char *attr,
const char *value)
{
GHashTableIter iter;
pe_node_t *node = NULL;
int best_score = -INFINITY;
const char *best_node = NULL;
// Find best allowed node with matching attribute
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if ((node->weight > best_score)
&& pcmk__node_available(node, false, false)
&& pcmk__str_eq(value, pcmk__colocation_node_attr(node, attr, rsc),
pcmk__str_casei)) {
best_score = node->weight;
best_node = node->details->uname;
}
}
if (!pcmk__str_eq(attr, CRM_ATTR_UNAME, pcmk__str_none)) {
if (best_node == NULL) {
crm_info("No allowed node for %s matches node attribute %s=%s",
rsc->id, attr, value);
} else {
crm_info("Allowed node %s for %s had best score (%d) "
"of those matching node attribute %s=%s",
best_node, rsc->id, best_score, attr, value);
}
}
return best_score;
}
/*!
* \internal
* \brief Check whether a resource is allowed only on a single node
*
* \param[in] rsc Resource to check
*
* \return \c true if \p rsc is allowed only on one node, otherwise \c false
*/
static bool
allowed_on_one(const pe_resource_t *rsc)
{
GHashTableIter iter;
pe_node_t *allowed_node = NULL;
int allowed_nodes = 0;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &allowed_node)) {
if ((allowed_node->weight >= 0) && (++allowed_nodes > 1)) {
pe_rsc_trace(rsc, "%s is allowed on multiple nodes", rsc->id);
return false;
}
}
pe_rsc_trace(rsc, "%s is allowed %s", rsc->id,
((allowed_nodes == 1)? "on a single node" : "nowhere"));
return (allowed_nodes == 1);
}
/*!
* \internal
* \brief Add resource's colocation matches to current node assignment scores
*
* For each node in a given table, if any of a given resource's allowed nodes
* have a matching value for the colocation attribute, add the highest of those
* nodes' scores to the node's score.
*
* \param[in,out] nodes Table of nodes with assignment scores so far
* \param[in] source_rsc Resource whose node scores to add
* \param[in] target_rsc Resource on whose behalf to update \p nodes
* \param[in] colocation Original colocation constraint (used to get
* configured primary resource's stickiness, and
* to get colocation node attribute; pass NULL to
* ignore stickiness and use default attribute)
* \param[in] factor Factor by which to multiply scores being added
* \param[in] only_positive Whether to add only positive scores
*/
static void
add_node_scores_matching_attr(GHashTable *nodes,
const pe_resource_t *source_rsc,
const pe_resource_t *target_rsc,
const pcmk__colocation_t *colocation,
float factor, bool only_positive)
{
GHashTableIter iter;
pe_node_t *node = NULL;
const char *attr = colocation->node_attribute;
// Iterate through each node
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
float delta_f = 0;
int delta = 0;
int score = 0;
int new_score = 0;
const char *value = pcmk__colocation_node_attr(node, attr, target_rsc);
score = best_node_score_matching_attr(source_rsc, attr, value);
if ((factor < 0) && (score < 0)) {
/* If the dependent is anti-colocated, we generally don't want the
* primary to prefer nodes that the dependent avoids. That could
* lead to unnecessary shuffling of the primary when the dependent
* hits its migration threshold somewhere, for example.
*
* However, there are cases when it is desirable. If the dependent
* can't run anywhere but where the primary is, it would be
* worthwhile to move the primary for the sake of keeping the
* dependent active.
*
* We can't know that exactly at this point since we don't know
* where the primary will be assigned, but we can limit considering
* the preference to when the dependent is allowed only on one node.
* This is less than ideal for multiple reasons:
*
* - the dependent could be allowed on more than one node but have
* anti-colocation primaries on each;
* - the dependent could be a clone or bundle with multiple
* instances, and the dependent as a whole is allowed on multiple
* nodes but some instance still can't run
* - the dependent has considered node-specific criteria such as
* location constraints and stickiness by this point, but might
* have other factors that end up disallowing a node
*
* but the alternative is making the primary move when it doesn't
* need to.
*
* We also consider the primary's stickiness and influence, so the
* user has some say in the matter. (This is the configured primary,
* not a particular instance of the primary, but that doesn't matter
* unless stickiness uses a rule to vary by node, and that seems
* acceptable to ignore.)
*/
if ((colocation->primary->stickiness >= -score)
|| !pcmk__colocation_has_influence(colocation, NULL)
|| !allowed_on_one(colocation->dependent)) {
crm_trace("%s: Filtering %d + %f * %d "
"(double negative disallowed)",
pe__node_name(node), node->weight, factor, score);
continue;
}
}
if (node->weight == INFINITY_HACK) {
crm_trace("%s: Filtering %d + %f * %d (node was marked unusable)",
pe__node_name(node), node->weight, factor, score);
continue;
}
delta_f = factor * score;
// Round the number; see http://c-faq.com/fp/round.html
delta = (int) ((delta_f < 0)? (delta_f - 0.5) : (delta_f + 0.5));
/* Small factors can obliterate the small scores that are often actually
* used in configurations. If the score and factor are nonzero, ensure
* that the result is nonzero as well.
*/
if ((delta == 0) && (score != 0)) {
if (factor > 0.0) {
delta = 1;
} else if (factor < 0.0) {
delta = -1;
}
}
new_score = pcmk__add_scores(delta, node->weight);
if (only_positive && (new_score < 0) && (node->weight > 0)) {
crm_trace("%s: Filtering %d + %f * %d = %d "
"(negative disallowed, marking node unusable)",
pe__node_name(node), node->weight, factor, score,
new_score);
node->weight = INFINITY_HACK;
continue;
}
if (only_positive && (new_score < 0) && (node->weight == 0)) {
crm_trace("%s: Filtering %d + %f * %d = %d (negative disallowed)",
pe__node_name(node), node->weight, factor, score,
new_score);
continue;
}
crm_trace("%s: %d + %f * %d = %d", pe__node_name(node),
node->weight, factor, score, new_score);
node->weight = new_score;
}
}
/*!
* \internal
* \brief Update nodes with scores of colocated resources' nodes
*
* Given a table of nodes and a resource, update the nodes' scores with the
* scores of the best nodes matching the attribute used for each of the
* resource's relevant colocations.
*
* \param[in,out] source_rsc Resource whose node scores to add
* \param[in] target_rsc Resource on whose behalf to update \p *nodes
* \param[in] log_id Resource ID for logs (if \c NULL, use
* \p source_rsc ID)
* \param[in,out] nodes Nodes to update (set initial contents to \c NULL
* to copy allowed nodes from \p source_rsc)
* \param[in] colocation Original colocation constraint (used to get
* configured primary resource's stickiness, and
* to get colocation node attribute; if \c NULL,
* <tt>source_rsc</tt>'s own matching node scores
* will not be added, and \p *nodes must be \c NULL
* as well)
* \param[in] factor Incorporate scores multiplied by this factor
* \param[in] flags Bitmask of enum pcmk__coloc_select values
*
* \note \c NULL \p target_rsc, \c NULL \p *nodes, \c NULL \p colocation, and
* the \c pcmk__coloc_select_this_with flag are used together (and only by
* \c cmp_resources()).
* \note The caller remains responsible for freeing \p *nodes.
* \note This is the shared implementation of
* \c resource_alloc_functions_t:add_colocated_node_scores().
*/
void
pcmk__add_colocated_node_scores(pe_resource_t *source_rsc,
const pe_resource_t *target_rsc,
const char *log_id,
GHashTable **nodes,
const pcmk__colocation_t *colocation,
float factor, uint32_t flags)
{
GHashTable *work = NULL;
CRM_ASSERT((source_rsc != NULL) && (nodes != NULL)
&& ((colocation != NULL)
|| ((target_rsc == NULL) && (*nodes == NULL))));
if (log_id == NULL) {
log_id = source_rsc->id;
}
// Avoid infinite recursion
if (pcmk_is_set(source_rsc->flags, pe_rsc_merging)) {
pe_rsc_info(source_rsc, "%s: Breaking dependency loop at %s",
log_id, source_rsc->id);
return;
}
pe__set_resource_flags(source_rsc, pe_rsc_merging);
if (*nodes == NULL) {
work = pcmk__copy_node_table(source_rsc->allowed_nodes);
target_rsc = source_rsc;
} else {
const bool pos = pcmk_is_set(flags, pcmk__coloc_select_nonnegative);
pe_rsc_trace(source_rsc, "%s: Merging %s scores from %s (at %.6f)",
log_id, (pos? "positive" : "all"), source_rsc->id, factor);
work = pcmk__copy_node_table(*nodes);
add_node_scores_matching_attr(work, source_rsc, target_rsc, colocation,
factor, pos);
}
if (work == NULL) {
pe__clear_resource_flags(source_rsc, pe_rsc_merging);
return;
}
if (pcmk__any_node_available(work)) {
GList *colocations = NULL;
if (pcmk_is_set(flags, pcmk__coloc_select_this_with)) {
colocations = pcmk__this_with_colocations(source_rsc);
pe_rsc_trace(source_rsc,
"Checking additional %d optional '%s with' "
"constraints",
g_list_length(colocations), source_rsc->id);
} else {
colocations = pcmk__with_this_colocations(source_rsc);
pe_rsc_trace(source_rsc,
"Checking additional %d optional 'with %s' "
"constraints",
g_list_length(colocations), source_rsc->id);
}
flags |= pcmk__coloc_select_active;
for (GList *iter = colocations; iter != NULL; iter = iter->next) {
pcmk__colocation_t *constraint = iter->data;
pe_resource_t *other = NULL;
float other_factor = factor * constraint->score / (float) INFINITY;
if (pcmk_is_set(flags, pcmk__coloc_select_this_with)) {
other = constraint->primary;
} else if (!pcmk__colocation_has_influence(constraint, NULL)) {
continue;
} else {
other = constraint->dependent;
}
pe_rsc_trace(source_rsc,
"Optionally merging score of '%s' constraint "
"(%s with %s)",
constraint->id, constraint->dependent->id,
constraint->primary->id);
other->cmds->add_colocated_node_scores(other, target_rsc, log_id,
&work, constraint,
other_factor, flags);
pe__show_node_scores(true, NULL, log_id, work, source_rsc->cluster);
}
g_list_free(colocations);
} else if (pcmk_is_set(flags, pcmk__coloc_select_active)) {
pe_rsc_info(source_rsc, "%s: Rolling back optional scores from %s",
log_id, source_rsc->id);
g_hash_table_destroy(work);
pe__clear_resource_flags(source_rsc, pe_rsc_merging);
return;
}
if (pcmk_is_set(flags, pcmk__coloc_select_nonnegative)) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, work);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (node->weight == INFINITY_HACK) {
node->weight = 1;
}
}
}
if (*nodes != NULL) {
g_hash_table_destroy(*nodes);
}
*nodes = work;
pe__clear_resource_flags(source_rsc, pe_rsc_merging);
}
/*!
* \internal
* \brief Apply a "with this" colocation to a resource's allowed node scores
*
* \param[in,out] data Colocation to apply
* \param[in,out] user_data Resource being assigned
*/
void
pcmk__add_dependent_scores(gpointer data, gpointer user_data)
{
pcmk__colocation_t *colocation = data;
pe_resource_t *target_rsc = user_data;
pe_resource_t *source_rsc = colocation->dependent;
const float factor = colocation->score / (float) INFINITY;
uint32_t flags = pcmk__coloc_select_active;
if (!pcmk__colocation_has_influence(colocation, NULL)) {
return;
}
if (target_rsc->variant == pcmk_rsc_variant_clone) {
flags |= pcmk__coloc_select_nonnegative;
}
pe_rsc_trace(target_rsc,
"%s: Incorporating attenuated %s assignment scores due "
"to colocation %s",
target_rsc->id, source_rsc->id, colocation->id);
source_rsc->cmds->add_colocated_node_scores(source_rsc, target_rsc,
source_rsc->id,
&target_rsc->allowed_nodes,
colocation, factor, flags);
}
/*!
* \internal
* \brief Exclude nodes from a dependent's node table if not in a given list
*
* Given a dependent resource in a colocation and a list of nodes where the
* primary resource will run, set a node's score to \c -INFINITY in the
* dependent's node table if not found in the primary nodes list.
*
* \param[in,out] dependent Dependent resource
* \param[in] primary Primary resource (for logging only)
* \param[in] colocation Colocation constraint (for logging only)
* \param[in] primary_nodes List of nodes where the primary will have
* unblocked instances in a suitable role
* \param[in] merge_scores If \c true and a node is found in both \p table
* and \p list, add the node's score in \p list to
* the node's score in \p table
*/
void
pcmk__colocation_intersect_nodes(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
const GList *primary_nodes, bool merge_scores)
{
GHashTableIter iter;
pe_node_t *dependent_node = NULL;
CRM_ASSERT((dependent != NULL) && (primary != NULL)
&& (colocation != NULL));
g_hash_table_iter_init(&iter, dependent->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &dependent_node)) {
const pe_node_t *primary_node = NULL;
primary_node = pe_find_node_id(primary_nodes,
dependent_node->details->id);
if (primary_node == NULL) {
dependent_node->weight = -INFINITY;
pe_rsc_trace(dependent,
"Banning %s from %s (no primary instance) for %s",
dependent->id, pe__node_name(dependent_node),
colocation->id);
} else if (merge_scores) {
dependent_node->weight = pcmk__add_scores(dependent_node->weight,
primary_node->weight);
pe_rsc_trace(dependent,
"Added %s's score %s to %s's score for %s (now %s) "
"for colocation %s",
primary->id, pcmk_readable_score(primary_node->weight),
dependent->id, pe__node_name(dependent_node),
pcmk_readable_score(dependent_node->weight),
colocation->id);
}
}
}
/*!
* \internal
* \brief Get all colocations affecting a resource as the primary
*
* \param[in] rsc Resource to get colocations for
*
* \return Newly allocated list of colocations affecting \p rsc as primary
*
* \note This is a convenience wrapper for the with_this_colocations() method.
*/
GList *
pcmk__with_this_colocations(const pe_resource_t *rsc)
{
GList *list = NULL;
rsc->cmds->with_this_colocations(rsc, rsc, &list);
return list;
}
/*!
* \internal
* \brief Get all colocations affecting a resource as the dependent
*
* \param[in] rsc Resource to get colocations for
*
* \return Newly allocated list of colocations affecting \p rsc as dependent
*
* \note This is a convenience wrapper for the this_with_colocations() method.
*/
GList *
pcmk__this_with_colocations(const pe_resource_t *rsc)
{
GList *list = NULL;
rsc->cmds->this_with_colocations(rsc, rsc, &list);
return list;
}
diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c
index e461639d53..7ab2c0b564 100644
--- a/lib/pacemaker/pcmk_sched_group.c
+++ b/lib/pacemaker/pcmk_sched_group.c
@@ -1,948 +1,948 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Assign a group resource to a node
*
* \param[in,out] rsc Group resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
* \param[in] stop_if_fail If \c true and a child of \p rsc can't be
* assigned to a node, set the child's next role to
* stopped and update existing actions
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
pe_node_t *
pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer,
bool stop_if_fail)
{
pe_node_t *first_assigned_node = NULL;
pe_resource_t *first_member = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_group));
- if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
return rsc->allocated_to; // Assignment already done
}
if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Assignment dependency loop detected involving %s",
rsc->id);
return NULL;
}
if (rsc->children == NULL) {
// No members to assign
- pe__clear_resource_flags(rsc, pe_rsc_provisional);
+ pe__clear_resource_flags(rsc, pcmk_rsc_unassigned);
return NULL;
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
first_member = (pe_resource_t *) rsc->children->data;
rsc->role = first_member->role;
pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags,
pcmk_sched_output_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
pe_node_t *node = NULL;
pe_rsc_trace(rsc, "Assigning group %s member %s",
rsc->id, member->id);
node = member->cmds->assign(member, prefer, stop_if_fail);
if (first_assigned_node == NULL) {
first_assigned_node = node;
}
}
pe__set_next_role(rsc, first_member->next_role, "first group member");
- pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional);
+ pe__clear_resource_flags(rsc, pe_rsc_allocating|pcmk_rsc_unassigned);
if (!pe__group_flag_is_set(rsc, pe__group_colocated)) {
return NULL;
}
return first_assigned_node;
}
/*!
* \internal
* \brief Create a pseudo-operation for a group as an ordering point
*
* \param[in,out] group Group resource to create action for
* \param[in] action Action name
*
* \return Newly created pseudo-operation
*/
static pe_action_t *
create_group_pseudo_op(pe_resource_t *group, const char *action)
{
pe_action_t *op = custom_action(group, pcmk__op_key(group->id, action, 0),
action, NULL, TRUE, TRUE, group->cluster);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
return op;
}
/*!
* \internal
* \brief Create all actions needed for a given group resource
*
* \param[in,out] rsc Group resource to create actions for
*/
void
pcmk__group_create_actions(pe_resource_t *rsc)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_group));
pe_rsc_trace(rsc, "Creating actions for group %s", rsc->id);
// Create actions for individual group members
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
member->cmds->create_actions(member);
}
// Create pseudo-actions for group itself to serve as ordering points
create_group_pseudo_op(rsc, PCMK_ACTION_START);
create_group_pseudo_op(rsc, PCMK_ACTION_RUNNING);
create_group_pseudo_op(rsc, PCMK_ACTION_STOP);
create_group_pseudo_op(rsc, PCMK_ACTION_STOPPED);
if (crm_is_true(g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTABLE))) {
create_group_pseudo_op(rsc, PCMK_ACTION_DEMOTE);
create_group_pseudo_op(rsc, PCMK_ACTION_DEMOTED);
create_group_pseudo_op(rsc, PCMK_ACTION_PROMOTE);
create_group_pseudo_op(rsc, PCMK_ACTION_PROMOTED);
}
}
// User data for member_internal_constraints()
struct member_data {
// These could be derived from member but this avoids some function calls
bool ordered;
bool colocated;
bool promotable;
pe_resource_t *last_active;
pe_resource_t *previous_member;
};
/*!
* \internal
* \brief Create implicit constraints needed for a group member
*
* \param[in,out] data Group member to create implicit constraints for
* \param[in,out] user_data Member data (struct member_data *)
*/
static void
member_internal_constraints(gpointer data, gpointer user_data)
{
pe_resource_t *member = (pe_resource_t *) data;
struct member_data *member_data = (struct member_data *) user_data;
// For ordering demote vs demote or stop vs stop
uint32_t down_flags = pe_order_implies_first_printed;
// For ordering demote vs demoted or stop vs stopped
uint32_t post_down_flags = pe_order_implies_then_printed;
// Create the individual member's implicit constraints
member->cmds->internal_constraints(member);
if (member_data->previous_member == NULL) {
// This is first member
if (member_data->ordered) {
pe__set_order_flags(down_flags, pe_order_optional);
post_down_flags = pe_order_implies_then;
}
} else if (member_data->colocated) {
uint32_t flags = pcmk__coloc_none;
if (pcmk_is_set(member->flags, pe_rsc_critical)) {
flags |= pcmk__coloc_influence;
}
// Colocate this member with the previous one
pcmk__new_colocation("#group-members", NULL, INFINITY, member,
member_data->previous_member, NULL, NULL, flags);
}
if (member_data->promotable) {
// Demote group -> demote member -> group is demoted
pcmk__order_resource_actions(member->parent, PCMK_ACTION_DEMOTE,
member, PCMK_ACTION_DEMOTE, down_flags);
pcmk__order_resource_actions(member, PCMK_ACTION_DEMOTE,
member->parent, PCMK_ACTION_DEMOTED,
post_down_flags);
// Promote group -> promote member -> group is promoted
pcmk__order_resource_actions(member, PCMK_ACTION_PROMOTE,
member->parent, PCMK_ACTION_PROMOTED,
pe_order_runnable_left
|pe_order_implies_then
|pe_order_implies_then_printed);
pcmk__order_resource_actions(member->parent, PCMK_ACTION_PROMOTE,
member, PCMK_ACTION_PROMOTE,
pe_order_implies_first_printed);
}
// Stop group -> stop member -> group is stopped
pcmk__order_stops(member->parent, member, down_flags);
pcmk__order_resource_actions(member, PCMK_ACTION_STOP,
member->parent, PCMK_ACTION_STOPPED,
post_down_flags);
// Start group -> start member -> group is started
pcmk__order_starts(member->parent, member, pe_order_implies_first_printed);
pcmk__order_resource_actions(member, PCMK_ACTION_START,
member->parent, PCMK_ACTION_RUNNING,
pe_order_runnable_left
|pe_order_implies_then
|pe_order_implies_then_printed);
if (!member_data->ordered) {
pcmk__order_starts(member->parent, member,
pe_order_implies_then
|pe_order_runnable_left
|pe_order_implies_first_printed);
if (member_data->promotable) {
pcmk__order_resource_actions(member->parent, PCMK_ACTION_PROMOTE,
member, PCMK_ACTION_PROMOTE,
pe_order_implies_then
|pe_order_runnable_left
|pe_order_implies_first_printed);
}
} else if (member_data->previous_member == NULL) {
pcmk__order_starts(member->parent, member, pe_order_none);
if (member_data->promotable) {
pcmk__order_resource_actions(member->parent, PCMK_ACTION_PROMOTE,
member, PCMK_ACTION_PROMOTE,
pe_order_none);
}
} else {
// Order this member relative to the previous one
pcmk__order_starts(member_data->previous_member, member,
pe_order_implies_then|pe_order_runnable_left);
pcmk__order_stops(member, member_data->previous_member,
pe_order_optional|pe_order_restart);
/* In unusual circumstances (such as adding a new member to the middle
* of a group with unmanaged later members), this member may be active
* while the previous (new) member is inactive. In this situation, the
* usual restart orderings will be irrelevant, so we need to order this
* member's stop before the previous member's start.
*/
if ((member->running_on != NULL)
&& (member_data->previous_member->running_on == NULL)) {
pcmk__order_resource_actions(member, PCMK_ACTION_STOP,
member_data->previous_member,
PCMK_ACTION_START,
pe_order_implies_first
|pe_order_runnable_left);
}
if (member_data->promotable) {
pcmk__order_resource_actions(member_data->previous_member,
PCMK_ACTION_PROMOTE, member,
PCMK_ACTION_PROMOTE,
pe_order_implies_then
|pe_order_runnable_left);
pcmk__order_resource_actions(member, PCMK_ACTION_DEMOTE,
member_data->previous_member,
PCMK_ACTION_DEMOTE,
pe_order_optional);
}
}
// Make sure partially active groups shut down in sequence
if (member->running_on != NULL) {
if (member_data->ordered && (member_data->previous_member != NULL)
&& (member_data->previous_member->running_on == NULL)
&& (member_data->last_active != NULL)
&& (member_data->last_active->running_on != NULL)) {
pcmk__order_stops(member, member_data->last_active,
pe_order_optional);
}
member_data->last_active = member;
}
member_data->previous_member = member;
}
/*!
* \internal
* \brief Create implicit constraints needed for a group resource
*
* \param[in,out] rsc Group resource to create implicit constraints for
*/
void
pcmk__group_internal_constraints(pe_resource_t *rsc)
{
struct member_data member_data = { false, };
const pe_resource_t *top = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_group));
/* Order group pseudo-actions relative to each other for restarting:
* stop group -> group is stopped -> start group -> group is started
*/
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP,
rsc, PCMK_ACTION_STOPPED,
pe_order_runnable_left);
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOPPED,
rsc, PCMK_ACTION_START,
pe_order_optional);
pcmk__order_resource_actions(rsc, PCMK_ACTION_START,
rsc, PCMK_ACTION_RUNNING,
pe_order_runnable_left);
top = pe__const_top_resource(rsc, false);
member_data.ordered = pe__group_flag_is_set(rsc, pe__group_ordered);
member_data.colocated = pe__group_flag_is_set(rsc, pe__group_colocated);
member_data.promotable = pcmk_is_set(top->flags, pcmk_rsc_promotable);
g_list_foreach(rsc->children, member_internal_constraints, &member_data);
}
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint for a group with some other resource, apply the
* score to the dependent's allowed node scores (if we are still placing
* resources) or priority (if we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent group resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
*/
static void
colocate_group_with(pe_resource_t *dependent, const pe_resource_t *primary,
const pcmk__colocation_t *colocation)
{
pe_resource_t *member = NULL;
if (dependent->children == NULL) {
return;
}
pe_rsc_trace(primary, "Processing %s (group %s with %s) for dependent",
colocation->id, dependent->id, primary->id);
if (pe__group_flag_is_set(dependent, pe__group_colocated)) {
// Colocate first member (internal colocations will handle the rest)
member = (pe_resource_t *) dependent->children->data;
member->cmds->apply_coloc_score(member, primary, colocation, true);
return;
}
if (colocation->score >= INFINITY) {
pcmk__config_err("%s: Cannot perform mandatory colocation between "
"non-colocated group and %s",
dependent->id, primary->id);
return;
}
// Colocate each member individually
for (GList *iter = dependent->children; iter != NULL; iter = iter->next) {
member = (pe_resource_t *) iter->data;
member->cmds->apply_coloc_score(member, primary, colocation, true);
}
}
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint for some other resource with a group, apply the
* score to the dependent's allowed node scores (if we are still placing
* resources) or priority (if we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary group resource in colocation
* \param[in] colocation Colocation constraint to apply
*/
static void
colocate_with_group(pe_resource_t *dependent, const pe_resource_t *primary,
const pcmk__colocation_t *colocation)
{
const pe_resource_t *member = NULL;
pe_rsc_trace(primary,
"Processing colocation %s (%s with group %s) for primary",
colocation->id, dependent->id, primary->id);
- if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
+ if (pcmk_is_set(primary->flags, pcmk_rsc_unassigned)) {
return;
}
if (pe__group_flag_is_set(primary, pe__group_colocated)) {
if (colocation->score >= INFINITY) {
/* For mandatory colocations, the entire group must be assignable
* (and in the specified role if any), so apply the colocation based
* on the last member.
*/
member = pe__last_group_member(primary);
} else if (primary->children != NULL) {
/* For optional colocations, whether the group is partially or fully
* up doesn't matter, so apply the colocation based on the first
* member.
*/
member = (pe_resource_t *) primary->children->data;
}
if (member == NULL) {
return; // Nothing to colocate with
}
member->cmds->apply_coloc_score(dependent, member, colocation, false);
return;
}
if (colocation->score >= INFINITY) {
pcmk__config_err("%s: Cannot perform mandatory colocation with"
" non-colocated group %s",
dependent->id, primary->id);
return;
}
// Colocate dependent with each member individually
for (const GList *iter = primary->children; iter != NULL;
iter = iter->next) {
member = iter->data;
member->cmds->apply_coloc_score(dependent, member, colocation, false);
}
}
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node scores (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__group_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
CRM_ASSERT((dependent != NULL) && (primary != NULL)
&& (colocation != NULL));
if (for_dependent) {
colocate_group_with(dependent, primary, colocation);
} else {
// Method should only be called for primitive dependents
CRM_ASSERT(dependent->variant == pcmk_rsc_variant_primitive);
colocate_with_group(dependent, primary, colocation);
}
}
/*!
* \internal
* \brief Return action flags for a given group resource action
*
* \param[in,out] action Group action to get flags for
* \param[in] node If not NULL, limit effects to this node
*
* \return Flags appropriate to \p action on \p node
*/
uint32_t
pcmk__group_action_flags(pe_action_t *action, const pe_node_t *node)
{
// Default flags for a group action
uint32_t flags = pe_action_optional|pe_action_runnable|pe_action_pseudo;
CRM_ASSERT(action != NULL);
// Update flags considering each member's own flags for same action
for (GList *iter = action->rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
// Check whether member has the same action
enum action_tasks task = get_complex_task(member, action->task);
const char *task_s = task2text(task);
pe_action_t *member_action = find_first_action(member->actions, NULL,
task_s, node);
if (member_action != NULL) {
uint32_t member_flags = member->cmds->action_flags(member_action,
node);
// Group action is mandatory if any member action is
if (pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(member_flags, pe_action_optional)) {
pe_rsc_trace(action->rsc, "%s is mandatory because %s is",
action->uuid, member_action->uuid);
pe__clear_raw_action_flags(flags, "group action",
pe_action_optional);
pe__clear_action_flags(action, pe_action_optional);
}
// Group action is unrunnable if any member action is
if (!pcmk__str_eq(task_s, action->task, pcmk__str_none)
&& pcmk_is_set(flags, pe_action_runnable)
&& !pcmk_is_set(member_flags, pe_action_runnable)) {
pe_rsc_trace(action->rsc, "%s is unrunnable because %s is",
action->uuid, member_action->uuid);
pe__clear_raw_action_flags(flags, "group action",
pe_action_runnable);
pe__clear_action_flags(action, pe_action_runnable);
}
/* Group (pseudo-)actions other than stop or demote are unrunnable
* unless every member will do it.
*/
} else if ((task != pcmk_action_stop) && (task != pcmk_action_demote)) {
pe_rsc_trace(action->rsc,
"%s is not runnable because %s will not %s",
action->uuid, member->id, task_s);
pe__clear_raw_action_flags(flags, "group action",
pe_action_runnable);
}
}
return flags;
}
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions' flags
* (and runnable_before members if appropriate) as appropriate for the ordering.
* Effects may cascade to other orderings involving the actions as well.
*
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t
pcmk__group_update_ordered_actions(pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set)
{
uint32_t changed = pcmk__updated_none;
// Group method can be called only on behalf of "then" action
CRM_ASSERT((first != NULL) && (then != NULL) && (then->rsc != NULL)
&& (data_set != NULL));
// Update the actions for the group itself
changed |= pcmk__update_ordered_actions(first, then, node, flags, filter,
type, data_set);
// Update the actions for each group member
for (GList *iter = then->rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
pe_action_t *member_action = find_first_action(member->actions, NULL,
then->task, node);
if (member_action != NULL) {
changed |= member->cmds->update_ordered_actions(first,
member_action, node,
flags, filter, type,
data_set);
}
}
return changed;
}
/*!
* \internal
* \brief Apply a location constraint to a group's allowed node scores
*
* \param[in,out] rsc Group resource to apply constraint to
* \param[in,out] location Location constraint to apply
*/
void
pcmk__group_apply_location(pe_resource_t *rsc, pe__location_t *location)
{
GList *node_list_orig = NULL;
GList *node_list_copy = NULL;
bool reset_scores = true;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_group)
&& (location != NULL));
node_list_orig = location->node_list_rh;
node_list_copy = pcmk__copy_node_list(node_list_orig, true);
reset_scores = pe__group_flag_is_set(rsc, pe__group_colocated);
// Apply the constraint for the group itself (updates node scores)
pcmk__apply_location(rsc, location);
// Apply the constraint for each member
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
member->cmds->apply_location(member, location);
if (reset_scores) {
/* The first member of colocated groups needs to use the original
* node scores, but subsequent members should work on a copy, since
* the first member's scores already incorporate theirs.
*/
reset_scores = false;
location->node_list_rh = node_list_copy;
}
}
location->node_list_rh = node_list_orig;
g_list_free_full(node_list_copy, free);
}
// Group implementation of resource_alloc_functions_t:colocated_resources()
GList *
pcmk__group_colocated_resources(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc,
GList *colocated_rscs)
{
const pe_resource_t *member = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_group));
if (orig_rsc == NULL) {
orig_rsc = rsc;
}
if (pe__group_flag_is_set(rsc, pe__group_colocated)
|| pe_rsc_is_clone(rsc->parent)) {
/* This group has colocated members and/or is cloned -- either way,
* add every child's colocated resources to the list. The first and last
* members will include the group's own colocations.
*/
colocated_rscs = g_list_prepend(colocated_rscs, (gpointer) rsc);
for (const GList *iter = rsc->children;
iter != NULL; iter = iter->next) {
member = (const pe_resource_t *) iter->data;
colocated_rscs = member->cmds->colocated_resources(member, orig_rsc,
colocated_rscs);
}
} else if (rsc->children != NULL) {
/* This group's members are not colocated, and the group is not cloned,
* so just add the group's own colocations to the list.
*/
colocated_rscs = pcmk__colocated_resources(rsc, orig_rsc,
colocated_rscs);
}
return colocated_rscs;
}
// Group implementation of resource_alloc_functions_t:with_this_colocations()
void
pcmk__with_group_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_group)
&& (orig_rsc != NULL) && (list != NULL));
// Ignore empty groups
if (rsc->children == NULL) {
return;
}
/* "With this" colocations are needed only for the group itself and for its
* last member. (Previous members will chain via the group internal
* colocations.)
*/
if ((orig_rsc != rsc) && (orig_rsc != pe__last_group_member(rsc))) {
return;
}
pe_rsc_trace(rsc, "Adding 'with %s' colocations to list for %s",
rsc->id, orig_rsc->id);
// Add the group's own colocations
pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
// If cloned, add any relevant colocations with the clone
if (rsc->parent != NULL) {
rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc,
list);
}
if (!pe__group_flag_is_set(rsc, pe__group_colocated)) {
// @COMPAT Non-colocated groups are deprecated
return;
}
// Add explicit colocations with the group's (other) children
for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) {
const pe_resource_t *member = iter->data;
if (member != orig_rsc) {
member->cmds->with_this_colocations(member, orig_rsc, list);
}
}
}
// Group implementation of resource_alloc_functions_t:this_with_colocations()
void
pcmk__group_with_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
const pe_resource_t *member = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_group)
&& (orig_rsc != NULL) && (list != NULL));
// Ignore empty groups
if (rsc->children == NULL) {
return;
}
/* "This with" colocations are normally needed only for the group itself and
* for its first member.
*/
if ((rsc == orig_rsc)
|| (orig_rsc == (const pe_resource_t *) rsc->children->data)) {
pe_rsc_trace(rsc, "Adding '%s with' colocations to list for %s",
rsc->id, orig_rsc->id);
// Add the group's own colocations
pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
// If cloned, add any relevant colocations involving the clone
if (rsc->parent != NULL) {
rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc,
list);
}
if (!pe__group_flag_is_set(rsc, pe__group_colocated)) {
// @COMPAT Non-colocated groups are deprecated
return;
}
// Add explicit colocations involving the group's (other) children
for (const GList *iter = rsc->children;
iter != NULL; iter = iter->next) {
member = iter->data;
if (member != orig_rsc) {
member->cmds->this_with_colocations(member, orig_rsc, list);
}
}
return;
}
/* Later group members honor the group's colocations indirectly, due to the
* internal group colocations that chain everything from the first member.
* However, if an earlier group member is unmanaged, this chaining will not
* happen, so the group's mandatory colocations must be explicitly added.
*/
for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) {
member = iter->data;
if (orig_rsc == member) {
break; // We've seen all earlier members, and none are unmanaged
}
if (!pcmk_is_set(member->flags, pcmk_rsc_managed)) {
crm_trace("Adding mandatory '%s with' colocations to list for "
"member %s because earlier member %s is unmanaged",
rsc->id, orig_rsc->id, member->id);
for (const GList *cons_iter = rsc->rsc_cons; cons_iter != NULL;
cons_iter = cons_iter->next) {
const pcmk__colocation_t *colocation = NULL;
colocation = (const pcmk__colocation_t *) cons_iter->data;
if (colocation->score == INFINITY) {
pcmk__add_this_with(list, colocation, orig_rsc);
}
}
// @TODO Add mandatory (or all?) clone constraints if cloned
break;
}
}
}
/*!
* \internal
* \brief Update nodes with scores of colocated resources' nodes
*
* Given a table of nodes and a resource, update the nodes' scores with the
* scores of the best nodes matching the attribute used for each of the
* resource's relevant colocations.
*
* \param[in,out] source_rsc Group resource whose node scores to add
* \param[in] target_rsc Resource on whose behalf to update \p *nodes
* \param[in] log_id Resource ID for logs (if \c NULL, use
* \p source_rsc ID)
* \param[in,out] nodes Nodes to update (set initial contents to \c NULL
* to copy allowed nodes from \p source_rsc)
* \param[in] colocation Original colocation constraint (used to get
* configured primary resource's stickiness, and
* to get colocation node attribute; if \c NULL,
* <tt>source_rsc</tt>'s own matching node scores will
* not be added, and \p *nodes must be \c NULL as
* well)
* \param[in] factor Incorporate scores multiplied by this factor
* \param[in] flags Bitmask of enum pcmk__coloc_select values
*
* \note \c NULL \p target_rsc, \c NULL \p *nodes, \c NULL \p colocation, and
* the \c pcmk__coloc_select_this_with flag are used together (and only by
* \c cmp_resources()).
* \note The caller remains responsible for freeing \p *nodes.
* \note This is the group implementation of
* \c resource_alloc_functions_t:add_colocated_node_scores().
*/
void
pcmk__group_add_colocated_node_scores(pe_resource_t *source_rsc,
const pe_resource_t *target_rsc,
const char *log_id, GHashTable **nodes,
const pcmk__colocation_t *colocation,
float factor, uint32_t flags)
{
pe_resource_t *member = NULL;
CRM_ASSERT((source_rsc != NULL)
&& (source_rsc->variant == pcmk_rsc_variant_group)
&& (nodes != NULL)
&& ((colocation != NULL)
|| ((target_rsc == NULL) && (*nodes == NULL))));
if (log_id == NULL) {
log_id = source_rsc->id;
}
// Avoid infinite recursion
if (pcmk_is_set(source_rsc->flags, pe_rsc_merging)) {
pe_rsc_info(source_rsc, "%s: Breaking dependency loop at %s",
log_id, source_rsc->id);
return;
}
pe__set_resource_flags(source_rsc, pe_rsc_merging);
// Ignore empty groups (only possible with schema validation disabled)
if (source_rsc->children == NULL) {
return;
}
/* Refer the operation to the first or last member as appropriate.
*
* cmp_resources() is the only caller that passes a NULL nodes table,
* and is also the only caller using pcmk__coloc_select_this_with.
* For "this with" colocations, the last member will recursively incorporate
* all the other members' "this with" colocations via the internal group
* colocations (and via the first member, the group's own colocations).
*
* For "with this" colocations, the first member works similarly.
*/
if (*nodes == NULL) {
member = pe__last_group_member(source_rsc);
} else {
member = source_rsc->children->data;
}
pe_rsc_trace(source_rsc, "%s: Merging scores from group %s using member %s "
"(at %.6f)", log_id, source_rsc->id, member->id, factor);
member->cmds->add_colocated_node_scores(member, target_rsc, log_id, nodes,
colocation, factor, flags);
pe__clear_resource_flags(source_rsc, pe_rsc_merging);
}
// Group implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__group_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization)
{
pe_resource_t *member = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_group)
&& (orig_rsc != NULL) && (utilization != NULL));
- if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
return;
}
pe_rsc_trace(orig_rsc, "%s: Adding group %s as colocated utilization",
orig_rsc->id, rsc->id);
if (pe__group_flag_is_set(rsc, pe__group_colocated)
|| pe_rsc_is_clone(rsc->parent)) {
// Every group member will be on same node, so sum all members
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
member = (pe_resource_t *) iter->data;
- if (pcmk_is_set(member->flags, pe_rsc_provisional)
+ if (pcmk_is_set(member->flags, pcmk_rsc_unassigned)
&& (g_list_find(all_rscs, member) == NULL)) {
member->cmds->add_utilization(member, orig_rsc, all_rscs,
utilization);
}
}
} else if (rsc->children != NULL) {
// Just add first member's utilization
member = (pe_resource_t *) rsc->children->data;
if ((member != NULL)
- && pcmk_is_set(member->flags, pe_rsc_provisional)
+ && pcmk_is_set(member->flags, pcmk_rsc_unassigned)
&& (g_list_find(all_rscs, member) == NULL)) {
member->cmds->add_utilization(member, orig_rsc, all_rscs,
utilization);
}
}
}
// Group implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__group_shutdown_lock(pe_resource_t *rsc)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_group));
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *member = (pe_resource_t *) iter->data;
member->cmds->shutdown_lock(member);
}
}
diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c
index bb33d7f15d..8c56a43117 100644
--- a/lib/pacemaker/pcmk_sched_instances.c
+++ b/lib/pacemaker/pcmk_sched_instances.c
@@ -1,1670 +1,1670 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
/* This file is intended for code usable with both clone instances and bundle
* replica containers.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Check whether a node is allowed to run an instance
*
* \param[in] instance Clone instance or bundle container to check
* \param[in] node Node to check
* \param[in] max_per_node Maximum number of instances allowed to run on a node
*
* \return true if \p node is allowed to run \p instance, otherwise false
*/
static bool
can_run_instance(const pe_resource_t *instance, const pe_node_t *node,
int max_per_node)
{
pe_node_t *allowed_node = NULL;
if (pcmk_is_set(instance->flags, pcmk_rsc_removed)) {
pe_rsc_trace(instance, "%s cannot run on %s: orphaned",
instance->id, pe__node_name(node));
return false;
}
if (!pcmk__node_available(node, false, false)) {
pe_rsc_trace(instance,
"%s cannot run on %s: node cannot run resources",
instance->id, pe__node_name(node));
return false;
}
allowed_node = pcmk__top_allowed_node(instance, node);
if (allowed_node == NULL) {
crm_warn("%s cannot run on %s: node not allowed",
instance->id, pe__node_name(node));
return false;
}
if (allowed_node->weight < 0) {
pe_rsc_trace(instance, "%s cannot run on %s: parent score is %s there",
instance->id, pe__node_name(node),
pcmk_readable_score(allowed_node->weight));
return false;
}
if (allowed_node->count >= max_per_node) {
pe_rsc_trace(instance,
"%s cannot run on %s: node already has %d instance%s",
instance->id, pe__node_name(node), max_per_node,
pcmk__plural_s(max_per_node));
return false;
}
pe_rsc_trace(instance, "%s can run on %s (%d already running)",
instance->id, pe__node_name(node), allowed_node->count);
return true;
}
/*!
* \internal
* \brief Ban a clone instance or bundle replica from unavailable allowed nodes
*
* \param[in,out] instance Clone instance or bundle replica to ban
* \param[in] max_per_node Maximum instances allowed to run on a node
*/
static void
ban_unavailable_allowed_nodes(pe_resource_t *instance, int max_per_node)
{
if (instance->allowed_nodes != NULL) {
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, instance->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (!can_run_instance(instance, node, max_per_node)) {
pe_rsc_trace(instance, "Banning %s from unavailable node %s",
instance->id, pe__node_name(node));
node->weight = -INFINITY;
for (GList *child_iter = instance->children;
child_iter != NULL; child_iter = child_iter->next) {
pe_resource_t *child = (pe_resource_t *) child_iter->data;
pe_node_t *child_node = NULL;
child_node = g_hash_table_lookup(child->allowed_nodes,
node->details->id);
if (child_node != NULL) {
pe_rsc_trace(instance,
"Banning %s child %s "
"from unavailable node %s",
instance->id, child->id,
pe__node_name(node));
child_node->weight = -INFINITY;
}
}
}
}
}
}
/*!
* \internal
* \brief Create a hash table with a single node in it
*
* \param[in] node Node to copy into new table
*
* \return Newly created hash table containing a copy of \p node
* \note The caller is responsible for freeing the result with
* g_hash_table_destroy().
*/
static GHashTable *
new_node_table(pe_node_t *node)
{
GHashTable *table = pcmk__strkey_table(NULL, free);
node = pe__copy_node(node);
g_hash_table_insert(table, (gpointer) node->details->id, node);
return table;
}
/*!
* \internal
* \brief Apply a resource's parent's colocation scores to a node table
*
* \param[in] rsc Resource whose colocations should be applied
* \param[in,out] nodes Node table to apply colocations to
*/
static void
apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes)
{
GList *colocations = pcmk__this_with_colocations(rsc);
for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
const pcmk__colocation_t *colocation = iter->data;
pe_resource_t *other = colocation->primary;
float factor = colocation->score / (float) INFINITY;
other->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes,
colocation, factor,
pcmk__coloc_select_default);
}
g_list_free(colocations);
colocations = pcmk__with_this_colocations(rsc);
for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
const pcmk__colocation_t *colocation = iter->data;
pe_resource_t *other = colocation->dependent;
float factor = colocation->score / (float) INFINITY;
if (!pcmk__colocation_has_influence(colocation, rsc)) {
continue;
}
other->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes,
colocation, factor,
pcmk__coloc_select_nonnegative);
}
g_list_free(colocations);
}
/*!
* \internal
* \brief Compare clone or bundle instances based on colocation scores
*
* Determine the relative order in which two clone or bundle instances should be
* assigned to nodes, considering the scores of colocation constraints directly
* or indirectly involving them.
*
* \param[in] instance1 First instance to compare
* \param[in] instance2 Second instance to compare
*
* \return A negative number if \p instance1 should be assigned first,
* a positive number if \p instance2 should be assigned first,
* or 0 if assignment order doesn't matter
*/
static int
cmp_instance_by_colocation(const pe_resource_t *instance1,
const pe_resource_t *instance2)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
pe_node_t *current_node1 = pe__current_node(instance1);
pe_node_t *current_node2 = pe__current_node(instance2);
GHashTable *colocated_scores1 = NULL;
GHashTable *colocated_scores2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL)
&& (instance2 != NULL) && (instance2->parent != NULL)
&& (current_node1 != NULL) && (current_node2 != NULL));
// Create node tables initialized with each node
colocated_scores1 = new_node_table(current_node1);
colocated_scores2 = new_node_table(current_node2);
// Apply parental colocations
apply_parent_colocations(instance1, &colocated_scores1);
apply_parent_colocations(instance2, &colocated_scores2);
// Find original nodes again, with scores updated for colocations
node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
// Compare nodes by updated scores
if (node1->weight < node2->weight) {
crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
instance1->id, node1->weight, pe__node_name(node1),
instance2->id, node2->weight, pe__node_name(node2));
rc = 1;
} else if (node1->weight > node2->weight) {
crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
instance1->id, node1->weight, pe__node_name(node1),
instance2->id, node2->weight, pe__node_name(node2));
rc = -1;
}
g_hash_table_destroy(colocated_scores1);
g_hash_table_destroy(colocated_scores2);
return rc;
}
/*!
* \internal
* \brief Check whether a resource or any of its children are failed
*
* \param[in] rsc Resource to check
*
* \return true if \p rsc or any of its children are failed, otherwise false
*/
static bool
did_fail(const pe_resource_t *rsc)
{
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
return true;
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
if (did_fail((const pe_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether a node is allowed to run a resource
*
* \param[in] rsc Resource to check
* \param[in,out] node Node to check (will be set NULL if not allowed)
*
* \return true if *node is either NULL or allowed for \p rsc, otherwise false
*/
static bool
node_is_allowed(const pe_resource_t *rsc, pe_node_t **node)
{
if (*node != NULL) {
pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes,
(*node)->details->id);
if ((allowed == NULL) || (allowed->weight < 0)) {
pe_rsc_trace(rsc, "%s: current location (%s) is unavailable",
rsc->id, pe__node_name(*node));
*node = NULL;
return false;
}
}
return true;
}
/*!
* \internal
* \brief Compare two clone or bundle instances' instance numbers
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a's instance number is lower,
* a positive number if \p b's instance number is lower,
* or 0 if their instance numbers are the same
*/
gint
pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
{
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
char *div1 = NULL;
char *div2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
// Clone numbers are after a colon, bundle numbers after a dash
div1 = strrchr(instance1->id, ':');
if (div1 == NULL) {
div1 = strrchr(instance1->id, '-');
}
div2 = strrchr(instance2->id, ':');
if (div2 == NULL) {
div2 = strrchr(instance2->id, '-');
}
CRM_ASSERT((div1 != NULL) && (div2 != NULL));
return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
}
/*!
* \internal
* \brief Compare clone or bundle instances according to assignment order
*
* Compare two clone or bundle instances according to the order they should be
* assigned to nodes, preferring (in order):
*
* - Active instance that is less multiply active
* - Instance that is not active on a disallowed node
* - Instance with higher configured priority
* - Active instance whose current node can run resources
* - Active instance whose parent is allowed on current node
* - Active instance whose current node has fewer other instances
* - Active instance
* - Instance that isn't failed
* - Instance whose colocations result in higher score on current node
* - Instance with lower ID in lexicographic order
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a should be assigned first,
* a positive number if \p b should be assigned first,
* or 0 if assignment order doesn't matter
*/
gint
pcmk__cmp_instance(gconstpointer a, gconstpointer b)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
unsigned int nnodes1 = 0;
unsigned int nnodes2 = 0;
bool can1 = true;
bool can2 = true;
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
node1 = instance1->fns->active_node(instance1, &nnodes1, NULL);
node2 = instance2->fns->active_node(instance2, &nnodes2, NULL);
/* If both instances are running and at least one is multiply
* active, prefer instance that's running on fewer nodes.
*/
if ((nnodes1 > 0) && (nnodes2 > 0)) {
if (nnodes1 < nnodes2) {
crm_trace("Assign %s (active on %d) before %s (active on %d): "
"less multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return -1;
} else if (nnodes1 > nnodes2) {
crm_trace("Assign %s (active on %d) after %s (active on %d): "
"more multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return 1;
}
}
/* An instance that is either inactive or active on an allowed node is
* preferred over an instance that is active on a no-longer-allowed node.
*/
can1 = node_is_allowed(instance1, &node1);
can2 = node_is_allowed(instance2, &node2);
if (can1 && !can2) {
crm_trace("Assign %s before %s: not active on a disallowed node",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: active on a disallowed node",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher configured priority
if (instance1->priority > instance2->priority) {
crm_trace("Assign %s before %s: priority (%d > %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return -1;
} else if (instance1->priority < instance2->priority) {
crm_trace("Assign %s after %s: priority (%d < %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return 1;
}
// Prefer active instance
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: inactive",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node can run resources
can1 = pcmk__node_available(node1, false, false);
can2 = pcmk__node_available(node2, false, false);
if (can1 && !can2) {
crm_trace("Assign %s before %s: current node can run resources",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: current node can't run resources",
instance1->id, instance2->id);
return 1;
}
// Prefer instance whose parent is allowed to run on instance's current node
node1 = pcmk__top_allowed_node(instance1, node1);
node2 = pcmk__top_allowed_node(instance2, node2);
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: "
"parent not allowed on either instance's current node",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: parent not allowed on current node",
instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: parent allowed on current node",
instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node is running fewer other instances
if (node1->count < node2->count) {
crm_trace("Assign %s before %s: fewer active instances on current node",
instance1->id, instance2->id);
return -1;
} else if (node1->count > node2->count) {
crm_trace("Assign %s after %s: more active instances on current node",
instance1->id, instance2->id);
return 1;
}
// Prefer instance that isn't failed
can1 = did_fail(instance1);
can2 = did_fail(instance2);
if (!can1 && can2) {
crm_trace("Assign %s before %s: not failed",
instance1->id, instance2->id);
return -1;
} else if (can1 && !can2) {
crm_trace("Assign %s after %s: failed",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher cumulative colocation score on current node
rc = cmp_instance_by_colocation(instance1, instance2);
if (rc != 0) {
return rc;
}
// Prefer instance with lower instance number
rc = pcmk__cmp_instance_number(instance1, instance2);
if (rc < 0) {
crm_trace("Assign %s before %s: instance number",
instance1->id, instance2->id);
} else if (rc > 0) {
crm_trace("Assign %s after %s: instance number",
instance1->id, instance2->id);
} else {
crm_trace("No assignment preference for %s vs. %s",
instance1->id, instance2->id);
}
return rc;
}
/*!
* \internal
* \brief Increment the parent's instance count after assigning an instance
*
* An instance's parent tracks how many instances have been assigned to each
* node via its pe_node_t:count member. After assigning an instance to a node,
* find the corresponding node in the parent's allowed table and increment it.
*
* \param[in,out] instance Instance whose parent to update
* \param[in] assigned_to Node to which the instance was assigned
*/
static void
increment_parent_count(pe_resource_t *instance, const pe_node_t *assigned_to)
{
pe_node_t *allowed = NULL;
if (assigned_to == NULL) {
return;
}
allowed = pcmk__top_allowed_node(instance, assigned_to);
if (allowed == NULL) {
/* The instance is allowed on the node, but its parent isn't. This
* shouldn't be possible if the resource is managed, and we won't be
* able to limit the number of instances assigned to the node.
*/
CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pcmk_rsc_managed));
} else {
allowed->count++;
}
}
/*!
* \internal
* \brief Assign an instance to a node
*
* \param[in,out] instance Clone instance or bundle replica container
* \param[in] prefer If not NULL, attempt early assignment to this
* node, if still the best choice; otherwise,
* perform final assignment
* \param[in] max_per_node Assign at most this many instances to one node
*
* \return Node to which \p instance is assigned
*/
static const pe_node_t *
assign_instance(pe_resource_t *instance, const pe_node_t *prefer,
int max_per_node)
{
pe_node_t *chosen = NULL;
pe_rsc_trace(instance, "Assigning %s (preferring %s)", instance->id,
((prefer == NULL)? "no node" : prefer->details->uname));
if (pcmk_is_set(instance->flags, pe_rsc_allocating)) {
pe_rsc_debug(instance,
"Assignment loop detected involving %s colocations",
instance->id);
return NULL;
}
ban_unavailable_allowed_nodes(instance, max_per_node);
// Failed early assignments are reversible (stop_if_fail=false)
chosen = instance->cmds->assign(instance, prefer, (prefer == NULL));
increment_parent_count(instance, chosen);
return chosen;
}
/*!
* \internal
* \brief Try to assign an instance to its current node early
*
* \param[in] rsc Clone or bundle being assigned (for logs only)
* \param[in] instance Clone instance or bundle replica container
* \param[in] current Instance's current node
* \param[in] max_per_node Maximum number of instances per node
* \param[in] available Number of instances still available for assignment
*
* \return \c true if \p instance was successfully assigned to its current node,
* or \c false otherwise
*/
static bool
assign_instance_early(const pe_resource_t *rsc, pe_resource_t *instance,
const pe_node_t *current, int max_per_node, int available)
{
const pe_node_t *chosen = NULL;
int reserved = 0;
pe_resource_t *parent = instance->parent;
GHashTable *allowed_orig = NULL;
GHashTable *allowed_orig_parent = parent->allowed_nodes;
const pe_node_t *allowed_node = g_hash_table_lookup(instance->allowed_nodes,
current->details->id);
pe_rsc_trace(instance, "Trying to assign %s to its current node %s",
instance->id, pe__node_name(current));
if (!pcmk__node_available(allowed_node, true, false)) {
pe_rsc_info(instance,
"Not assigning %s to current node %s: unavailable",
instance->id, pe__node_name(current));
return false;
}
/* On each iteration, if instance gets assigned to a node other than its
* current one, we reserve one instance for the chosen node, unassign
* instance, restore instance's original node tables, and try again. This
* way, instances are proportionally assigned to nodes based on preferences,
* but shuffling of specific instances is minimized. If a node will be
* assigned instances at all, it preferentially receives instances that are
* currently active there.
*
* parent->allowed_nodes tracks the number of instances assigned to each
* node. If a node already has max_per_node instances assigned,
* ban_unavailable_allowed_nodes() marks it as unavailable.
*
* In the end, we restore the original parent->allowed_nodes to undo the
* changes to counts during tentative assignments. If we successfully
* assigned instance to its current node, we increment that node's counter.
*/
// Back up the allowed node tables of instance and its children recursively
pcmk__copy_node_tables(instance, &allowed_orig);
// Update instances-per-node counts in a scratch table
parent->allowed_nodes = pcmk__copy_node_table(parent->allowed_nodes);
while (reserved < available) {
chosen = assign_instance(instance, current, max_per_node);
if (pe__same_node(chosen, current)) {
// Successfully assigned to current node
break;
}
// Assignment updates scores, so restore to original state
pe_rsc_debug(instance, "Rolling back node scores for %s", instance->id);
pcmk__restore_node_tables(instance, allowed_orig);
if (chosen == NULL) {
// Assignment failed, so give up
pe_rsc_info(instance,
"Not assigning %s to current node %s: unavailable",
instance->id, pe__node_name(current));
- pe__set_resource_flags(instance, pe_rsc_provisional);
+ pe__set_resource_flags(instance, pcmk_rsc_unassigned);
break;
}
// We prefer more strongly to assign an instance to the chosen node
pe_rsc_debug(instance,
"Not assigning %s to current node %s: %s is better",
instance->id, pe__node_name(current),
pe__node_name(chosen));
// Reserve one instance for the chosen node and try again
if (++reserved >= available) {
pe_rsc_info(instance,
"Not assigning %s to current node %s: "
"other assignments are more important",
instance->id, pe__node_name(current));
} else {
pe_rsc_debug(instance,
"Reserved an instance of %s for %s. Retrying "
"assignment of %s to %s",
rsc->id, pe__node_name(chosen), instance->id,
pe__node_name(current));
}
// Clear this assignment (frees chosen); leave instance counts in parent
pcmk__unassign_resource(instance);
chosen = NULL;
}
g_hash_table_destroy(allowed_orig);
// Restore original instances-per-node counts
g_hash_table_destroy(parent->allowed_nodes);
parent->allowed_nodes = allowed_orig_parent;
if (chosen == NULL) {
// Couldn't assign instance to current node
return false;
}
pe_rsc_trace(instance, "Assigned %s to current node %s",
instance->id, pe__node_name(current));
increment_parent_count(instance, chosen);
return true;
}
/*!
* \internal
* \brief Reset the node counts of a resource's allowed nodes to zero
*
* \param[in,out] rsc Resource to reset
*
* \return Number of nodes that are available to run resources
*/
static unsigned int
reset_allowed_node_counts(pe_resource_t *rsc)
{
unsigned int available_nodes = 0;
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
node->count = 0;
if (pcmk__node_available(node, false, false)) {
available_nodes++;
}
}
return available_nodes;
}
/*!
* \internal
* \brief Check whether an instance has a preferred node
*
* \param[in] instance Clone instance or bundle replica container
* \param[in] optimal_per_node Optimal number of instances per node
*
* \return Instance's current node if still available, otherwise NULL
*/
static const pe_node_t *
preferred_node(const pe_resource_t *instance, int optimal_per_node)
{
const pe_node_t *node = NULL;
const pe_node_t *parent_node = NULL;
// Check whether instance is active, healthy, and not yet assigned
if ((instance->running_on == NULL)
- || !pcmk_is_set(instance->flags, pe_rsc_provisional)
+ || !pcmk_is_set(instance->flags, pcmk_rsc_unassigned)
|| pcmk_is_set(instance->flags, pe_rsc_failed)) {
return NULL;
}
// Check whether instance's current node can run resources
node = pe__current_node(instance);
if (!pcmk__node_available(node, true, false)) {
pe_rsc_trace(instance, "Not assigning %s to %s early (unavailable)",
instance->id, pe__node_name(node));
return NULL;
}
// Check whether node already has optimal number of instances assigned
parent_node = pcmk__top_allowed_node(instance, node);
if ((parent_node != NULL) && (parent_node->count >= optimal_per_node)) {
pe_rsc_trace(instance,
"Not assigning %s to %s early "
"(optimal instances already assigned)",
instance->id, pe__node_name(node));
return NULL;
}
return node;
}
/*!
* \internal
* \brief Assign collective instances to nodes
*
* \param[in,out] collective Clone or bundle resource being assigned
* \param[in,out] instances List of clone instances or bundle containers
* \param[in] max_total Maximum instances to assign in total
* \param[in] max_per_node Maximum instances to assign to any one node
*/
void
pcmk__assign_instances(pe_resource_t *collective, GList *instances,
int max_total, int max_per_node)
{
// Reuse node count to track number of assigned instances
unsigned int available_nodes = reset_allowed_node_counts(collective);
int optimal_per_node = 0;
int assigned = 0;
GList *iter = NULL;
pe_resource_t *instance = NULL;
const pe_node_t *current = NULL;
if (available_nodes > 0) {
optimal_per_node = max_total / available_nodes;
}
if (optimal_per_node < 1) {
optimal_per_node = 1;
}
pe_rsc_debug(collective,
"Assigning up to %d %s instance%s to up to %u node%s "
"(at most %d per host, %d optimal)",
max_total, collective->id, pcmk__plural_s(max_total),
available_nodes, pcmk__plural_s(available_nodes),
max_per_node, optimal_per_node);
// Assign as many instances as possible to their current location
for (iter = instances; (iter != NULL) && (assigned < max_total);
iter = iter->next) {
int available = max_total - assigned;
instance = iter->data;
- if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) {
+ if (!pcmk_is_set(instance->flags, pcmk_rsc_unassigned)) {
continue; // Already assigned
}
current = preferred_node(instance, optimal_per_node);
if ((current != NULL)
&& assign_instance_early(collective, instance, current,
max_per_node, available)) {
assigned++;
}
}
pe_rsc_trace(collective, "Assigned %d of %d instance%s to current node",
assigned, max_total, pcmk__plural_s(max_total));
for (iter = instances; iter != NULL; iter = iter->next) {
instance = (pe_resource_t *) iter->data;
- if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) {
+ if (!pcmk_is_set(instance->flags, pcmk_rsc_unassigned)) {
continue; // Already assigned
}
if (instance->running_on != NULL) {
current = pe__current_node(instance);
if (pcmk__top_allowed_node(instance, current) == NULL) {
const char *unmanaged = "";
if (!pcmk_is_set(instance->flags, pcmk_rsc_managed)) {
unmanaged = "Unmanaged resource ";
}
crm_notice("%s%s is running on %s which is no longer allowed",
unmanaged, instance->id, pe__node_name(current));
}
}
if (assigned >= max_total) {
pe_rsc_debug(collective,
"Not assigning %s because maximum %d instances "
"already assigned",
instance->id, max_total);
resource_location(instance, NULL, -INFINITY,
"collective_limit_reached", collective->cluster);
} else if (assign_instance(instance, NULL, max_per_node) != NULL) {
assigned++;
}
}
pe_rsc_debug(collective, "Assigned %d of %d possible instance%s of %s",
assigned, max_total, pcmk__plural_s(max_total),
collective->id);
}
enum instance_state {
instance_starting = (1 << 0),
instance_stopping = (1 << 1),
/* This indicates that some instance is restarting. It's not the same as
* instance_starting|instance_stopping, which would indicate that some
* instance is starting, and some instance (not necessarily the same one) is
* stopping.
*/
instance_restarting = (1 << 2),
instance_active = (1 << 3),
instance_all = instance_starting|instance_stopping
|instance_restarting|instance_active,
};
/*!
* \internal
* \brief Check whether an instance is active, starting, and/or stopping
*
* \param[in] instance Clone instance or bundle replica container
* \param[in,out] state Whether any instance is starting, stopping, etc.
*/
static void
check_instance_state(const pe_resource_t *instance, uint32_t *state)
{
const GList *iter = NULL;
uint32_t instance_state = 0; // State of just this instance
// No need to check further if all conditions have already been detected
if (pcmk_all_flags_set(*state, instance_all)) {
return;
}
// If instance is a collective (a cloned group), check its children instead
if (instance->variant > pcmk_rsc_variant_primitive) {
for (iter = instance->children;
(iter != NULL) && !pcmk_all_flags_set(*state, instance_all);
iter = iter->next) {
check_instance_state((const pe_resource_t *) iter->data, state);
}
return;
}
// If we get here, instance is a primitive
if (instance->running_on != NULL) {
instance_state |= instance_active;
}
// Check each of the instance's actions for runnable start or stop
for (iter = instance->actions;
(iter != NULL) && !pcmk_all_flags_set(instance_state,
instance_starting
|instance_stopping);
iter = iter->next) {
const pe_action_t *action = (const pe_action_t *) iter->data;
const bool optional = pcmk_is_set(action->flags, pe_action_optional);
if (pcmk__str_eq(PCMK_ACTION_START, action->task, pcmk__str_none)) {
if (!optional && pcmk_is_set(action->flags, pe_action_runnable)) {
pe_rsc_trace(instance, "Instance is starting due to %s",
action->uuid);
instance_state |= instance_starting;
} else {
pe_rsc_trace(instance, "%s doesn't affect %s state (%s)",
action->uuid, instance->id,
(optional? "optional" : "unrunnable"));
}
} else if (pcmk__str_eq(PCMK_ACTION_STOP, action->task,
pcmk__str_none)) {
/* Only stop actions can be pseudo-actions for primitives. That
* indicates that the node they are on is being fenced, so the stop
* is implied rather than actually executed.
*/
if (!optional
&& pcmk_any_flags_set(action->flags,
pe_action_pseudo|pe_action_runnable)) {
pe_rsc_trace(instance, "Instance is stopping due to %s",
action->uuid);
instance_state |= instance_stopping;
} else {
pe_rsc_trace(instance, "%s doesn't affect %s state (%s)",
action->uuid, instance->id,
(optional? "optional" : "unrunnable"));
}
}
}
if (pcmk_all_flags_set(instance_state,
instance_starting|instance_stopping)) {
instance_state |= instance_restarting;
}
*state |= instance_state;
}
/*!
* \internal
* \brief Create actions for collective resource instances
*
* \param[in,out] collective Clone or bundle resource to create actions for
* \param[in,out] instances List of clone instances or bundle containers
*/
void
pcmk__create_instance_actions(pe_resource_t *collective, GList *instances)
{
uint32_t state = 0;
pe_action_t *stop = NULL;
pe_action_t *stopped = NULL;
pe_action_t *start = NULL;
pe_action_t *started = NULL;
pe_rsc_trace(collective, "Creating collective instance actions for %s",
collective->id);
// Create actions for each instance appropriate to its variant
for (GList *iter = instances; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
instance->cmds->create_actions(instance);
check_instance_state(instance, &state);
}
// Create pseudo-actions for rsc start and started
start = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_START,
!pcmk_is_set(state, instance_starting),
true);
started = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_RUNNING,
!pcmk_is_set(state, instance_starting),
false);
started->priority = INFINITY;
if (pcmk_any_flags_set(state, instance_active|instance_starting)) {
pe__set_action_flags(started, pe_action_runnable);
}
// Create pseudo-actions for rsc stop and stopped
stop = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOP,
!pcmk_is_set(state, instance_stopping),
true);
stopped = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOPPED,
!pcmk_is_set(state, instance_stopping),
true);
stopped->priority = INFINITY;
if (!pcmk_is_set(state, instance_restarting)) {
pe__set_action_flags(stop, pe_action_migrate_runnable);
}
if (collective->variant == pcmk_rsc_variant_clone) {
pe__create_clone_notif_pseudo_ops(collective, start, started, stop,
stopped);
}
}
/*!
* \internal
* \brief Get a list of clone instances or bundle replica containers
*
* \param[in] rsc Clone or bundle resource
*
* \return Clone instances if \p rsc is a clone, or a newly created list of
* \p rsc's replica containers if \p rsc is a bundle
* \note The caller must call free_instance_list() on the result when the list
* is no longer needed.
*/
static inline GList *
get_instance_list(const pe_resource_t *rsc)
{
if (rsc->variant == pcmk_rsc_variant_bundle) {
return pe__bundle_containers(rsc);
} else {
return rsc->children;
}
}
/*!
* \internal
* \brief Free any memory created by get_instance_list()
*
* \param[in] rsc Clone or bundle resource passed to get_instance_list()
* \param[in,out] list Return value of get_instance_list() for \p rsc
*/
static inline void
free_instance_list(const pe_resource_t *rsc, GList *list)
{
if (list != rsc->children) {
g_list_free(list);
}
}
/*!
* \internal
* \brief Check whether an instance is compatible with a role and node
*
* \param[in] instance Clone instance or bundle replica container
* \param[in] node Instance must match this node
* \param[in] role If not pcmk_role_unknown, instance must match this role
* \param[in] current If true, compare instance's original node and role,
* otherwise compare assigned next node and role
*
* \return true if \p instance is compatible with \p node and \p role,
* otherwise false
*/
bool
pcmk__instance_matches(const pe_resource_t *instance, const pe_node_t *node,
enum rsc_role_e role, bool current)
{
pe_node_t *instance_node = NULL;
CRM_CHECK((instance != NULL) && (node != NULL), return false);
if ((role != pcmk_role_unknown)
&& (role != instance->fns->state(instance, current))) {
pe_rsc_trace(instance,
"%s is not a compatible instance (role is not %s)",
instance->id, role2text(role));
return false;
}
if (!is_set_recursive(instance, pcmk_rsc_blocked, true)) {
// We only want instances that haven't failed
instance_node = instance->fns->location(instance, NULL, current);
}
if (instance_node == NULL) {
pe_rsc_trace(instance,
"%s is not a compatible instance (not assigned to a node)",
instance->id);
return false;
}
if (!pe__same_node(instance_node, node)) {
pe_rsc_trace(instance,
"%s is not a compatible instance (assigned to %s not %s)",
instance->id, pe__node_name(instance_node),
pe__node_name(node));
return false;
}
return true;
}
/*!
* \internal
* \brief Find an instance that matches a given resource by node and role
*
* \param[in] match_rsc Resource that instance must match (for logging only)
* \param[in] rsc Clone or bundle resource to check for matching instance
* \param[in] node Instance must match this node
* \param[in] role If not pcmk_role_unknown, instance must match this role
* \param[in] current If true, compare instance's original node and role,
* otherwise compare assigned next node and role
*
* \return \p rsc instance matching \p node and \p role if any, otherwise NULL
*/
static pe_resource_t *
find_compatible_instance_on_node(const pe_resource_t *match_rsc,
const pe_resource_t *rsc,
const pe_node_t *node, enum rsc_role_e role,
bool current)
{
GList *instances = NULL;
instances = get_instance_list(rsc);
for (GList *iter = instances; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
if (pcmk__instance_matches(instance, node, role, current)) {
pe_rsc_trace(match_rsc,
"Found %s %s instance %s compatible with %s on %s",
role == pcmk_role_unknown? "matching" : role2text(role),
rsc->id, instance->id, match_rsc->id,
pe__node_name(node));
free_instance_list(rsc, instances); // Only frees list, not contents
return instance;
}
}
free_instance_list(rsc, instances);
pe_rsc_trace(match_rsc, "No %s %s instance found compatible with %s on %s",
((role == pcmk_role_unknown)? "matching" : role2text(role)),
rsc->id, match_rsc->id, pe__node_name(node));
return NULL;
}
/*!
* \internal
* \brief Find a clone instance or bundle container compatible with a resource
*
* \param[in] match_rsc Resource that instance must match
* \param[in] rsc Clone or bundle resource to check for matching instance
* \param[in] role If not pcmk_role_unknown, instance must match this role
* \param[in] current If true, compare instance's original node and role,
* otherwise compare assigned next node and role
*
* \return Compatible (by \p role and \p match_rsc location) instance of \p rsc
* if any, otherwise NULL
*/
pe_resource_t *
pcmk__find_compatible_instance(const pe_resource_t *match_rsc,
const pe_resource_t *rsc, enum rsc_role_e role,
bool current)
{
pe_resource_t *instance = NULL;
GList *nodes = NULL;
const pe_node_t *node = match_rsc->fns->location(match_rsc, NULL, current);
// If match_rsc has a node, check only that node
if (node != NULL) {
return find_compatible_instance_on_node(match_rsc, rsc, node, role,
current);
}
// Otherwise check for an instance matching any of match_rsc's allowed nodes
nodes = pcmk__sort_nodes(g_hash_table_get_values(match_rsc->allowed_nodes),
NULL);
for (GList *iter = nodes; (iter != NULL) && (instance == NULL);
iter = iter->next) {
instance = find_compatible_instance_on_node(match_rsc, rsc,
(pe_node_t *) iter->data,
role, current);
}
if (instance == NULL) {
pe_rsc_debug(rsc, "No %s instance found compatible with %s",
rsc->id, match_rsc->id);
}
g_list_free(nodes);
return instance;
}
/*!
* \internal
* \brief Unassign an instance if mandatory ordering has no interleave match
*
* \param[in] first 'First' action in an ordering
* \param[in] then 'Then' action in an ordering
* \param[in,out] then_instance 'Then' instance that has no interleave match
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in] current If true, "then" action is stopped or demoted
*
* \return true if \p then_instance was unassigned, otherwise false
*/
static bool
unassign_if_mandatory(const pe_action_t *first, const pe_action_t *then,
pe_resource_t *then_instance, uint32_t type, bool current)
{
// Allow "then" instance to go down even without an interleave match
if (current) {
pe_rsc_trace(then->rsc,
"%s has no instance to order before stopping "
"or demoting %s",
first->rsc->id, then_instance->id);
/* If the "first" action must be runnable, but there is no "first"
* instance, the "then" instance must not be allowed to come up.
*/
} else if (pcmk_any_flags_set(type, pe_order_runnable_left
|pe_order_implies_then)) {
pe_rsc_info(then->rsc,
"Inhibiting %s from being active "
"because there is no %s instance to interleave",
then_instance->id, first->rsc->id);
return pcmk__assign_resource(then_instance, NULL, true, true);
}
return false;
}
/*!
* \internal
* \brief Find first matching action for a clone instance or bundle container
*
* \param[in] action Action in an interleaved ordering
* \param[in] instance Clone instance or bundle container being interleaved
* \param[in] action_name Action to look for
* \param[in] node If not NULL, require action to be on this node
* \param[in] for_first If true, \p instance is the 'first' resource in the
* ordering, otherwise it is the 'then' resource
*
* \return First action for \p instance (or in some cases if \p instance is a
* bundle container, its containerized resource) that matches
* \p action_name and \p node if any, otherwise NULL
*/
static pe_action_t *
find_instance_action(const pe_action_t *action, const pe_resource_t *instance,
const char *action_name, const pe_node_t *node,
bool for_first)
{
const pe_resource_t *rsc = NULL;
pe_action_t *matching_action = NULL;
/* If instance is a bundle container, sometimes we should interleave the
* action for the container itself, and sometimes for the containerized
* resource.
*
* For example, given "start bundle A then bundle B", B likely requires the
* service inside A's container to be active, rather than just the
* container, so we should interleave the action for A's containerized
* resource. On the other hand, it's possible B's container itself requires
* something from A, so we should interleave the action for B's container.
*
* Essentially, for 'first', we should use the containerized resource for
* everything except stop, and for 'then', we should use the container for
* everything except promote and demote (which can only be performed on the
* containerized resource).
*/
if ((for_first && !pcmk__str_any_of(action->task, PCMK_ACTION_STOP,
PCMK_ACTION_STOPPED, NULL))
|| (!for_first && pcmk__str_any_of(action->task, PCMK_ACTION_PROMOTE,
PCMK_ACTION_PROMOTED,
PCMK_ACTION_DEMOTE,
PCMK_ACTION_DEMOTED, NULL))) {
rsc = pe__get_rsc_in_container(instance);
}
if (rsc == NULL) {
rsc = instance; // No containerized resource, use instance itself
} else {
node = NULL; // Containerized actions are on bundle-created guest
}
matching_action = find_first_action(rsc->actions, NULL, action_name, node);
if (matching_action != NULL) {
return matching_action;
}
if (pcmk_is_set(instance->flags, pcmk_rsc_removed)
|| pcmk__str_any_of(action_name, PCMK_ACTION_STOP, PCMK_ACTION_DEMOTE,
NULL)) {
crm_trace("No %s action found for %s%s",
action_name,
pcmk_is_set(instance->flags, pcmk_rsc_removed)? "orphan " : "",
instance->id);
} else {
crm_err("No %s action found for %s to interleave (bug?)",
action_name, instance->id);
}
return NULL;
}
/*!
* \internal
* \brief Get the original action name of a bundle or clone action
*
* Given an action for a bundle or clone, get the original action name,
* mapping notify to the action being notified, and if the instances are
* primitives, mapping completion actions to the action that was completed
* (for example, stopped to stop).
*
* \param[in] action Clone or bundle action to check
*
* \return Original action name for \p action
*/
static const char *
orig_action_name(const pe_action_t *action)
{
const pe_resource_t *instance = action->rsc->children->data; // Any instance
char *action_type = NULL;
const char *action_name = action->task;
enum action_tasks orig_task = pcmk_action_unspecified;
if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY,
PCMK_ACTION_NOTIFIED, NULL)) {
// action->uuid is RSC_(confirmed-){pre,post}_notify_ACTION_INTERVAL
CRM_CHECK(parse_op_key(action->uuid, NULL, &action_type, NULL),
return task2text(pcmk_action_unspecified));
action_name = strstr(action_type, "_notify_");
CRM_CHECK(action_name != NULL,
return task2text(pcmk_action_unspecified));
action_name += strlen("_notify_");
}
orig_task = get_complex_task(instance, action_name);
free(action_type);
return task2text(orig_task);
}
/*!
* \internal
* \brief Update two interleaved actions according to an ordering between them
*
* Given information about an ordering of two interleaved actions, update the
* actions' flags (and runnable_before members if appropriate) as appropriate
* for the ordering. Effects may cascade to other orderings involving the
* actions as well.
*
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
static uint32_t
update_interleaved_actions(pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t filter,
uint32_t type)
{
GList *instances = NULL;
uint32_t changed = pcmk__updated_none;
const char *orig_first_task = orig_action_name(first);
// Stops and demotes must be interleaved with instance on current node
bool current = pcmk__ends_with(first->uuid, "_" PCMK_ACTION_STOPPED "_0")
|| pcmk__ends_with(first->uuid,
"_" PCMK_ACTION_DEMOTED "_0");
// Update the specified actions for each "then" instance individually
instances = get_instance_list(then->rsc);
for (GList *iter = instances; iter != NULL; iter = iter->next) {
pe_resource_t *first_instance = NULL;
pe_resource_t *then_instance = iter->data;
pe_action_t *first_action = NULL;
pe_action_t *then_action = NULL;
// Find a "first" instance to interleave with this "then" instance
first_instance = pcmk__find_compatible_instance(then_instance,
first->rsc,
pcmk_role_unknown,
current);
if (first_instance == NULL) { // No instance can be interleaved
if (unassign_if_mandatory(first, then, then_instance, type,
current)) {
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
continue;
}
first_action = find_instance_action(first, first_instance,
orig_first_task, node, true);
if (first_action == NULL) {
continue;
}
then_action = find_instance_action(then, then_instance, then->task,
node, false);
if (then_action == NULL) {
continue;
}
if (order_actions(first_action, then_action, type)) {
pcmk__set_updated_flags(changed, first,
pcmk__updated_first|pcmk__updated_then);
}
changed |= then_instance->cmds->update_ordered_actions(
first_action, then_action, node,
first_instance->cmds->action_flags(first_action, node), filter,
type, then->rsc->cluster);
}
free_instance_list(then->rsc, instances);
return changed;
}
/*!
* \internal
* \brief Check whether two actions in an ordering can be interleaved
*
* \param[in] first 'First' action in the ordering
* \param[in] then 'Then' action in the ordering
*
* \return true if \p first and \p then can be interleaved, otherwise false
*/
static bool
can_interleave_actions(const pe_action_t *first, const pe_action_t *then)
{
bool interleave = false;
pe_resource_t *rsc = NULL;
if ((first->rsc == NULL) || (then->rsc == NULL)) {
crm_trace("Not interleaving %s with %s: not resource actions",
first->uuid, then->uuid);
return false;
}
if (first->rsc == then->rsc) {
crm_trace("Not interleaving %s with %s: same resource",
first->uuid, then->uuid);
return false;
}
if ((first->rsc->variant < pcmk_rsc_variant_clone)
|| (then->rsc->variant < pcmk_rsc_variant_clone)) {
crm_trace("Not interleaving %s with %s: not clones or bundles",
first->uuid, then->uuid);
return false;
}
if (pcmk__ends_with(then->uuid, "_stop_0")
|| pcmk__ends_with(then->uuid, "_demote_0")) {
rsc = first->rsc;
} else {
rsc = then->rsc;
}
interleave = crm_is_true(g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_INTERLEAVE));
pe_rsc_trace(rsc, "'%s then %s' will %sbe interleaved (based on %s)",
first->uuid, then->uuid, (interleave? "" : "not "), rsc->id);
return interleave;
}
/*!
* \internal
* \brief Update non-interleaved instance actions according to an ordering
*
* Given information about an ordering of two non-interleaved actions, update
* the actions' flags (and runnable_before members if appropriate) as
* appropriate for the ordering. Effects may cascade to other orderings
* involving the actions as well.
*
* \param[in,out] instance Clone instance or bundle container
* \param[in,out] first "First" action in ordering
* \param[in] then "Then" action in ordering (for \p instance's parent)
* \param[in] node If not NULL, limit scope of ordering to this node
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
static uint32_t
update_noninterleaved_actions(pe_resource_t *instance, pe_action_t *first,
const pe_action_t *then, const pe_node_t *node,
uint32_t flags, uint32_t filter, uint32_t type)
{
pe_action_t *instance_action = NULL;
uint32_t instance_flags = 0;
uint32_t changed = pcmk__updated_none;
// Check whether instance has an equivalent of "then" action
instance_action = find_first_action(instance->actions, NULL, then->task,
node);
if (instance_action == NULL) {
return changed;
}
// Check whether action is runnable
instance_flags = instance->cmds->action_flags(instance_action, node);
if (!pcmk_is_set(instance_flags, pe_action_runnable)) {
return changed;
}
// If so, update actions for the instance
changed = instance->cmds->update_ordered_actions(first, instance_action,
node, flags, filter, type,
instance->cluster);
// Propagate any changes to later actions
if (pcmk_is_set(changed, pcmk__updated_then)) {
for (GList *after_iter = instance_action->actions_after;
after_iter != NULL; after_iter = after_iter->next) {
pe_action_wrapper_t *after = after_iter->data;
pcmk__update_action_for_orderings(after->action, instance->cluster);
}
}
return changed;
}
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two clone or bundle actions, update
* the actions' flags (and runnable_before members if appropriate) as
* appropriate for the ordering. Effects may cascade to other orderings
* involving the actions as well.
*
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t
pcmk__instance_update_ordered_actions(pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set)
{
CRM_ASSERT((first != NULL) && (then != NULL) && (data_set != NULL));
if (then->rsc == NULL) {
return pcmk__updated_none;
} else if (can_interleave_actions(first, then)) {
return update_interleaved_actions(first, then, node, filter, type);
} else {
uint32_t changed = pcmk__updated_none;
GList *instances = get_instance_list(then->rsc);
// Update actions for the clone or bundle resource itself
changed |= pcmk__update_ordered_actions(first, then, node, flags,
filter, type, data_set);
// Update the 'then' clone instances or bundle containers individually
for (GList *iter = instances; iter != NULL; iter = iter->next) {
pe_resource_t *instance = iter->data;
changed |= update_noninterleaved_actions(instance, first, then,
node, flags, filter, type);
}
free_instance_list(then->rsc, instances);
return changed;
}
}
#define pe__clear_action_summary_flags(flags, action, flag) do { \
flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Action summary", action->rsc->id, \
flags, flag, #flag); \
} while (0)
/*!
* \internal
* \brief Return action flags for a given clone or bundle action
*
* \param[in,out] action Action for a clone or bundle
* \param[in] instances Clone instances or bundle containers
* \param[in] node If not NULL, limit effects to this node
*
* \return Flags appropriate to \p action on \p node
*/
uint32_t
pcmk__collective_action_flags(pe_action_t *action, const GList *instances,
const pe_node_t *node)
{
bool any_runnable = false;
const char *action_name = orig_action_name(action);
// Set original assumptions (optional and runnable may be cleared below)
uint32_t flags = pe_action_optional|pe_action_runnable|pe_action_pseudo;
for (const GList *iter = instances; iter != NULL; iter = iter->next) {
const pe_resource_t *instance = iter->data;
const pe_node_t *instance_node = NULL;
pe_action_t *instance_action = NULL;
uint32_t instance_flags;
// Node is relevant only to primitive instances
if (instance->variant == pcmk_rsc_variant_primitive) {
instance_node = node;
}
instance_action = find_first_action(instance->actions, NULL,
action_name, instance_node);
if (instance_action == NULL) {
pe_rsc_trace(action->rsc, "%s has no %s action on %s",
instance->id, action_name, pe__node_name(node));
continue;
}
pe_rsc_trace(action->rsc, "%s has %s for %s on %s",
instance->id, instance_action->uuid, action_name,
pe__node_name(node));
instance_flags = instance->cmds->action_flags(instance_action, node);
// If any instance action is mandatory, so is the collective action
if (pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(instance_flags, pe_action_optional)) {
pe_rsc_trace(instance, "%s is mandatory because %s is",
action->uuid, instance_action->uuid);
pe__clear_action_summary_flags(flags, action, pe_action_optional);
pe__clear_action_flags(action, pe_action_optional);
}
// If any instance action is runnable, so is the collective action
if (pcmk_is_set(instance_flags, pe_action_runnable)) {
any_runnable = true;
}
}
if (!any_runnable) {
pe_rsc_trace(action->rsc,
"%s is not runnable because no instance can run %s",
action->uuid, action_name);
pe__clear_action_summary_flags(flags, action, pe_action_runnable);
if (node == NULL) {
pe__clear_action_flags(action, pe_action_runnable);
}
}
return flags;
}
diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c
index 2a3a571d52..7a30a4bf20 100644
--- a/lib/pacemaker/pcmk_sched_primitive.c
+++ b/lib/pacemaker/pcmk_sched_primitive.c
@@ -1,1652 +1,1652 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <stdint.h> // uint8_t, uint32_t
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void promote_resource(pe_resource_t *rsc, pe_node_t *node,
bool optional);
static void assert_role_error(pe_resource_t *rsc, pe_node_t *node,
bool optional);
#define RSC_ROLE_MAX (pcmk_role_promoted + 1)
static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* This array lists the immediate next role when transitioning from one role
* to a target role. For example, when going from Stopped to Promoted, the
* next role is Unpromoted, because the resource must be started before it
* can be promoted. The current state then becomes Started, which is fed
* into this array again, giving a next role of Promoted.
*
* Current role Immediate next role Final target role
* ------------ ------------------- -----------------
*/
/* Unknown */ { pcmk_role_unknown, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_stopped, /* Started */
pcmk_role_stopped, /* Unpromoted */
pcmk_role_stopped, /* Promoted */
},
/* Stopped */ { pcmk_role_stopped, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_started, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_unpromoted, /* Promoted */
},
/* Started */ { pcmk_role_stopped, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_started, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_promoted, /* Promoted */
},
/* Unpromoted */ { pcmk_role_stopped, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_stopped, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_promoted, /* Promoted */
},
/* Promoted */ { pcmk_role_stopped, /* Unknown */
pcmk_role_unpromoted, /* Stopped */
pcmk_role_unpromoted, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_promoted, /* Promoted */
},
};
/*!
* \internal
* \brief Function to schedule actions needed for a role change
*
* \param[in,out] rsc Resource whose role is changing
* \param[in,out] node Node where resource will be in its next role
* \param[in] optional Whether scheduled actions should be optional
*/
typedef void (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *node,
bool optional);
static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* This array lists the function needed to transition directly from one role
* to another. NULL indicates that nothing is needed.
*
* Current role Transition function Next role
* ------------ ------------------- ----------
*/
/* Unknown */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
assert_role_error, /* Started */
assert_role_error, /* Unpromoted */
assert_role_error, /* Promoted */
},
/* Stopped */ { assert_role_error, /* Unknown */
NULL, /* Stopped */
start_resource, /* Started */
start_resource, /* Unpromoted */
assert_role_error, /* Promoted */
},
/* Started */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
NULL, /* Started */
NULL, /* Unpromoted */
promote_resource, /* Promoted */
},
/* Unpromoted */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
stop_resource, /* Started */
NULL, /* Unpromoted */
promote_resource, /* Promoted */
},
/* Promoted */ { assert_role_error, /* Unknown */
demote_resource, /* Stopped */
demote_resource, /* Started */
demote_resource, /* Unpromoted */
NULL, /* Promoted */
},
};
/*!
* \internal
* \brief Get a list of a resource's allowed nodes sorted by node score
*
* \param[in] rsc Resource to check
*
* \return List of allowed nodes sorted by node score
*/
static GList *
sorted_allowed_nodes(const pe_resource_t *rsc)
{
if (rsc->allowed_nodes != NULL) {
GList *nodes = g_hash_table_get_values(rsc->allowed_nodes);
if (nodes != NULL) {
return pcmk__sort_nodes(nodes, pe__current_node(rsc));
}
}
return NULL;
}
/*!
* \internal
* \brief Assign a resource to its best allowed node, if possible
*
* \param[in,out] rsc Resource to choose a node for
* \param[in] prefer If not \c NULL, prefer this node when all else
* equal
* \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
* node, set next role to stopped and update
* existing actions
*
* \return true if \p rsc could be assigned to a node, otherwise false
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
static bool
assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer, bool stop_if_fail)
{
GList *nodes = NULL;
pe_node_t *chosen = NULL;
pe_node_t *best = NULL;
const pe_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);
if (prefer == NULL) {
prefer = most_free_node;
}
- if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
// We've already finished assignment of resources to nodes
return rsc->allocated_to != NULL;
}
// Sort allowed nodes by score
nodes = sorted_allowed_nodes(rsc);
if (nodes != NULL) {
best = (pe_node_t *) nodes->data; // First node has best score
}
if ((prefer != NULL) && (nodes != NULL)) {
// Get the allowed node version of prefer
chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (chosen == NULL) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
pe__node_name(prefer), rsc->id);
/* Favor the preferred node as long as its score is at least as good as
* the best allowed node's.
*
* An alternative would be to favor the preferred node even if the best
* node is better, when the best node's score is less than INFINITY.
*/
} else if (chosen->weight < best->weight) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
pe__node_name(chosen), rsc->id);
chosen = NULL;
} else if (!pcmk__node_available(chosen, true, false)) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
pe__node_name(chosen), rsc->id);
chosen = NULL;
} else {
pe_rsc_trace(rsc,
"Chose preferred node %s for %s "
"(ignoring %d candidates)",
pe__node_name(chosen), rsc->id, g_list_length(nodes));
}
}
if ((chosen == NULL) && (best != NULL)) {
/* Either there is no preferred node, or the preferred node is not
* suitable, but another node is allowed to run the resource.
*/
chosen = best;
if (!pe_rsc_is_unique_clone(rsc->parent)
&& (chosen->weight > 0) // Zero not acceptable
&& pcmk__node_available(chosen, false, false)) {
/* If the resource is already running on a node, prefer that node if
* it is just as good as the chosen node.
*
* We don't do this for unique clone instances, because
* pcmk__assign_instances() has already assigned instances to their
* running nodes when appropriate, and if we get here, we don't want
* remaining unassigned instances to prefer a node that's already
* running another instance.
*/
pe_node_t *running = pe__current_node(rsc);
if (running == NULL) {
// Nothing to do
} else if (!pcmk__node_available(running, true, false)) {
pe_rsc_trace(rsc,
"Current node for %s (%s) can't run resources",
rsc->id, pe__node_name(running));
} else {
int nodes_with_best_score = 1;
for (GList *iter = nodes->next; iter; iter = iter->next) {
pe_node_t *allowed = (pe_node_t *) iter->data;
if (allowed->weight != chosen->weight) {
// The nodes are sorted by score, so no more are equal
break;
}
if (pe__same_node(allowed, running)) {
// Scores are equal, so prefer the current node
chosen = allowed;
}
nodes_with_best_score++;
}
if (nodes_with_best_score > 1) {
uint8_t log_level = LOG_INFO;
if (chosen->weight >= INFINITY) {
log_level = LOG_WARNING;
}
do_crm_log(log_level,
"Chose %s for %s from %d nodes with score %s",
pe__node_name(chosen), rsc->id,
nodes_with_best_score,
pcmk_readable_score(chosen->weight));
}
}
}
pe_rsc_trace(rsc, "Chose %s for %s from %d candidates",
pe__node_name(chosen), rsc->id, g_list_length(nodes));
}
pcmk__assign_resource(rsc, chosen, false, stop_if_fail);
g_list_free(nodes);
return rsc->allocated_to != NULL;
}
/*!
* \internal
* \brief Apply a "this with" colocation to a node's allowed node scores
*
* \param[in,out] colocation Colocation to apply
* \param[in,out] rsc Resource being assigned
*/
static void
apply_this_with(pcmk__colocation_t *colocation, pe_resource_t *rsc)
{
GHashTable *archive = NULL;
pe_resource_t *other = colocation->primary;
// In certain cases, we will need to revert the node scores
if ((colocation->dependent_role >= pcmk_role_promoted)
|| ((colocation->score < 0) && (colocation->score > -INFINITY))) {
archive = pcmk__copy_node_table(rsc->allowed_nodes);
}
- if (pcmk_is_set(other->flags, pe_rsc_provisional)) {
+ if (pcmk_is_set(other->flags, pcmk_rsc_unassigned)) {
pe_rsc_trace(rsc,
"%s: Assigning colocation %s primary %s first"
"(score=%d role=%s)",
rsc->id, colocation->id, other->id,
colocation->score, role2text(colocation->dependent_role));
other->cmds->assign(other, NULL, true);
}
// Apply the colocation score to this resource's allowed node scores
rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
if ((archive != NULL)
&& !pcmk__any_node_available(rsc->allowed_nodes)) {
pe_rsc_info(rsc,
"%s: Reverting scores from colocation with %s "
"because no nodes allowed",
rsc->id, other->id);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = archive;
archive = NULL;
}
if (archive != NULL) {
g_hash_table_destroy(archive);
}
}
/*!
* \internal
* \brief Update a Pacemaker Remote node once its connection has been assigned
*
* \param[in] connection Connection resource that has been assigned
*/
static void
remote_connection_assigned(const pe_resource_t *connection)
{
pe_node_t *remote_node = pe_find_node(connection->cluster->nodes,
connection->id);
CRM_CHECK(remote_node != NULL, return);
if ((connection->allocated_to != NULL)
&& (connection->next_role != pcmk_role_stopped)) {
crm_trace("Pacemaker Remote node %s will be online",
remote_node->details->id);
remote_node->details->online = TRUE;
if (remote_node->details->unseen) {
// Avoid unnecessary fence, since we will attempt connection
remote_node->details->unclean = FALSE;
}
} else {
crm_trace("Pacemaker Remote node %s will be shut down "
"(%sassigned connection's next role is %s)",
remote_node->details->id,
((connection->allocated_to == NULL)? "un" : ""),
role2text(connection->next_role));
remote_node->details->shutdown = TRUE;
}
}
/*!
* \internal
* \brief Assign a primitive resource to a node
*
* \param[in,out] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
* \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
* node, set next role to stopped and update
* existing actions
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
pe_node_t *
pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer,
bool stop_if_fail)
{
GList *this_with_colocations = NULL;
GList *with_this_colocations = NULL;
GList *iter = NULL;
pcmk__colocation_t *colocation = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
// Never assign a child without parent being assigned first
if ((rsc->parent != NULL)
&& !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "%s: Assigning parent %s first",
rsc->id, rsc->parent->id);
rsc->parent->cmds->assign(rsc->parent, prefer, stop_if_fail);
}
- if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
// Assignment has already been done
const char *node_name = "no node";
if (rsc->allocated_to != NULL) {
node_name = pe__node_name(rsc->allocated_to);
}
pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, node_name);
return rsc->allocated_to;
}
// Ensure we detect assignment loops
if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
return NULL;
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
pe__show_node_scores(true, rsc, "Pre-assignment", rsc->allowed_nodes,
rsc->cluster);
this_with_colocations = pcmk__this_with_colocations(rsc);
with_this_colocations = pcmk__with_this_colocations(rsc);
// Apply mandatory colocations first, to satisfy as many as possible
for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score <= -CRM_SCORE_INFINITY)
|| (colocation->score >= CRM_SCORE_INFINITY)) {
apply_this_with(colocation, rsc);
}
}
for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score <= -CRM_SCORE_INFINITY)
|| (colocation->score >= CRM_SCORE_INFINITY)) {
pcmk__add_dependent_scores(colocation, rsc);
}
}
pe__show_node_scores(true, rsc, "Mandatory-colocations",
rsc->allowed_nodes, rsc->cluster);
// Then apply optional colocations
for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score > -CRM_SCORE_INFINITY)
&& (colocation->score < CRM_SCORE_INFINITY)) {
apply_this_with(colocation, rsc);
}
}
for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score > -CRM_SCORE_INFINITY)
&& (colocation->score < CRM_SCORE_INFINITY)) {
pcmk__add_dependent_scores(colocation, rsc);
}
}
g_list_free(this_with_colocations);
g_list_free(with_this_colocations);
if (rsc->next_role == pcmk_role_stopped) {
pe_rsc_trace(rsc,
"Banning %s from all nodes because it will be stopped",
rsc->id);
resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE,
rsc->cluster);
} else if ((rsc->next_role > rsc->role)
&& !pcmk_is_set(rsc->cluster->flags, pcmk_sched_quorate)
&& (rsc->cluster->no_quorum_policy == pcmk_no_quorum_freeze)) {
crm_notice("Resource %s cannot be elevated from %s to %s due to "
"no-quorum-policy=freeze",
rsc->id, role2text(rsc->role), role2text(rsc->next_role));
pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze");
}
pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags,
pcmk_sched_output_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
// Unmanage resource if fencing is enabled but no device is configured
if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
&& !pcmk_is_set(rsc->cluster->flags, pcmk_sched_have_fencing)) {
pe__clear_resource_flags(rsc, pcmk_rsc_managed);
}
if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
// Unmanaged resources stay on their current node
const char *reason = NULL;
pe_node_t *assign_to = NULL;
pe__set_next_role(rsc, rsc->role, "unmanaged");
assign_to = pe__current_node(rsc);
if (assign_to == NULL) {
reason = "inactive";
} else if (rsc->role == pcmk_role_promoted) {
reason = "promoted";
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
reason = "failed";
} else {
reason = "active";
}
pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
(assign_to? assign_to->details->uname : "no node"), reason);
pcmk__assign_resource(rsc, assign_to, true, stop_if_fail);
} else if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_stop_all)) {
// Must stop at some point, but be consistent with stop_if_fail
if (stop_if_fail) {
pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources",
rsc->id);
}
pcmk__assign_resource(rsc, NULL, true, stop_if_fail);
} else if (!assign_best_node(rsc, prefer, stop_if_fail)) {
// Assignment failed
if (!pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
} else if ((rsc->running_on != NULL) && stop_if_fail) {
pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
}
}
pe__clear_resource_flags(rsc, pe_rsc_allocating);
if (rsc->is_remote_node) {
remote_connection_assigned(rsc);
}
return rsc->allocated_to;
}
/*!
* \internal
* \brief Schedule actions to bring resource down and back to current role
*
* \param[in,out] rsc Resource to restart
* \param[in,out] current Node that resource should be brought down on
* \param[in] need_stop Whether the resource must be stopped
* \param[in] need_promote Whether the resource must be promoted
*
* \return Role that resource would have after scheduled actions are taken
*/
static void
schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
bool need_stop, bool need_promote)
{
enum rsc_role_e role = rsc->role;
enum rsc_role_e next_role;
rsc_transition_fn fn = NULL;
pe__set_resource_flags(rsc, pe_rsc_restarting);
// Bring resource down to a stop on its current node
while (role != pcmk_role_stopped) {
next_role = rsc_state_matrix[role][pcmk_role_stopped];
pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
(need_stop? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, current, !need_stop);
role = next_role;
}
// Bring resource up to its next role on its next node
while ((rsc->role <= rsc->next_role) && (role != rsc->role)
&& !pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
bool required = need_stop;
next_role = rsc_state_matrix[role][rsc->role];
if ((next_role == pcmk_role_promoted) && need_promote) {
required = true;
}
pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
(required? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, rsc->allocated_to, !required);
role = next_role;
}
pe__clear_resource_flags(rsc, pe_rsc_restarting);
}
/*!
* \internal
* \brief If a resource's next role is not explicitly specified, set a default
*
* \param[in,out] rsc Resource to set next role for
*
* \return "explicit" if next role was explicitly set, otherwise "implicit"
*/
static const char *
set_default_next_role(pe_resource_t *rsc)
{
if (rsc->next_role != pcmk_role_unknown) {
return "explicit";
}
if (rsc->allocated_to == NULL) {
pe__set_next_role(rsc, pcmk_role_stopped, "assignment");
} else {
pe__set_next_role(rsc, pcmk_role_started, "assignment");
}
return "implicit";
}
/*!
* \internal
* \brief Create an action to represent an already pending start
*
* \param[in,out] rsc Resource to create start action for
*/
static void
create_pending_start(pe_resource_t *rsc)
{
pe_action_t *start = NULL;
pe_rsc_trace(rsc,
"Creating action for %s to represent already pending start",
rsc->id);
start = start_action(rsc, rsc->allocated_to, TRUE);
pe__set_action_flags(start, pe_action_print_always);
}
/*!
* \internal
* \brief Schedule actions needed to take a resource to its next role
*
* \param[in,out] rsc Resource to schedule actions for
*/
static void
schedule_role_transition_actions(pe_resource_t *rsc)
{
enum rsc_role_e role = rsc->role;
while (role != rsc->next_role) {
enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role];
rsc_transition_fn fn = NULL;
pe_rsc_trace(rsc,
"Creating action to take %s from %s to %s (ending at %s)",
rsc->id, role2text(role), role2text(next_role),
role2text(rsc->next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, rsc->allocated_to, false);
role = next_role;
}
}
/*!
* \internal
* \brief Create all actions needed for a given primitive resource
*
* \param[in,out] rsc Primitive resource to create actions for
*/
void
pcmk__primitive_create_actions(pe_resource_t *rsc)
{
bool need_stop = false;
bool need_promote = false;
bool is_moving = false;
bool allow_migrate = false;
bool multiply_active = false;
pe_node_t *current = NULL;
unsigned int num_all_active = 0;
unsigned int num_clean_active = 0;
const char *next_role_source = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
next_role_source = set_default_next_role(rsc);
pe_rsc_trace(rsc,
"Creating all actions for %s transition from %s to %s "
"(%s) on %s",
rsc->id, role2text(rsc->role), role2text(rsc->next_role),
next_role_source, pe__node_name(rsc->allocated_to));
current = rsc->fns->active_node(rsc, &num_all_active, &num_clean_active);
g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration,
rsc);
if ((current != NULL) && (rsc->allocated_to != NULL)
&& !pe__same_node(current, rsc->allocated_to)
&& (rsc->next_role >= pcmk_role_started)) {
pe_rsc_trace(rsc, "Moving %s from %s to %s",
rsc->id, pe__node_name(current),
pe__node_name(rsc->allocated_to));
is_moving = true;
allow_migrate = pcmk__rsc_can_migrate(rsc, current);
// This is needed even if migrating (though I'm not sure why ...)
need_stop = true;
}
// Check whether resource is partially migrated and/or multiply active
if ((rsc->partial_migration_source != NULL)
&& (rsc->partial_migration_target != NULL)
&& allow_migrate && (num_all_active == 2)
&& pe__same_node(current, rsc->partial_migration_source)
&& pe__same_node(rsc->allocated_to, rsc->partial_migration_target)) {
/* A partial migration is in progress, and the migration target remains
* the same as when the migration began.
*/
pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
} else if ((rsc->partial_migration_source != NULL)
|| (rsc->partial_migration_target != NULL)) {
// A partial migration is in progress but can't be continued
if (num_all_active > 2) {
// The resource is migrating *and* multiply active!
crm_notice("Forcing recovery of %s because it is migrating "
"from %s to %s and possibly active elsewhere",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
} else {
// The migration source or target isn't available
crm_notice("Forcing recovery of %s because it can no longer "
"migrate from %s to %s",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
}
need_stop = true;
rsc->partial_migration_source = rsc->partial_migration_target = NULL;
allow_migrate = false;
} else if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
multiply_active = (num_all_active > 1);
} else {
/* If a resource has "requires" set to nothing or quorum, don't consider
* it active on unclean nodes (similar to how all resources behave when
* stonith-enabled is false). We can start such resources elsewhere
* before fencing completes, and if we considered the resource active on
* the failed node, we would attempt recovery for being active on
* multiple nodes.
*/
multiply_active = (num_clean_active > 1);
}
if (multiply_active) {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Resource was (possibly) incorrectly multiply active
pe_proc_err("%s resource %s might be active on %u nodes (%s)",
pcmk__s(class, "Untyped"), rsc->id, num_all_active,
recovery2text(rsc->recovery_type));
crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ"
"#Resource_is_Too_Active for more information");
switch (rsc->recovery_type) {
case pcmk_multiply_active_restart:
need_stop = true;
break;
case pcmk_multiply_active_unexpected:
need_stop = true; // stop_resource() will skip expected node
pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
break;
default:
break;
}
} else {
pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
}
if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
create_pending_start(rsc);
}
if (is_moving) {
// Remaining tests are only for resources staying where they are
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
if (pcmk_is_set(rsc->flags, pe_rsc_stop)) {
need_stop = true;
pe_rsc_trace(rsc, "Recovering %s", rsc->id);
} else {
pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
if (rsc->next_role == pcmk_role_promoted) {
need_promote = true;
}
}
} else if (pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
need_stop = true;
} else if ((rsc->role > pcmk_role_started) && (current != NULL)
&& (rsc->allocated_to != NULL)) {
pe_action_t *start = NULL;
pe_rsc_trace(rsc, "Creating start action for promoted resource %s",
rsc->id);
start = start_action(rsc, rsc->allocated_to, TRUE);
if (!pcmk_is_set(start->flags, pe_action_optional)) {
// Recovery of a promoted resource
pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
need_stop = true;
}
}
// Create any actions needed to bring resource down and back up to same role
schedule_restart_actions(rsc, current, need_stop, need_promote);
// Create any actions needed to take resource from this role to the next
schedule_role_transition_actions(rsc);
pcmk__create_recurring_actions(rsc);
if (allow_migrate) {
pcmk__create_migration_actions(rsc, current);
}
}
/*!
* \internal
* \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
*
* \param[in] rsc Resource to check
*/
static void
rsc_avoids_remote_nodes(const pe_resource_t *rsc)
{
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (node->details->remote_rsc != NULL) {
node->weight = -INFINITY;
}
}
}
/*!
* \internal
* \brief Return allowed nodes as (possibly sorted) list
*
* Convert a resource's hash table of allowed nodes to a list. If printing to
* stdout, sort the list, to keep action ID numbers consistent for regression
* test output (while avoiding the performance hit on a live cluster).
*
* \param[in] rsc Resource to check for allowed nodes
*
* \return List of resource's allowed nodes
* \note Callers should take care not to rely on the list being sorted.
*/
static GList *
allowed_nodes_as_list(const pe_resource_t *rsc)
{
GList *allowed_nodes = NULL;
if (rsc->allowed_nodes) {
allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
}
if (!pcmk__is_daemon) {
allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
}
return allowed_nodes;
}
/*!
* \internal
* \brief Create implicit constraints needed for a primitive resource
*
* \param[in,out] rsc Primitive resource to create implicit constraints for
*/
void
pcmk__primitive_internal_constraints(pe_resource_t *rsc)
{
GList *allowed_nodes = NULL;
bool check_unfencing = false;
bool check_utilization = false;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
pe_rsc_trace(rsc,
"Skipping implicit constraints for unmanaged resource %s",
rsc->id);
return;
}
// Whether resource requires unfencing
check_unfencing = !pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)
&& pcmk_is_set(rsc->cluster->flags,
pcmk_sched_enable_unfencing)
&& pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing);
// Whether a non-default placement strategy is used
check_utilization = (g_hash_table_size(rsc->utilization) > 0)
&& !pcmk__str_eq(rsc->cluster->placement_strategy,
"default", pcmk__str_casei);
// Order stops before starts (i.e. restart)
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL,
rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL,
pe_order_optional|pe_order_implies_then|pe_order_restart,
rsc->cluster);
// Promotable ordering: demote before stop, start before promote
if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pcmk_rsc_promotable)
|| (rsc->role > pcmk_role_unpromoted)) {
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_DEMOTE, 0),
NULL,
rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
NULL,
pe_order_promoted_implies_first, rsc->cluster);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
NULL,
rsc, pcmk__op_key(rsc->id, PCMK_ACTION_PROMOTE, 0),
NULL,
pe_order_runnable_left, rsc->cluster);
}
// Don't clear resource history if probing on same node
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0),
NULL, rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0),
NULL, pe_order_same_node|pe_order_then_cancels_first,
rsc->cluster);
// Certain checks need allowed nodes
if (check_unfencing || check_utilization || (rsc->container != NULL)) {
allowed_nodes = allowed_nodes_as_list(rsc);
}
if (check_unfencing) {
g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
}
if (check_utilization) {
pcmk__create_utilization_constraints(rsc, allowed_nodes);
}
if (rsc->container != NULL) {
pe_resource_t *remote_rsc = NULL;
if (rsc->is_remote_node) {
// rsc is the implicit remote connection for a guest or bundle node
/* Guest resources are not allowed to run on Pacemaker Remote nodes,
* to avoid nesting remotes. However, bundles are allowed.
*/
if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
rsc_avoids_remote_nodes(rsc->container);
}
/* If someone cleans up a guest or bundle node's container, we will
* likely schedule a (re-)probe of the container and recovery of the
* connection. Order the connection stop after the container probe,
* so that if we detect the container running, we will trigger a new
* transition and avoid the unnecessary recovery.
*/
pcmk__order_resource_actions(rsc->container, PCMK_ACTION_MONITOR,
rsc, PCMK_ACTION_STOP,
pe_order_optional);
/* A user can specify that a resource must start on a Pacemaker Remote
* node by explicitly configuring it with the container=NODENAME
* meta-attribute. This is of questionable merit, since location
* constraints can accomplish the same thing. But we support it, so here
* we check whether a resource (that is not itself a remote connection)
* has container set to a remote node or guest node resource.
*/
} else if (rsc->container->is_remote_node) {
remote_rsc = rsc->container;
} else {
remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
rsc->container);
}
if (remote_rsc != NULL) {
/* Force the resource on the Pacemaker Remote node instead of
* colocating the resource with the container resource.
*/
for (GList *item = allowed_nodes; item; item = item->next) {
pe_node_t *node = item->data;
if (node->details->remote_rsc != remote_rsc) {
node->weight = -INFINITY;
}
}
} else {
/* This resource is either a filler for a container that does NOT
* represent a Pacemaker Remote node, or a Pacemaker Remote
* connection resource for a guest node or bundle.
*/
int score;
crm_trace("Order and colocate %s relative to its container %s",
rsc->id, rsc->container->id);
pcmk__new_ordering(rsc->container,
pcmk__op_key(rsc->container->id,
PCMK_ACTION_START, 0),
NULL, rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
NULL,
pe_order_implies_then|pe_order_runnable_left,
rsc->cluster);
pcmk__new_ordering(rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
NULL,
rsc->container,
pcmk__op_key(rsc->container->id,
PCMK_ACTION_STOP, 0),
NULL, pe_order_implies_first, rsc->cluster);
if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
score = 10000; /* Highly preferred but not essential */
} else {
score = INFINITY; /* Force them to run on the same host */
}
pcmk__new_colocation("#resource-with-container", NULL, score, rsc,
rsc->container, NULL, NULL,
pcmk__coloc_influence);
}
}
if (rsc->is_remote_node
|| pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)) {
/* Remote connections and fencing devices are not allowed to run on
* Pacemaker Remote nodes
*/
rsc_avoids_remote_nodes(rsc);
}
g_list_free(allowed_nodes);
}
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node scores (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
enum pcmk__coloc_affects filter_results;
CRM_ASSERT((dependent != NULL) && (primary != NULL)
&& (colocation != NULL));
if (for_dependent) {
// Always process on behalf of primary resource
primary->cmds->apply_coloc_score(dependent, primary, colocation, false);
return;
}
filter_results = pcmk__colocation_affects(dependent, primary, colocation,
false);
pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
((colocation->score > 0)? "Colocating" : "Anti-colocating"),
dependent->id, primary->id, colocation->id, colocation->score,
filter_results);
switch (filter_results) {
case pcmk__coloc_affects_role:
pcmk__apply_coloc_to_priority(dependent, primary, colocation);
break;
case pcmk__coloc_affects_location:
pcmk__apply_coloc_to_scores(dependent, primary, colocation);
break;
default: // pcmk__coloc_affects_nothing
return;
}
}
/* Primitive implementation of
* resource_alloc_functions_t:with_this_colocations()
*/
void
pcmk__with_primitive_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
&& (list != NULL));
if (rsc == orig_rsc) {
/* For the resource itself, add all of its own colocations and relevant
* colocations from its parent (if any).
*/
pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
if (rsc->parent != NULL) {
rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc, list);
}
} else {
// For an ancestor, add only explicitly configured constraints
for (GList *iter = rsc->rsc_cons_lhs; iter != NULL; iter = iter->next) {
pcmk__colocation_t *colocation = iter->data;
if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
pcmk__add_with_this(list, colocation, orig_rsc);
}
}
}
}
/* Primitive implementation of
* resource_alloc_functions_t:this_with_colocations()
*/
void
pcmk__primitive_with_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
&& (list != NULL));
if (rsc == orig_rsc) {
/* For the resource itself, add all of its own colocations and relevant
* colocations from its parent (if any).
*/
pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
if (rsc->parent != NULL) {
rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc, list);
}
} else {
// For an ancestor, add only explicitly configured constraints
for (GList *iter = rsc->rsc_cons; iter != NULL; iter = iter->next) {
pcmk__colocation_t *colocation = iter->data;
if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
pcmk__add_this_with(list, colocation, orig_rsc);
}
}
}
}
/*!
* \internal
* \brief Return action flags for a given primitive resource action
*
* \param[in,out] action Action to get flags for
* \param[in] node If not NULL, limit effects to this node (ignored)
*
* \return Flags appropriate to \p action on \p node
*/
uint32_t
pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node)
{
CRM_ASSERT(action != NULL);
return (uint32_t) action->flags;
}
/*!
* \internal
* \brief Check whether a node is a multiply active resource's expected node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p rsc is multiply active with multiple-active set to
* stop_unexpected, and \p node is the node where it will remain active
* \note This assumes that the resource's next role cannot be changed to stopped
* after this is called, which should be reasonable if status has already
* been unpacked and resources have been assigned to nodes.
*/
static bool
is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
{
return pcmk_all_flags_set(rsc->flags,
pe_rsc_stop_unexpected|pe_rsc_restarting)
&& (rsc->next_role > pcmk_role_stopped)
&& pe__same_node(rsc->allocated_to, node);
}
/*!
* \internal
* \brief Schedule actions needed to stop a resource wherever it is active
*
* \param[in,out] rsc Resource being stopped
* \param[in] node Node where resource is being stopped (ignored)
* \param[in] optional Whether actions should be optional
*/
static void
stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *current = (pe_node_t *) iter->data;
pe_action_t *stop = NULL;
if (is_expected_node(rsc, current)) {
/* We are scheduling restart actions for a multiply active resource
* with multiple-active=stop_unexpected, and this is where it should
* not be stopped.
*/
pe_rsc_trace(rsc,
"Skipping stop of multiply active resource %s "
"on expected node %s",
rsc->id, pe__node_name(current));
continue;
}
if (rsc->partial_migration_target != NULL) {
// Continue migration if node originally was and remains target
if (pe__same_node(current, rsc->partial_migration_target)
&& pe__same_node(current, rsc->allocated_to)) {
pe_rsc_trace(rsc,
"Skipping stop of %s on %s "
"because partial migration there will continue",
rsc->id, pe__node_name(current));
continue;
} else {
pe_rsc_trace(rsc,
"Forcing stop of %s on %s "
"because migration target changed",
rsc->id, pe__node_name(current));
optional = false;
}
}
pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
rsc->id, pe__node_name(current));
stop = stop_action(rsc, current, optional);
if (rsc->allocated_to == NULL) {
pe_action_set_reason(stop, "node availability", true);
} else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting
|pe_rsc_stop_unexpected)) {
/* We are stopping a multiply active resource on a node that is
* not its expected node, and we are still scheduling restart
* actions, so the stop is for being multiply active.
*/
pe_action_set_reason(stop, "being multiply active", true);
}
if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
pe__clear_action_flags(stop, pe_action_runnable);
}
if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_remove_after_stop)) {
pcmk__schedule_cleanup(rsc, current, optional);
}
if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) {
pe_action_t *unfence = pe_fence_op(current, PCMK_ACTION_ON, true,
NULL, false, rsc->cluster);
order_actions(stop, unfence, pe_order_implies_first);
if (!pcmk__node_unfenced(current)) {
pe_proc_err("Stopping %s until %s can be unfenced",
rsc->id, pe__node_name(current));
}
}
}
}
/*!
* \internal
* \brief Schedule actions needed to start a resource on a node
*
* \param[in,out] rsc Resource being started
* \param[in,out] node Node where resource should be started
* \param[in] optional Whether actions should be optional
*/
static void
start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
pe_action_t *start = NULL;
CRM_ASSERT(node != NULL);
pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
(optional? "optional" : "required"), rsc->id,
pe__node_name(node), node->weight);
start = start_action(rsc, node, TRUE);
pcmk__order_vs_unfence(rsc, node, start, pe_order_implies_then);
if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) {
pe__clear_action_flags(start, pe_action_optional);
}
if (is_expected_node(rsc, node)) {
/* This could be a problem if the start becomes necessary for other
* reasons later.
*/
pe_rsc_trace(rsc,
"Start of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pe__node_name(node));
pe__set_action_flags(start, pe_action_pseudo);
}
}
/*!
* \internal
* \brief Schedule actions needed to promote a resource on a node
*
* \param[in,out] rsc Resource being promoted
* \param[in] node Node where resource should be promoted
* \param[in] optional Whether actions should be optional
*/
static void
promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
GList *iter = NULL;
GList *action_list = NULL;
bool runnable = true;
CRM_ASSERT(node != NULL);
// Any start must be runnable for promotion to be runnable
action_list = pe__resource_actions(rsc, node, PCMK_ACTION_START, true);
for (iter = action_list; iter != NULL; iter = iter->next) {
pe_action_t *start = (pe_action_t *) iter->data;
if (!pcmk_is_set(start->flags, pe_action_runnable)) {
runnable = false;
}
}
g_list_free(action_list);
if (runnable) {
pe_action_t *promote = promote_action(rsc, node, optional);
pe_rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
(optional? "optional" : "required"), rsc->id,
pe__node_name(node));
if (is_expected_node(rsc, node)) {
/* This could be a problem if the promote becomes necessary for
* other reasons later.
*/
pe_rsc_trace(rsc,
"Promotion of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pe__node_name(node));
pe__set_action_flags(promote, pe_action_pseudo);
}
} else {
pe_rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
rsc->id, pe__node_name(node));
action_list = pe__resource_actions(rsc, node, PCMK_ACTION_PROMOTE,
true);
for (iter = action_list; iter != NULL; iter = iter->next) {
pe_action_t *promote = (pe_action_t *) iter->data;
pe__clear_action_flags(promote, pe_action_runnable);
}
g_list_free(action_list);
}
}
/*!
* \internal
* \brief Schedule actions needed to demote a resource wherever it is active
*
* \param[in,out] rsc Resource being demoted
* \param[in] node Node where resource should be demoted (ignored)
* \param[in] optional Whether actions should be optional
*/
static void
demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
/* Since this will only be called for a primitive (possibly as an instance
* of a collective resource), the resource is multiply active if it is
* running on more than one node, so we want to demote on all of them as
* part of recovery, regardless of which one is the desired node.
*/
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *current = (pe_node_t *) iter->data;
if (is_expected_node(rsc, current)) {
pe_rsc_trace(rsc,
"Skipping demote of multiply active resource %s "
"on expected node %s",
rsc->id, pe__node_name(current));
} else {
pe_rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
(optional? "optional" : "required"), rsc->id,
pe__node_name(current));
demote_action(rsc, current, optional);
}
}
}
static void
assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
CRM_ASSERT(false);
}
/*!
* \internal
* \brief Schedule cleanup of a resource
*
* \param[in,out] rsc Resource to clean up
* \param[in] node Node to clean up on
* \param[in] optional Whether clean-up should be optional
*/
void
pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional)
{
/* If the cleanup is required, its orderings are optional, because they're
* relevant only if both actions are required. Conversely, if the cleanup is
* optional, the orderings make the then action required if the first action
* becomes required.
*/
uint32_t flag = optional? pe_order_implies_then : pe_order_optional;
CRM_CHECK((rsc != NULL) && (node != NULL), return);
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
rsc->id, pe__node_name(node));
return;
}
if (node->details->unclean || !node->details->online) {
pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
rsc->id, pe__node_name(node));
return;
}
crm_notice("Scheduling clean-up of %s on %s", rsc->id, pe__node_name(node));
delete_action(rsc, node, optional);
// stop -> clean-up -> start
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP,
rsc, PCMK_ACTION_DELETE, flag);
pcmk__order_resource_actions(rsc, PCMK_ACTION_DELETE,
rsc, PCMK_ACTION_START, flag);
}
/*!
* \internal
* \brief Add primitive meta-attributes relevant to graph actions to XML
*
* \param[in] rsc Primitive resource whose meta-attributes should be added
* \param[in,out] xml Transition graph action attributes XML to add to
*/
void
pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml)
{
char *name = NULL;
char *value = NULL;
const pe_resource_t *parent = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
&& (xml != NULL));
/* Clone instance numbers get set internally as meta-attributes, and are
* needed in the transition graph (for example, to tell unique clone
* instances apart).
*/
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
if (value != NULL) {
name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
crm_xml_add(xml, name, value);
free(name);
}
// Not sure if this one is really needed ...
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
if (value != NULL) {
name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
crm_xml_add(xml, name, value);
free(name);
}
/* The container meta-attribute can be set on the primitive itself or one of
* its parents (for example, a group inside a container resource), so check
* them all, and keep the highest one found.
*/
for (parent = rsc; parent != NULL; parent = parent->parent) {
if (parent->container != NULL) {
crm_xml_add(xml, CRM_META "_" XML_RSC_ATTR_CONTAINER,
parent->container->id);
}
}
/* Bundle replica children will get their external-ip set internally as a
* meta-attribute. The graph action needs it, but under a different naming
* convention than other meta-attributes.
*/
value = g_hash_table_lookup(rsc->meta, "external-ip");
if (value != NULL) {
crm_xml_add(xml, "pcmk_external_ip", value);
}
}
// Primitive implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__primitive_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
&& (orig_rsc != NULL) && (utilization != NULL));
- if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
return;
}
pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization",
orig_rsc->id, rsc->id);
pcmk__release_node_capacity(utilization, rsc);
}
/*!
* \internal
* \brief Get epoch time of node's shutdown attribute (or now if none)
*
* \param[in,out] node Node to check
*
* \return Epoch time corresponding to shutdown attribute if set or now if not
*/
static time_t
shutdown_time(pe_node_t *node)
{
const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
time_t result = 0;
if (shutdown != NULL) {
long long result_ll;
if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
result = (time_t) result_ll;
}
}
return (result == 0)? get_effective_time(node->details->data_set) : result;
}
/*!
* \internal
* \brief Ban a resource from a node if it's not locked to the node
*
* \param[in] data Node to check
* \param[in,out] user_data Resource to check
*/
static void
ban_if_not_locked(gpointer data, gpointer user_data)
{
const pe_node_t *node = (const pe_node_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
if (strcmp(node->details->uname, rsc->lock_node->details->uname) != 0) {
resource_location(rsc, node, -CRM_SCORE_INFINITY,
XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster);
}
}
// Primitive implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__primitive_shutdown_lock(pe_resource_t *rsc)
{
const char *class = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Fence devices and remote connections can't be locked
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
|| pe__resource_is_remote_conn(rsc)) {
return;
}
if (rsc->lock_node != NULL) {
// The lock was obtained from resource history
if (rsc->running_on != NULL) {
/* The resource was started elsewhere even though it is now
* considered locked. This shouldn't be possible, but as a
* failsafe, we don't want to disturb the resource now.
*/
pe_rsc_info(rsc,
"Cancelling shutdown lock because %s is already active",
rsc->id);
pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster);
rsc->lock_node = NULL;
rsc->lock_time = 0;
}
// Only a resource active on exactly one node can be locked
} else if (pcmk__list_of_1(rsc->running_on)) {
pe_node_t *node = rsc->running_on->data;
if (node->details->shutdown) {
if (node->details->unclean) {
pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
rsc->id, pe__node_name(node));
} else {
rsc->lock_node = node;
rsc->lock_time = shutdown_time(node);
}
}
}
if (rsc->lock_node == NULL) {
// No lock needed
return;
}
if (rsc->cluster->shutdown_lock > 0) {
time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
rsc->id, pe__node_name(rsc->lock_node),
(long long) lock_expiration);
pe__update_recheck_time(++lock_expiration, rsc->cluster);
} else {
pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
rsc->id, pe__node_name(rsc->lock_node));
}
// If resource is locked to one node, ban it from all other nodes
g_list_foreach(rsc->cluster->nodes, ban_if_not_locked, rsc);
}
diff --git a/lib/pacemaker/pcmk_sched_promotable.c b/lib/pacemaker/pcmk_sched_promotable.c
index ad8946f8df..02c446a99e 100644
--- a/lib/pacemaker/pcmk_sched_promotable.c
+++ b/lib/pacemaker/pcmk_sched_promotable.c
@@ -1,1299 +1,1299 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Add implicit promotion ordering for a promotable instance
*
* \param[in,out] clone Clone resource
* \param[in,out] child Instance of \p clone being ordered
* \param[in,out] last Previous instance ordered (NULL if \p child is first)
*/
static void
order_instance_promotion(pe_resource_t *clone, pe_resource_t *child,
pe_resource_t *last)
{
// "Promote clone" -> promote instance -> "clone promoted"
pcmk__order_resource_actions(clone, PCMK_ACTION_PROMOTE,
child, PCMK_ACTION_PROMOTE,
pe_order_optional);
pcmk__order_resource_actions(child, PCMK_ACTION_PROMOTE,
clone, PCMK_ACTION_PROMOTED,
pe_order_optional);
// If clone is ordered, order this instance relative to last
if ((last != NULL) && pe__clone_is_ordered(clone)) {
pcmk__order_resource_actions(last, PCMK_ACTION_PROMOTE,
child, PCMK_ACTION_PROMOTE,
pe_order_optional);
}
}
/*!
* \internal
* \brief Add implicit demotion ordering for a promotable instance
*
* \param[in,out] clone Clone resource
* \param[in,out] child Instance of \p clone being ordered
* \param[in] last Previous instance ordered (NULL if \p child is first)
*/
static void
order_instance_demotion(pe_resource_t *clone, pe_resource_t *child,
pe_resource_t *last)
{
// "Demote clone" -> demote instance -> "clone demoted"
pcmk__order_resource_actions(clone, PCMK_ACTION_DEMOTE, child,
PCMK_ACTION_DEMOTE,
pe_order_implies_first_printed);
pcmk__order_resource_actions(child, PCMK_ACTION_DEMOTE,
clone, PCMK_ACTION_DEMOTED,
pe_order_implies_then_printed);
// If clone is ordered, order this instance relative to last
if ((last != NULL) && pe__clone_is_ordered(clone)) {
pcmk__order_resource_actions(child, PCMK_ACTION_DEMOTE, last,
PCMK_ACTION_DEMOTE, pe_order_optional);
}
}
/*!
* \internal
* \brief Check whether an instance will be promoted or demoted
*
* \param[in] rsc Instance to check
* \param[out] demoting If \p rsc will be demoted, this will be set to true
* \param[out] promoting If \p rsc will be promoted, this will be set to true
*/
static void
check_for_role_change(const pe_resource_t *rsc, bool *demoting, bool *promoting)
{
const GList *iter = NULL;
// If this is a cloned group, check group members recursively
if (rsc->children != NULL) {
for (iter = rsc->children; iter != NULL; iter = iter->next) {
check_for_role_change((const pe_resource_t *) iter->data,
demoting, promoting);
}
return;
}
for (iter = rsc->actions; iter != NULL; iter = iter->next) {
const pe_action_t *action = (const pe_action_t *) iter->data;
if (*promoting && *demoting) {
return;
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
continue;
} else if (pcmk__str_eq(PCMK_ACTION_DEMOTE, action->task,
pcmk__str_none)) {
*demoting = true;
} else if (pcmk__str_eq(PCMK_ACTION_PROMOTE, action->task,
pcmk__str_none)) {
*promoting = true;
}
}
}
/*!
* \internal
* \brief Add promoted-role location constraint scores to an instance's priority
*
* Adjust a promotable clone instance's promotion priority by the scores of any
* location constraints in a list that are both limited to the promoted role and
* for the node where the instance will be placed.
*
* \param[in,out] child Promotable clone instance
* \param[in] location_constraints List of location constraints to apply
* \param[in] chosen Node where \p child will be placed
*/
static void
apply_promoted_locations(pe_resource_t *child,
const GList *location_constraints,
const pe_node_t *chosen)
{
for (const GList *iter = location_constraints; iter; iter = iter->next) {
const pe__location_t *location = iter->data;
const pe_node_t *constraint_node = NULL;
if (location->role_filter == pcmk_role_promoted) {
constraint_node = pe_find_node_id(location->node_list_rh,
chosen->details->id);
}
if (constraint_node != NULL) {
int new_priority = pcmk__add_scores(child->priority,
constraint_node->weight);
pe_rsc_trace(child,
"Applying location %s to %s promotion priority on %s: "
"%s + %s = %s",
location->id, child->id,
pe__node_name(constraint_node),
pcmk_readable_score(child->priority),
pcmk_readable_score(constraint_node->weight),
pcmk_readable_score(new_priority));
child->priority = new_priority;
}
}
}
/*!
* \internal
* \brief Get the node that an instance will be promoted on
*
* \param[in] rsc Promotable clone instance to check
*
* \return Node that \p rsc will be promoted on, or NULL if none
*/
static pe_node_t *
node_to_be_promoted_on(const pe_resource_t *rsc)
{
pe_node_t *node = NULL;
pe_node_t *local_node = NULL;
const pe_resource_t *parent = NULL;
// If this is a cloned group, bail if any group member can't be promoted
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
if (node_to_be_promoted_on(child) == NULL) {
pe_rsc_trace(rsc,
"%s can't be promoted because member %s can't",
rsc->id, child->id);
return NULL;
}
}
node = rsc->fns->location(rsc, NULL, FALSE);
if (node == NULL) {
pe_rsc_trace(rsc, "%s can't be promoted because it won't be active",
rsc->id);
return NULL;
} else if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
if (rsc->fns->state(rsc, TRUE) == pcmk_role_promoted) {
crm_notice("Unmanaged instance %s will be left promoted on %s",
rsc->id, pe__node_name(node));
} else {
pe_rsc_trace(rsc, "%s can't be promoted because it is unmanaged",
rsc->id);
return NULL;
}
} else if (rsc->priority < 0) {
pe_rsc_trace(rsc,
"%s can't be promoted because its promotion priority %d "
"is negative",
rsc->id, rsc->priority);
return NULL;
} else if (!pcmk__node_available(node, false, true)) {
pe_rsc_trace(rsc, "%s can't be promoted because %s can't run resources",
rsc->id, pe__node_name(node));
return NULL;
}
parent = pe__const_top_resource(rsc, false);
local_node = g_hash_table_lookup(parent->allowed_nodes, node->details->id);
if (local_node == NULL) {
/* It should not be possible for the scheduler to have assigned the
* instance to a node where its parent is not allowed, but it's good to
* have a fail-safe.
*/
if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
crm_warn("%s can't be promoted because %s is not allowed on %s "
"(scheduler bug?)",
rsc->id, parent->id, pe__node_name(node));
} // else the instance is unmanaged and already promoted
return NULL;
} else if ((local_node->count >= pe__clone_promoted_node_max(parent))
&& pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
pe_rsc_trace(rsc,
"%s can't be promoted because %s has "
"maximum promoted instances already",
rsc->id, pe__node_name(node));
return NULL;
}
return local_node;
}
/*!
* \internal
* \brief Compare two promotable clone instances by promotion priority
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a has higher promotion priority,
* a positive number if \p b has higher promotion priority,
* or 0 if promotion priorities are equal
*/
static gint
cmp_promotable_instance(gconstpointer a, gconstpointer b)
{
const pe_resource_t *rsc1 = (const pe_resource_t *) a;
const pe_resource_t *rsc2 = (const pe_resource_t *) b;
enum rsc_role_e role1 = pcmk_role_unknown;
enum rsc_role_e role2 = pcmk_role_unknown;
CRM_ASSERT((rsc1 != NULL) && (rsc2 != NULL));
// Check sort index set by pcmk__set_instance_roles()
if (rsc1->sort_index > rsc2->sort_index) {
pe_rsc_trace(rsc1,
"%s has higher promotion priority than %s "
"(sort index %d > %d)",
rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index);
return -1;
} else if (rsc1->sort_index < rsc2->sort_index) {
pe_rsc_trace(rsc1,
"%s has lower promotion priority than %s "
"(sort index %d < %d)",
rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index);
return 1;
}
// If those are the same, prefer instance whose current role is higher
role1 = rsc1->fns->state(rsc1, TRUE);
role2 = rsc2->fns->state(rsc2, TRUE);
if (role1 > role2) {
pe_rsc_trace(rsc1,
"%s has higher promotion priority than %s "
"(higher current role)",
rsc1->id, rsc2->id);
return -1;
} else if (role1 < role2) {
pe_rsc_trace(rsc1,
"%s has lower promotion priority than %s "
"(lower current role)",
rsc1->id, rsc2->id);
return 1;
}
// Finally, do normal clone instance sorting
return pcmk__cmp_instance(a, b);
}
/*!
* \internal
* \brief Add a promotable clone instance's sort index to its node's score
*
* Add a promotable clone instance's sort index (which sums its promotion
* preferences and scores of relevant location constraints for the promoted
* role) to the node score of the instance's assigned node.
*
* \param[in] data Promotable clone instance
* \param[in,out] user_data Clone parent of \p data
*/
static void
add_sort_index_to_node_score(gpointer data, gpointer user_data)
{
const pe_resource_t *child = (const pe_resource_t *) data;
pe_resource_t *clone = (pe_resource_t *) user_data;
pe_node_t *node = NULL;
const pe_node_t *chosen = NULL;
if (child->sort_index < 0) {
pe_rsc_trace(clone, "Not adding sort index of %s: negative", child->id);
return;
}
chosen = child->fns->location(child, NULL, FALSE);
if (chosen == NULL) {
pe_rsc_trace(clone, "Not adding sort index of %s: inactive", child->id);
return;
}
node = g_hash_table_lookup(clone->allowed_nodes, chosen->details->id);
CRM_ASSERT(node != NULL);
node->weight = pcmk__add_scores(child->sort_index, node->weight);
pe_rsc_trace(clone,
"Added cumulative priority of %s (%s) to score on %s (now %s)",
child->id, pcmk_readable_score(child->sort_index),
pe__node_name(node), pcmk_readable_score(node->weight));
}
/*!
* \internal
* \brief Apply colocation to dependent's node scores if for promoted role
*
* \param[in,out] data Colocation constraint to apply
* \param[in,out] user_data Promotable clone that is constraint's dependent
*/
static void
apply_coloc_to_dependent(gpointer data, gpointer user_data)
{
pcmk__colocation_t *colocation = data;
pe_resource_t *clone = user_data;
pe_resource_t *primary = colocation->primary;
uint32_t flags = pcmk__coloc_select_default;
float factor = colocation->score / (float) INFINITY;
if (colocation->dependent_role != pcmk_role_promoted) {
return;
}
if (colocation->score < INFINITY) {
flags = pcmk__coloc_select_active;
}
pe_rsc_trace(clone, "Applying colocation %s (promoted %s with %s) @%s",
colocation->id, colocation->dependent->id,
colocation->primary->id,
pcmk_readable_score(colocation->score));
primary->cmds->add_colocated_node_scores(primary, clone, clone->id,
&clone->allowed_nodes, colocation,
factor, flags);
}
/*!
* \internal
* \brief Apply colocation to primary's node scores if for promoted role
*
* \param[in,out] data Colocation constraint to apply
* \param[in,out] user_data Promotable clone that is constraint's primary
*/
static void
apply_coloc_to_primary(gpointer data, gpointer user_data)
{
pcmk__colocation_t *colocation = data;
pe_resource_t *clone = user_data;
pe_resource_t *dependent = colocation->dependent;
const float factor = colocation->score / (float) INFINITY;
const uint32_t flags = pcmk__coloc_select_active
|pcmk__coloc_select_nonnegative;
if ((colocation->primary_role != pcmk_role_promoted)
|| !pcmk__colocation_has_influence(colocation, NULL)) {
return;
}
pe_rsc_trace(clone, "Applying colocation %s (%s with promoted %s) @%s",
colocation->id, colocation->dependent->id,
colocation->primary->id,
pcmk_readable_score(colocation->score));
dependent->cmds->add_colocated_node_scores(dependent, clone, clone->id,
&clone->allowed_nodes,
colocation, factor, flags);
}
/*!
* \internal
* \brief Set clone instance's sort index to its node's score
*
* \param[in,out] data Promotable clone instance
* \param[in] user_data Parent clone of \p data
*/
static void
set_sort_index_to_node_score(gpointer data, gpointer user_data)
{
pe_resource_t *child = (pe_resource_t *) data;
const pe_resource_t *clone = (const pe_resource_t *) user_data;
pe_node_t *chosen = child->fns->location(child, NULL, FALSE);
if (!pcmk_is_set(child->flags, pcmk_rsc_managed)
&& (child->next_role == pcmk_role_promoted)) {
child->sort_index = INFINITY;
pe_rsc_trace(clone,
"Final sort index for %s is INFINITY (unmanaged promoted)",
child->id);
} else if ((chosen == NULL) || (child->sort_index < 0)) {
pe_rsc_trace(clone,
"Final sort index for %s is %d (ignoring node score)",
child->id, child->sort_index);
} else {
const pe_node_t *node = g_hash_table_lookup(clone->allowed_nodes,
chosen->details->id);
CRM_ASSERT(node != NULL);
child->sort_index = node->weight;
pe_rsc_trace(clone,
"Adding scores for %s: final sort index for %s is %d",
clone->id, child->id, child->sort_index);
}
}
/*!
* \internal
* \brief Sort a promotable clone's instances by descending promotion priority
*
* \param[in,out] clone Promotable clone to sort
*/
static void
sort_promotable_instances(pe_resource_t *clone)
{
GList *colocations = NULL;
if (pe__set_clone_flag(clone, pe__clone_promotion_constrained)
== pcmk_rc_already) {
return;
}
pe__set_resource_flags(clone, pe_rsc_merging);
for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
pe_rsc_trace(clone,
"Adding scores for %s: initial sort index for %s is %d",
clone->id, child->id, child->sort_index);
}
pe__show_node_scores(true, clone, "Before", clone->allowed_nodes,
clone->cluster);
g_list_foreach(clone->children, add_sort_index_to_node_score, clone);
colocations = pcmk__this_with_colocations(clone);
g_list_foreach(colocations, apply_coloc_to_dependent, clone);
g_list_free(colocations);
colocations = pcmk__with_this_colocations(clone);
g_list_foreach(colocations, apply_coloc_to_primary, clone);
g_list_free(colocations);
// Ban resource from all nodes if it needs a ticket but doesn't have it
pcmk__require_promotion_tickets(clone);
pe__show_node_scores(true, clone, "After", clone->allowed_nodes,
clone->cluster);
// Reset sort indexes to final node scores
g_list_foreach(clone->children, set_sort_index_to_node_score, clone);
// Finally, sort instances in descending order of promotion priority
clone->children = g_list_sort(clone->children, cmp_promotable_instance);
pe__clear_resource_flags(clone, pe_rsc_merging);
}
/*!
* \internal
* \brief Find the active instance (if any) of an anonymous clone on a node
*
* \param[in] clone Anonymous clone to check
* \param[in] id Instance ID (without instance number) to check
* \param[in] node Node to check
*
* \return
*/
static pe_resource_t *
find_active_anon_instance(const pe_resource_t *clone, const char *id,
const pe_node_t *node)
{
for (GList *iter = clone->children; iter; iter = iter->next) {
pe_resource_t *child = iter->data;
pe_resource_t *active = NULL;
// Use ->find_rsc() in case this is a cloned group
active = clone->fns->find_rsc(child, id, node,
pcmk_rsc_match_clone_only
|pcmk_rsc_match_current_node);
if (active != NULL) {
return active;
}
}
return NULL;
}
/*
* \brief Check whether an anonymous clone instance is known on a node
*
* \param[in] clone Anonymous clone to check
* \param[in] id Instance ID (without instance number) to check
* \param[in] node Node to check
*
* \return true if \p id instance of \p clone is known on \p node,
* otherwise false
*/
static bool
anonymous_known_on(const pe_resource_t *clone, const char *id,
const pe_node_t *node)
{
for (GList *iter = clone->children; iter; iter = iter->next) {
pe_resource_t *child = iter->data;
/* Use ->find_rsc() because this might be a cloned group, and knowing
* that other members of the group are known here implies nothing.
*/
child = clone->fns->find_rsc(child, id, NULL,
pcmk_rsc_match_clone_only);
CRM_LOG_ASSERT(child != NULL);
if (child != NULL) {
if (g_hash_table_lookup(child->known_on, node->details->id)) {
return true;
}
}
}
return false;
}
/*!
* \internal
* \brief Check whether a node is allowed to run a resource
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p node is allowed to run \p rsc, otherwise false
*/
static bool
is_allowed(const pe_resource_t *rsc, const pe_node_t *node)
{
pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes,
node->details->id);
return (allowed != NULL) && (allowed->weight >= 0);
}
/*!
* \brief Check whether a clone instance's promotion score should be considered
*
* \param[in] rsc Promotable clone instance to check
* \param[in] node Node where score would be applied
*
* \return true if \p rsc's promotion score should be considered on \p node,
* otherwise false
*/
static bool
promotion_score_applies(const pe_resource_t *rsc, const pe_node_t *node)
{
char *id = clone_strip(rsc->id);
const pe_resource_t *parent = pe__const_top_resource(rsc, false);
pe_resource_t *active = NULL;
const char *reason = "allowed";
// Some checks apply only to anonymous clone instances
if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
// If instance is active on the node, its score definitely applies
active = find_active_anon_instance(parent, id, node);
if (active == rsc) {
reason = "active";
goto check_allowed;
}
/* If *no* instance is active on this node, this instance's score will
* count if it has been probed on this node.
*/
if ((active == NULL) && anonymous_known_on(parent, id, node)) {
reason = "probed";
goto check_allowed;
}
}
/* If this clone's status is unknown on *all* nodes (e.g. cluster startup),
* take all instances' scores into account, to make sure we use any
* permanent promotion scores.
*/
if ((rsc->running_on == NULL) && (g_hash_table_size(rsc->known_on) == 0)) {
reason = "none probed";
goto check_allowed;
}
/* Otherwise, we've probed and/or started the resource *somewhere*, so
* consider promotion scores on nodes where we know the status.
*/
if ((g_hash_table_lookup(rsc->known_on, node->details->id) != NULL)
|| (pe_find_node_id(rsc->running_on, node->details->id) != NULL)) {
reason = "known";
} else {
pe_rsc_trace(rsc,
"Ignoring %s promotion score (for %s) on %s: not probed",
rsc->id, id, pe__node_name(node));
free(id);
return false;
}
check_allowed:
if (is_allowed(rsc, node)) {
pe_rsc_trace(rsc, "Counting %s promotion score (for %s) on %s: %s",
rsc->id, id, pe__node_name(node), reason);
free(id);
return true;
}
pe_rsc_trace(rsc, "Ignoring %s promotion score (for %s) on %s: not allowed",
rsc->id, id, pe__node_name(node));
free(id);
return false;
}
/*!
* \internal
* \brief Get the value of a promotion score node attribute
*
* \param[in] rsc Promotable clone instance to get promotion score for
* \param[in] node Node to get promotion score for
* \param[in] name Resource name to use in promotion score attribute name
*
* \return Value of promotion score node attribute for \p rsc on \p node
*/
static const char *
promotion_attr_value(const pe_resource_t *rsc, const pe_node_t *node,
const char *name)
{
char *attr_name = NULL;
const char *attr_value = NULL;
enum pe__rsc_node node_type = pe__rsc_node_assigned;
- if (pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
+ if (pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
// Not assigned yet
node_type = pe__rsc_node_current;
}
attr_name = pcmk_promotion_score_name(name);
attr_value = pe__node_attribute_calculated(node, attr_name, rsc, node_type,
false);
free(attr_name);
return attr_value;
}
/*!
* \internal
* \brief Get the promotion score for a clone instance on a node
*
* \param[in] rsc Promotable clone instance to get score for
* \param[in] node Node to get score for
* \param[out] is_default If non-NULL, will be set true if no score available
*
* \return Promotion score for \p rsc on \p node (or 0 if none)
*/
static int
promotion_score(const pe_resource_t *rsc, const pe_node_t *node,
bool *is_default)
{
char *name = NULL;
const char *attr_value = NULL;
if (is_default != NULL) {
*is_default = true;
}
CRM_CHECK((rsc != NULL) && (node != NULL), return 0);
/* If this is an instance of a cloned group, the promotion score is the sum
* of all members' promotion scores.
*/
if (rsc->children != NULL) {
int score = 0;
for (const GList *iter = rsc->children;
iter != NULL; iter = iter->next) {
const pe_resource_t *child = (const pe_resource_t *) iter->data;
bool child_default = false;
int child_score = promotion_score(child, node, &child_default);
if (!child_default && (is_default != NULL)) {
*is_default = false;
}
score += child_score;
}
return score;
}
if (!promotion_score_applies(rsc, node)) {
return 0;
}
/* For the promotion score attribute name, use the name the resource is
* known as in resource history, since that's what crm_attribute --promotion
* would have used.
*/
name = (rsc->clone_name == NULL)? rsc->id : rsc->clone_name;
attr_value = promotion_attr_value(rsc, node, name);
if (attr_value != NULL) {
pe_rsc_trace(rsc, "Promotion score for %s on %s = %s",
name, pe__node_name(node), pcmk__s(attr_value, "(unset)"));
} else if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
/* If we don't have any resource history yet, we won't have clone_name.
* In that case, for anonymous clones, try the resource name without
* any instance number.
*/
name = clone_strip(rsc->id);
if (strcmp(rsc->id, name) != 0) {
attr_value = promotion_attr_value(rsc, node, name);
pe_rsc_trace(rsc, "Promotion score for %s on %s (for %s) = %s",
name, pe__node_name(node), rsc->id,
pcmk__s(attr_value, "(unset)"));
}
free(name);
}
if (attr_value == NULL) {
return 0;
}
if (is_default != NULL) {
*is_default = false;
}
return char2score(attr_value);
}
/*!
* \internal
* \brief Include promotion scores in instances' node scores and priorities
*
* \param[in,out] rsc Promotable clone resource to update
*/
void
pcmk__add_promotion_scores(pe_resource_t *rsc)
{
if (pe__set_clone_flag(rsc, pe__clone_promotion_added) == pcmk_rc_already) {
return;
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
GHashTableIter iter;
pe_node_t *node = NULL;
int score, new_score;
g_hash_table_iter_init(&iter, child_rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (!pcmk__node_available(node, false, false)) {
/* This node will never be promoted, so don't apply the
* promotion score, as that may lead to clone shuffling.
*/
continue;
}
score = promotion_score(child_rsc, node, NULL);
if (score > 0) {
new_score = pcmk__add_scores(node->weight, score);
if (new_score != node->weight) { // Could remain INFINITY
node->weight = new_score;
pe_rsc_trace(rsc,
"Added %s promotion priority (%s) to score "
"on %s (now %s)",
child_rsc->id, pcmk_readable_score(score),
pe__node_name(node),
pcmk_readable_score(new_score));
}
}
if (score > child_rsc->priority) {
pe_rsc_trace(rsc,
"Updating %s priority to promotion score (%d->%d)",
child_rsc->id, child_rsc->priority, score);
child_rsc->priority = score;
}
}
}
}
/*!
* \internal
* \brief If a resource's current role is started, change it to unpromoted
*
* \param[in,out] data Resource to update
* \param[in] user_data Ignored
*/
static void
set_current_role_unpromoted(void *data, void *user_data)
{
pe_resource_t *rsc = (pe_resource_t *) data;
if (rsc->role == pcmk_role_started) {
// Promotable clones should use unpromoted role instead of started
rsc->role = pcmk_role_unpromoted;
}
g_list_foreach(rsc->children, set_current_role_unpromoted, NULL);
}
/*!
* \internal
* \brief Set a resource's next role to unpromoted (or stopped if unassigned)
*
* \param[in,out] data Resource to update
* \param[in] user_data Ignored
*/
static void
set_next_role_unpromoted(void *data, void *user_data)
{
pe_resource_t *rsc = (pe_resource_t *) data;
GList *assigned = NULL;
rsc->fns->location(rsc, &assigned, FALSE);
if (assigned == NULL) {
pe__set_next_role(rsc, pcmk_role_stopped, "stopped instance");
} else {
pe__set_next_role(rsc, pcmk_role_unpromoted, "unpromoted instance");
g_list_free(assigned);
}
g_list_foreach(rsc->children, set_next_role_unpromoted, NULL);
}
/*!
* \internal
* \brief Set a resource's next role to promoted if not already set
*
* \param[in,out] data Resource to update
* \param[in] user_data Ignored
*/
static void
set_next_role_promoted(void *data, gpointer user_data)
{
pe_resource_t *rsc = (pe_resource_t *) data;
if (rsc->next_role == pcmk_role_unknown) {
pe__set_next_role(rsc, pcmk_role_promoted, "promoted instance");
}
g_list_foreach(rsc->children, set_next_role_promoted, NULL);
}
/*!
* \internal
* \brief Show instance's promotion score on node where it will be active
*
* \param[in,out] instance Promotable clone instance to show
*/
static void
show_promotion_score(pe_resource_t *instance)
{
pe_node_t *chosen = instance->fns->location(instance, NULL, FALSE);
if (pcmk_is_set(instance->cluster->flags, pcmk_sched_output_scores)
&& !pcmk__is_daemon && (instance->cluster->priv != NULL)) {
pcmk__output_t *out = instance->cluster->priv;
out->message(out, "promotion-score", instance, chosen,
pcmk_readable_score(instance->sort_index));
} else {
pe_rsc_debug(pe__const_top_resource(instance, false),
"%s promotion score on %s: sort=%s priority=%s",
instance->id,
((chosen == NULL)? "none" : pe__node_name(chosen)),
pcmk_readable_score(instance->sort_index),
pcmk_readable_score(instance->priority));
}
}
/*!
* \internal
* \brief Set a clone instance's promotion priority
*
* \param[in,out] data Promotable clone instance to update
* \param[in] user_data Instance's parent clone
*/
static void
set_instance_priority(gpointer data, gpointer user_data)
{
pe_resource_t *instance = (pe_resource_t *) data;
const pe_resource_t *clone = (const pe_resource_t *) user_data;
const pe_node_t *chosen = NULL;
enum rsc_role_e next_role = pcmk_role_unknown;
GList *list = NULL;
pe_rsc_trace(clone, "Assigning priority for %s: %s", instance->id,
role2text(instance->next_role));
if (instance->fns->state(instance, TRUE) == pcmk_role_started) {
set_current_role_unpromoted(instance, NULL);
}
// Only an instance that will be active can be promoted
chosen = instance->fns->location(instance, &list, FALSE);
if (pcmk__list_of_multiple(list)) {
pcmk__config_err("Cannot promote non-colocated child %s",
instance->id);
}
g_list_free(list);
if (chosen == NULL) {
return;
}
next_role = instance->fns->state(instance, FALSE);
switch (next_role) {
case pcmk_role_started:
case pcmk_role_unknown:
// Set instance priority to its promotion score (or -1 if none)
{
bool is_default = false;
instance->priority = promotion_score(instance, chosen,
&is_default);
if (is_default) {
/*
* Default to -1 if no value is set. This allows
* instances eligible for promotion to be specified
* based solely on rsc_location constraints, but
* prevents any instance from being promoted if neither
* a constraint nor a promotion score is present
*/
instance->priority = -1;
}
}
break;
case pcmk_role_unpromoted:
case pcmk_role_stopped:
// Instance can't be promoted
instance->priority = -INFINITY;
break;
case pcmk_role_promoted:
// Nothing needed (re-creating actions after scheduling fencing)
break;
default:
CRM_CHECK(FALSE, crm_err("Unknown resource role %d for %s",
next_role, instance->id));
}
// Add relevant location constraint scores for promoted role
apply_promoted_locations(instance, instance->rsc_location, chosen);
apply_promoted_locations(instance, clone->rsc_location, chosen);
// Consider instance's role-based colocations with other resources
list = pcmk__this_with_colocations(instance);
for (GList *iter = list; iter != NULL; iter = iter->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) iter->data;
instance->cmds->apply_coloc_score(instance, cons->primary, cons, true);
}
g_list_free(list);
instance->sort_index = instance->priority;
if (next_role == pcmk_role_promoted) {
instance->sort_index = INFINITY;
}
pe_rsc_trace(clone, "Assigning %s priority = %d",
instance->id, instance->priority);
}
/*!
* \internal
* \brief Set a promotable clone instance's role
*
* \param[in,out] data Promotable clone instance to update
* \param[in,out] user_data Pointer to count of instances chosen for promotion
*/
static void
set_instance_role(gpointer data, gpointer user_data)
{
pe_resource_t *instance = (pe_resource_t *) data;
int *count = (int *) user_data;
const pe_resource_t *clone = pe__const_top_resource(instance, false);
pe_node_t *chosen = NULL;
show_promotion_score(instance);
if (instance->sort_index < 0) {
pe_rsc_trace(clone, "Not supposed to promote instance %s",
instance->id);
} else if ((*count < pe__clone_promoted_max(instance))
|| !pcmk_is_set(clone->flags, pcmk_rsc_managed)) {
chosen = node_to_be_promoted_on(instance);
}
if (chosen == NULL) {
set_next_role_unpromoted(instance, NULL);
return;
}
if ((instance->role < pcmk_role_promoted)
&& !pcmk_is_set(instance->cluster->flags, pcmk_sched_quorate)
&& (instance->cluster->no_quorum_policy == pcmk_no_quorum_freeze)) {
crm_notice("Clone instance %s cannot be promoted without quorum",
instance->id);
set_next_role_unpromoted(instance, NULL);
return;
}
chosen->count++;
pe_rsc_info(clone, "Choosing %s (%s) on %s for promotion",
instance->id, role2text(instance->role),
pe__node_name(chosen));
set_next_role_promoted(instance, NULL);
(*count)++;
}
/*!
* \internal
* \brief Set roles for all instances of a promotable clone
*
* \param[in,out] rsc Promotable clone resource to update
*/
void
pcmk__set_instance_roles(pe_resource_t *rsc)
{
int promoted = 0;
GHashTableIter iter;
pe_node_t *node = NULL;
// Repurpose count to track the number of promoted instances assigned
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
node->count = 0;
}
// Set instances' promotion priorities and sort by highest priority first
g_list_foreach(rsc->children, set_instance_priority, rsc);
sort_promotable_instances(rsc);
// Choose the first N eligible instances to be promoted
g_list_foreach(rsc->children, set_instance_role, &promoted);
pe_rsc_info(rsc, "%s: Promoted %d instances of a possible %d",
rsc->id, promoted, pe__clone_promoted_max(rsc));
}
/*!
*
* \internal
* \brief Create actions for promotable clone instances
*
* \param[in,out] clone Promotable clone to create actions for
* \param[out] any_promoting Will be set true if any instance is promoting
* \param[out] any_demoting Will be set true if any instance is demoting
*/
static void
create_promotable_instance_actions(pe_resource_t *clone,
bool *any_promoting, bool *any_demoting)
{
for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
instance->cmds->create_actions(instance);
check_for_role_change(instance, any_demoting, any_promoting);
}
}
/*!
* \internal
* \brief Reset each promotable instance's resource priority
*
* Reset the priority of each instance of a promotable clone to the clone's
* priority (after promotion actions are scheduled, when instance priorities
* were repurposed as promotion scores).
*
* \param[in,out] clone Promotable clone to reset
*/
static void
reset_instance_priorities(pe_resource_t *clone)
{
for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
instance->priority = clone->priority;
}
}
/*!
* \internal
* \brief Create actions specific to promotable clones
*
* \param[in,out] clone Promotable clone to create actions for
*/
void
pcmk__create_promotable_actions(pe_resource_t *clone)
{
bool any_promoting = false;
bool any_demoting = false;
// Create actions for each clone instance individually
create_promotable_instance_actions(clone, &any_promoting, &any_demoting);
// Create pseudo-actions for clone as a whole
pe__create_promotable_pseudo_ops(clone, any_promoting, any_demoting);
// Undo our temporary repurposing of resource priority for instances
reset_instance_priorities(clone);
}
/*!
* \internal
* \brief Create internal orderings for a promotable clone's instances
*
* \param[in,out] clone Promotable clone instance to order
*/
void
pcmk__order_promotable_instances(pe_resource_t *clone)
{
pe_resource_t *previous = NULL; // Needed for ordered clones
pcmk__promotable_restart_ordering(clone);
for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
// Demote before promote
pcmk__order_resource_actions(instance, PCMK_ACTION_DEMOTE,
instance, PCMK_ACTION_PROMOTE,
pe_order_optional);
order_instance_promotion(clone, instance, previous);
order_instance_demotion(clone, instance, previous);
previous = instance;
}
}
/*!
* \internal
* \brief Update dependent's allowed nodes for colocation with promotable
*
* \param[in,out] dependent Dependent resource to update
* \param[in] primary Primary resource
* \param[in] primary_node Node where an instance of the primary will be
* \param[in] colocation Colocation constraint to apply
*/
static void
update_dependent_allowed_nodes(pe_resource_t *dependent,
const pe_resource_t *primary,
const pe_node_t *primary_node,
const pcmk__colocation_t *colocation)
{
GHashTableIter iter;
pe_node_t *node = NULL;
const char *primary_value = NULL;
const char *attr = colocation->node_attribute;
if (colocation->score >= INFINITY) {
return; // Colocation is mandatory, so allowed node scores don't matter
}
primary_value = pcmk__colocation_node_attr(primary_node, attr, primary);
pe_rsc_trace(colocation->primary,
"Applying %s (%s with %s on %s by %s @%d) to %s",
colocation->id, colocation->dependent->id,
colocation->primary->id, pe__node_name(primary_node), attr,
colocation->score, dependent->id);
g_hash_table_iter_init(&iter, dependent->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
const char *dependent_value = pcmk__colocation_node_attr(node, attr,
dependent);
if (pcmk__str_eq(primary_value, dependent_value, pcmk__str_casei)) {
node->weight = pcmk__add_scores(node->weight, colocation->score);
pe_rsc_trace(colocation->primary,
"Added %s score (%s) to %s (now %s)",
colocation->id, pcmk_readable_score(colocation->score),
pe__node_name(node),
pcmk_readable_score(node->weight));
}
}
}
/*!
* \brief Update dependent for a colocation with a promotable clone
*
* \param[in] primary Primary resource in the colocation
* \param[in,out] dependent Dependent resource in the colocation
* \param[in] colocation Colocation constraint to apply
*/
void
pcmk__update_dependent_with_promotable(const pe_resource_t *primary,
pe_resource_t *dependent,
const pcmk__colocation_t *colocation)
{
GList *affected_nodes = NULL;
/* Build a list of all nodes where an instance of the primary will be, and
* (for optional colocations) update the dependent's allowed node scores for
* each one.
*/
for (GList *iter = primary->children; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
pe_node_t *node = instance->fns->location(instance, NULL, FALSE);
if (node == NULL) {
continue;
}
if (instance->fns->state(instance, FALSE) == colocation->primary_role) {
update_dependent_allowed_nodes(dependent, primary, node,
colocation);
affected_nodes = g_list_prepend(affected_nodes, node);
}
}
/* For mandatory colocations, add the primary's node score to the
* dependent's node score for each affected node, and ban the dependent
* from all other nodes.
*
* However, skip this for promoted-with-promoted colocations, otherwise
* inactive dependent instances can't start (in the unpromoted role).
*/
if ((colocation->score >= INFINITY)
&& ((colocation->dependent_role != pcmk_role_promoted)
|| (colocation->primary_role != pcmk_role_promoted))) {
pe_rsc_trace(colocation->primary,
"Applying %s (mandatory %s with %s) to %s",
colocation->id, colocation->dependent->id,
colocation->primary->id, dependent->id);
pcmk__colocation_intersect_nodes(dependent, primary, colocation,
affected_nodes, true);
}
g_list_free(affected_nodes);
}
/*!
* \internal
* \brief Update dependent priority for colocation with promotable
*
* \param[in] primary Primary resource in the colocation
* \param[in,out] dependent Dependent resource in the colocation
* \param[in] colocation Colocation constraint to apply
*/
void
pcmk__update_promotable_dependent_priority(const pe_resource_t *primary,
pe_resource_t *dependent,
const pcmk__colocation_t *colocation)
{
pe_resource_t *primary_instance = NULL;
// Look for a primary instance where dependent will be
primary_instance = pcmk__find_compatible_instance(dependent, primary,
colocation->primary_role,
false);
if (primary_instance != NULL) {
// Add primary instance's priority to dependent's
int new_priority = pcmk__add_scores(dependent->priority,
colocation->score);
pe_rsc_trace(colocation->primary,
"Applying %s (%s with %s) to %s priority (%s + %s = %s)",
colocation->id, colocation->dependent->id,
colocation->primary->id, dependent->id,
pcmk_readable_score(dependent->priority),
pcmk_readable_score(colocation->score),
pcmk_readable_score(new_priority));
dependent->priority = new_priority;
} else if (colocation->score >= INFINITY) {
// Mandatory colocation, but primary won't be here
pe_rsc_trace(colocation->primary,
"Applying %s (%s with %s) to %s: can't be promoted",
colocation->id, colocation->dependent->id,
colocation->primary->id, dependent->id);
dependent->priority = -INFINITY;
}
}
diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c
index 8c7f2c2732..f8e3308d52 100644
--- a/lib/pacemaker/pcmk_sched_resource.c
+++ b/lib/pacemaker/pcmk_sched_resource.c
@@ -1,771 +1,771 @@
/*
* Copyright 2014-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdlib.h>
#include <string.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Resource assignment methods by resource variant
static resource_alloc_functions_t assignment_methods[] = {
{
pcmk__primitive_assign,
pcmk__primitive_create_actions,
pcmk__probe_rsc_on_node,
pcmk__primitive_internal_constraints,
pcmk__primitive_apply_coloc_score,
pcmk__colocated_resources,
pcmk__with_primitive_colocations,
pcmk__primitive_with_colocations,
pcmk__add_colocated_node_scores,
pcmk__apply_location,
pcmk__primitive_action_flags,
pcmk__update_ordered_actions,
pcmk__output_resource_actions,
pcmk__add_rsc_actions_to_graph,
pcmk__primitive_add_graph_meta,
pcmk__primitive_add_utilization,
pcmk__primitive_shutdown_lock,
},
{
pcmk__group_assign,
pcmk__group_create_actions,
pcmk__probe_rsc_on_node,
pcmk__group_internal_constraints,
pcmk__group_apply_coloc_score,
pcmk__group_colocated_resources,
pcmk__with_group_colocations,
pcmk__group_with_colocations,
pcmk__group_add_colocated_node_scores,
pcmk__group_apply_location,
pcmk__group_action_flags,
pcmk__group_update_ordered_actions,
pcmk__output_resource_actions,
pcmk__add_rsc_actions_to_graph,
pcmk__noop_add_graph_meta,
pcmk__group_add_utilization,
pcmk__group_shutdown_lock,
},
{
pcmk__clone_assign,
pcmk__clone_create_actions,
pcmk__clone_create_probe,
pcmk__clone_internal_constraints,
pcmk__clone_apply_coloc_score,
pcmk__colocated_resources,
pcmk__with_clone_colocations,
pcmk__clone_with_colocations,
pcmk__add_colocated_node_scores,
pcmk__clone_apply_location,
pcmk__clone_action_flags,
pcmk__instance_update_ordered_actions,
pcmk__output_resource_actions,
pcmk__clone_add_actions_to_graph,
pcmk__clone_add_graph_meta,
pcmk__clone_add_utilization,
pcmk__clone_shutdown_lock,
},
{
pcmk__bundle_assign,
pcmk__bundle_create_actions,
pcmk__bundle_create_probe,
pcmk__bundle_internal_constraints,
pcmk__bundle_apply_coloc_score,
pcmk__colocated_resources,
pcmk__with_bundle_colocations,
pcmk__bundle_with_colocations,
pcmk__add_colocated_node_scores,
pcmk__bundle_apply_location,
pcmk__bundle_action_flags,
pcmk__instance_update_ordered_actions,
pcmk__output_bundle_actions,
pcmk__bundle_add_actions_to_graph,
pcmk__noop_add_graph_meta,
pcmk__bundle_add_utilization,
pcmk__bundle_shutdown_lock,
}
};
/*!
* \internal
* \brief Check whether a resource's agent standard, provider, or type changed
*
* \param[in,out] rsc Resource to check
* \param[in,out] node Node needing unfencing if agent changed
* \param[in] rsc_entry XML with previously known agent information
* \param[in] active_on_node Whether \p rsc is active on \p node
*
* \return true if agent for \p rsc changed, otherwise false
*/
bool
pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *rsc_entry, bool active_on_node)
{
bool changed = false;
const char *attr_list[] = {
XML_ATTR_TYPE,
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER
};
for (int i = 0; i < PCMK__NELEM(attr_list); i++) {
const char *value = crm_element_value(rsc->xml, attr_list[i]);
const char *old_value = crm_element_value(rsc_entry, attr_list[i]);
if (!pcmk__str_eq(value, old_value, pcmk__str_none)) {
changed = true;
trigger_unfencing(rsc, node, "Device definition changed", NULL,
rsc->cluster);
if (active_on_node) {
crm_notice("Forcing restart of %s on %s "
"because %s changed from '%s' to '%s'",
rsc->id, pe__node_name(node), attr_list[i],
pcmk__s(old_value, ""), pcmk__s(value, ""));
}
}
}
if (changed && active_on_node) {
// Make sure the resource is restarted
custom_action(rsc, stop_key(rsc), PCMK_ACTION_STOP, node, FALSE, TRUE,
rsc->cluster);
pe__set_resource_flags(rsc, pe_rsc_start_pending);
}
return changed;
}
/*!
* \internal
* \brief Add resource (and any matching children) to list if it matches ID
*
* \param[in] result List to add resource to
* \param[in] rsc Resource to check
* \param[in] id ID to match
*
* \return (Possibly new) head of list
*/
static GList *
add_rsc_if_matching(GList *result, pe_resource_t *rsc, const char *id)
{
if ((strcmp(rsc->id, id) == 0)
|| ((rsc->clone_name != NULL) && (strcmp(rsc->clone_name, id) == 0))) {
result = g_list_prepend(result, rsc);
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
result = add_rsc_if_matching(result, child, id);
}
return result;
}
/*!
* \internal
* \brief Find all resources matching a given ID by either ID or clone name
*
* \param[in] id Resource ID to check
* \param[in] data_set Cluster working set
*
* \return List of all resources that match \p id
* \note The caller is responsible for freeing the return value with
* g_list_free().
*/
GList *
pcmk__rscs_matching_id(const char *id, const pe_working_set_t *data_set)
{
GList *result = NULL;
CRM_CHECK((id != NULL) && (data_set != NULL), return NULL);
for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
result = add_rsc_if_matching(result, (pe_resource_t *) iter->data, id);
}
return result;
}
/*!
* \internal
* \brief Set the variant-appropriate assignment methods for a resource
*
* \param[in,out] data Resource to set assignment methods for
* \param[in] user_data Ignored
*/
static void
set_assignment_methods_for_rsc(gpointer data, gpointer user_data)
{
pe_resource_t *rsc = data;
rsc->cmds = &assignment_methods[rsc->variant];
g_list_foreach(rsc->children, set_assignment_methods_for_rsc, NULL);
}
/*!
* \internal
* \brief Set the variant-appropriate assignment methods for all resources
*
* \param[in,out] data_set Cluster working set
*/
void
pcmk__set_assignment_methods(pe_working_set_t *data_set)
{
g_list_foreach(data_set->resources, set_assignment_methods_for_rsc, NULL);
}
/*!
* \internal
* \brief Wrapper for colocated_resources() method for readability
*
* \param[in] rsc Resource to add to colocated list
* \param[in] orig_rsc Resource originally requested
* \param[in,out] list Pointer to list to add to
*
* \return (Possibly new) head of list
*/
static inline void
add_colocated_resources(const pe_resource_t *rsc, const pe_resource_t *orig_rsc,
GList **list)
{
*list = rsc->cmds->colocated_resources(rsc, orig_rsc, *list);
}
// Shared implementation of resource_alloc_functions_t:colocated_resources()
GList *
pcmk__colocated_resources(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *colocated_rscs)
{
const GList *iter = NULL;
GList *colocations = NULL;
if (orig_rsc == NULL) {
orig_rsc = rsc;
}
if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) {
return colocated_rscs;
}
pe_rsc_trace(orig_rsc, "%s is in colocation chain with %s",
rsc->id, orig_rsc->id);
colocated_rscs = g_list_prepend(colocated_rscs, (gpointer) rsc);
// Follow colocations where this resource is the dependent resource
colocations = pcmk__this_with_colocations(rsc);
for (iter = colocations; iter != NULL; iter = iter->next) {
const pcmk__colocation_t *constraint = iter->data;
const pe_resource_t *primary = constraint->primary;
if (primary == orig_rsc) {
continue; // Break colocation loop
}
if ((constraint->score == INFINITY) &&
(pcmk__colocation_affects(rsc, primary, constraint,
true) == pcmk__coloc_affects_location)) {
add_colocated_resources(primary, orig_rsc, &colocated_rscs);
}
}
g_list_free(colocations);
// Follow colocations where this resource is the primary resource
colocations = pcmk__with_this_colocations(rsc);
for (iter = colocations; iter != NULL; iter = iter->next) {
const pcmk__colocation_t *constraint = iter->data;
const pe_resource_t *dependent = constraint->dependent;
if (dependent == orig_rsc) {
continue; // Break colocation loop
}
if (pe_rsc_is_clone(rsc) && !pe_rsc_is_clone(dependent)) {
continue; // We can't be sure whether dependent will be colocated
}
if ((constraint->score == INFINITY) &&
(pcmk__colocation_affects(dependent, rsc, constraint,
true) == pcmk__coloc_affects_location)) {
add_colocated_resources(dependent, orig_rsc, &colocated_rscs);
}
}
g_list_free(colocations);
return colocated_rscs;
}
// No-op function for variants that don't need to implement add_graph_meta()
void
pcmk__noop_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml)
{
}
/*!
* \internal
* \brief Output a summary of scheduled actions for a resource
*
* \param[in,out] rsc Resource to output actions for
*/
void
pcmk__output_resource_actions(pe_resource_t *rsc)
{
pe_node_t *next = NULL;
pe_node_t *current = NULL;
pcmk__output_t *out = NULL;
CRM_ASSERT(rsc != NULL);
out = rsc->cluster->priv;
if (rsc->children != NULL) {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
child->cmds->output_actions(child);
}
return;
}
next = rsc->allocated_to;
if (rsc->running_on) {
current = pe__current_node(rsc);
if (rsc->role == pcmk_role_stopped) {
/* This can occur when resources are being recovered because
* the current role can change in pcmk__primitive_create_actions()
*/
rsc->role = pcmk_role_started;
}
}
if ((current == NULL) && pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
/* Don't log stopped orphans */
return;
}
out->message(out, "rsc-action", rsc, current, next);
}
/*!
* \internal
* \brief Add a resource to a node's list of assigned resources
*
* \param[in,out] node Node to add resource to
* \param[in] rsc Resource to add
*/
static inline void
add_assigned_resource(pe_node_t *node, pe_resource_t *rsc)
{
node->details->allocated_rsc = g_list_prepend(node->details->allocated_rsc,
rsc);
}
/*!
* \internal
* \brief Assign a specified resource (of any variant) to a node
*
* Assign a specified resource and its children (if any) to a specified node, if
* the node can run the resource (or unconditionally, if \p force is true). Mark
* the resources as no longer provisional.
*
* If a resource can't be assigned (or \p node is \c NULL), unassign any
* previous assignment. If \p stop_if_fail is \c true, set next role to stopped
* and update any existing actions scheduled for the resource.
*
* \param[in,out] rsc Resource to assign
* \param[in,out] node Node to assign \p rsc to
* \param[in] force If true, assign to \p node even if unavailable
* \param[in] stop_if_fail If \c true and either \p rsc can't be assigned
* or \p chosen is \c NULL, set next role to
* stopped and update existing actions (if \p rsc
* is not a primitive, this applies to its
* primitive descendants instead)
*
* \return \c true if the assignment of \p rsc changed, or \c false otherwise
*
* \note Assigning a resource to the NULL node using this function is different
* from calling pcmk__unassign_resource(), in that it may also update any
* actions created for the resource.
* \note The \c resource_alloc_functions_t:assign() method is preferred, unless
* a resource should be assigned to the \c NULL node or every resource in
* a tree should be assigned to the same node.
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
bool
pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force,
bool stop_if_fail)
{
bool changed = false;
CRM_ASSERT(rsc != NULL);
if (rsc->children != NULL) {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = iter->data;
changed |= pcmk__assign_resource(child_rsc, node, force,
stop_if_fail);
}
return changed;
}
// Assigning a primitive
if (!force && (node != NULL)
&& ((node->weight < 0)
// Allow graph to assume that guest node connections will come up
|| (!pcmk__node_available(node, true, false)
&& !pe__is_guest_node(node)))) {
pe_rsc_debug(rsc,
"All nodes for resource %s are unavailable, unclean or "
"shutting down (%s can%s run resources, with score %s)",
rsc->id, pe__node_name(node),
(pcmk__node_available(node, true, false)? "" : "not"),
pcmk_readable_score(node->weight));
if (stop_if_fail) {
pe__set_next_role(rsc, pcmk_role_stopped, "node availability");
}
node = NULL;
}
if (rsc->allocated_to != NULL) {
changed = !pe__same_node(rsc->allocated_to, node);
} else {
changed = (node != NULL);
}
pcmk__unassign_resource(rsc);
- pe__clear_resource_flags(rsc, pe_rsc_provisional);
+ pe__clear_resource_flags(rsc, pcmk_rsc_unassigned);
if (node == NULL) {
char *rc_stopped = NULL;
pe_rsc_debug(rsc, "Could not assign %s to a node", rsc->id);
if (!stop_if_fail) {
return changed;
}
pe__set_next_role(rsc, pcmk_role_stopped, "unable to assign");
for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
pe_action_t *op = (pe_action_t *) iter->data;
pe_rsc_debug(rsc, "Updating %s for %s assignment failure",
op->uuid, rsc->id);
if (pcmk__str_eq(op->task, PCMK_ACTION_STOP, pcmk__str_none)) {
pe__clear_action_flags(op, pe_action_optional);
} else if (pcmk__str_eq(op->task, PCMK_ACTION_START,
pcmk__str_none)) {
pe__clear_action_flags(op, pe_action_runnable);
} else {
// Cancel recurring actions, unless for stopped state
const char *interval_ms_s = NULL;
const char *target_rc_s = NULL;
interval_ms_s = g_hash_table_lookup(op->meta,
XML_LRM_ATTR_INTERVAL_MS);
target_rc_s = g_hash_table_lookup(op->meta,
XML_ATTR_TE_TARGET_RC);
if (rc_stopped == NULL) {
rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING);
}
if (!pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)
&& !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) {
pe__clear_action_flags(op, pe_action_runnable);
}
}
}
free(rc_stopped);
return changed;
}
pe_rsc_debug(rsc, "Assigning %s to %s", rsc->id, pe__node_name(node));
rsc->allocated_to = pe__copy_node(node);
add_assigned_resource(node, rsc);
node->details->num_resources++;
node->count++;
pcmk__consume_node_capacity(node->details->utilization, rsc);
if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_show_utilization)) {
pcmk__output_t *out = rsc->cluster->priv;
out->message(out, "resource-util", rsc, node, __func__);
}
return changed;
}
/*!
* \internal
* \brief Remove any node assignment from a specified resource and its children
*
* If a specified resource has been assigned to a node, remove that assignment
* and mark the resource as provisional again.
*
* \param[in,out] rsc Resource to unassign
*
* \note This function is called recursively on \p rsc and its children.
*/
void
pcmk__unassign_resource(pe_resource_t *rsc)
{
pe_node_t *old = rsc->allocated_to;
if (old == NULL) {
crm_info("Unassigning %s", rsc->id);
} else {
crm_info("Unassigning %s from %s", rsc->id, pe__node_name(old));
}
- pe__set_resource_flags(rsc, pe_rsc_provisional);
+ pe__set_resource_flags(rsc, pcmk_rsc_unassigned);
if (rsc->children == NULL) {
if (old == NULL) {
return;
}
rsc->allocated_to = NULL;
/* We're going to free the pe_node_t, but its details member is shared
* and will remain, so update that appropriately first.
*/
old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc,
rsc);
old->details->num_resources--;
pcmk__release_node_capacity(old->details->utilization, rsc);
free(old);
return;
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pcmk__unassign_resource((pe_resource_t *) iter->data);
}
}
/*!
* \internal
* \brief Check whether a resource has reached its migration threshold on a node
*
* \param[in,out] rsc Resource to check
* \param[in] node Node to check
* \param[out] failed If threshold has been reached, this will be set to
* resource that failed (possibly a parent of \p rsc)
*
* \return true if the migration threshold has been reached, false otherwise
*/
bool
pcmk__threshold_reached(pe_resource_t *rsc, const pe_node_t *node,
pe_resource_t **failed)
{
int fail_count, remaining_tries;
pe_resource_t *rsc_to_ban = rsc;
// Migration threshold of 0 means never force away
if (rsc->migration_threshold == 0) {
return false;
}
// If we're ignoring failures, also ignore the migration threshold
if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
return false;
}
// If there are no failures, there's no need to force away
fail_count = pe_get_failcount(node, rsc, NULL,
pe_fc_effective|pe_fc_fillers, NULL);
if (fail_count <= 0) {
return false;
}
// If failed resource is anonymous clone instance, we'll force clone away
if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
rsc_to_ban = uber_parent(rsc);
}
// How many more times recovery will be tried on this node
remaining_tries = rsc->migration_threshold - fail_count;
if (remaining_tries <= 0) {
crm_warn("%s cannot run on %s due to reaching migration threshold "
"(clean up resource to allow again)"
CRM_XS " failures=%d migration-threshold=%d",
rsc_to_ban->id, pe__node_name(node), fail_count,
rsc->migration_threshold);
if (failed != NULL) {
*failed = rsc_to_ban;
}
return true;
}
crm_info("%s can fail %d more time%s on "
"%s before reaching migration threshold (%d)",
rsc_to_ban->id, remaining_tries, pcmk__plural_s(remaining_tries),
pe__node_name(node), rsc->migration_threshold);
return false;
}
/*!
* \internal
* \brief Get a node's score
*
* \param[in] node Node with ID to check
* \param[in] nodes List of nodes to look for \p node score in
*
* \return Node's score, or -INFINITY if not found
*/
static int
get_node_score(const pe_node_t *node, GHashTable *nodes)
{
pe_node_t *found_node = NULL;
if ((node != NULL) && (nodes != NULL)) {
found_node = g_hash_table_lookup(nodes, node->details->id);
}
return (found_node == NULL)? -INFINITY : found_node->weight;
}
/*!
* \internal
* \brief Compare two resources according to which should be assigned first
*
* \param[in] a First resource to compare
* \param[in] b Second resource to compare
* \param[in] data Sorted list of all nodes in cluster
*
* \return -1 if \p a should be assigned before \b, 0 if they are equal,
* or +1 if \p a should be assigned after \b
*/
static gint
cmp_resources(gconstpointer a, gconstpointer b, gpointer data)
{
/* GLib insists that this function require gconstpointer arguments, but we
* make a small, temporary change to each argument (setting the
* pe_rsc_merging flag) during comparison
*/
pe_resource_t *resource1 = (pe_resource_t *) a;
pe_resource_t *resource2 = (pe_resource_t *) b;
const GList *nodes = data;
int rc = 0;
int r1_score = -INFINITY;
int r2_score = -INFINITY;
pe_node_t *r1_node = NULL;
pe_node_t *r2_node = NULL;
GHashTable *r1_nodes = NULL;
GHashTable *r2_nodes = NULL;
const char *reason = NULL;
// Resources with highest priority should be assigned first
reason = "priority";
r1_score = resource1->priority;
r2_score = resource2->priority;
if (r1_score > r2_score) {
rc = -1;
goto done;
}
if (r1_score < r2_score) {
rc = 1;
goto done;
}
// We need nodes to make any other useful comparisons
reason = "no node list";
if (nodes == NULL) {
goto done;
}
// Calculate and log node scores
resource1->cmds->add_colocated_node_scores(resource1, NULL, resource1->id,
&r1_nodes, NULL, 1,
pcmk__coloc_select_this_with);
resource2->cmds->add_colocated_node_scores(resource2, NULL, resource2->id,
&r2_nodes, NULL, 1,
pcmk__coloc_select_this_with);
pe__show_node_scores(true, NULL, resource1->id, r1_nodes,
resource1->cluster);
pe__show_node_scores(true, NULL, resource2->id, r2_nodes,
resource2->cluster);
// The resource with highest score on its current node goes first
reason = "current location";
if (resource1->running_on != NULL) {
r1_node = pe__current_node(resource1);
}
if (resource2->running_on != NULL) {
r2_node = pe__current_node(resource2);
}
r1_score = get_node_score(r1_node, r1_nodes);
r2_score = get_node_score(r2_node, r2_nodes);
if (r1_score > r2_score) {
rc = -1;
goto done;
}
if (r1_score < r2_score) {
rc = 1;
goto done;
}
// Otherwise a higher score on any node will do
reason = "score";
for (const GList *iter = nodes; iter != NULL; iter = iter->next) {
const pe_node_t *node = (const pe_node_t *) iter->data;
r1_score = get_node_score(node, r1_nodes);
r2_score = get_node_score(node, r2_nodes);
if (r1_score > r2_score) {
rc = -1;
goto done;
}
if (r1_score < r2_score) {
rc = 1;
goto done;
}
}
done:
crm_trace("%s (%d)%s%s %c %s (%d)%s%s: %s",
resource1->id, r1_score,
((r1_node == NULL)? "" : " on "),
((r1_node == NULL)? "" : r1_node->details->id),
((rc < 0)? '>' : ((rc > 0)? '<' : '=')),
resource2->id, r2_score,
((r2_node == NULL)? "" : " on "),
((r2_node == NULL)? "" : r2_node->details->id),
reason);
if (r1_nodes != NULL) {
g_hash_table_destroy(r1_nodes);
}
if (r2_nodes != NULL) {
g_hash_table_destroy(r2_nodes);
}
return rc;
}
/*!
* \internal
* \brief Sort resources in the order they should be assigned to nodes
*
* \param[in,out] data_set Cluster working set
*/
void
pcmk__sort_resources(pe_working_set_t *data_set)
{
GList *nodes = g_list_copy(data_set->nodes);
nodes = pcmk__sort_nodes(nodes, NULL);
data_set->resources = g_list_sort_with_data(data_set->resources,
cmp_resources, nodes);
g_list_free(nodes);
}
diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c
index 3d709d3872..455b7bc8fd 100644
--- a/lib/pengine/complex.c
+++ b/lib/pengine/complex.c
@@ -1,1185 +1,1185 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/scheduler_internal.h>
#include "pe_status_private.h"
void populate_hash(xmlNode * nvpair_list, GHashTable * hash, const char **attrs, int attrs_length);
static pe_node_t *active_node(const pe_resource_t *rsc, unsigned int *count_all,
unsigned int *count_clean);
resource_object_functions_t resource_class_functions[] = {
{
native_unpack,
native_find_rsc,
native_parameter,
native_print,
native_active,
native_resource_state,
native_location,
native_free,
pe__count_common,
pe__native_is_filtered,
active_node,
pe__primitive_max_per_node,
},
{
group_unpack,
native_find_rsc,
native_parameter,
group_print,
group_active,
group_resource_state,
native_location,
group_free,
pe__count_common,
pe__group_is_filtered,
active_node,
pe__group_max_per_node,
},
{
clone_unpack,
native_find_rsc,
native_parameter,
clone_print,
clone_active,
clone_resource_state,
native_location,
clone_free,
pe__count_common,
pe__clone_is_filtered,
active_node,
pe__clone_max_per_node,
},
{
pe__unpack_bundle,
native_find_rsc,
native_parameter,
pe__print_bundle,
pe__bundle_active,
pe__bundle_resource_state,
native_location,
pe__free_bundle,
pe__count_bundle,
pe__bundle_is_filtered,
pe__bundle_active_node,
pe__bundle_max_per_node,
}
};
static enum pe_obj_types
get_resource_type(const char *name)
{
if (pcmk__str_eq(name, XML_CIB_TAG_RESOURCE, pcmk__str_casei)) {
return pcmk_rsc_variant_primitive;
} else if (pcmk__str_eq(name, XML_CIB_TAG_GROUP, pcmk__str_casei)) {
return pcmk_rsc_variant_group;
} else if (pcmk__str_eq(name, XML_CIB_TAG_INCARNATION, pcmk__str_casei)) {
return pcmk_rsc_variant_clone;
} else if (pcmk__str_eq(name, PCMK_XE_PROMOTABLE_LEGACY, pcmk__str_casei)) {
// @COMPAT deprecated since 2.0.0
return pcmk_rsc_variant_clone;
} else if (pcmk__str_eq(name, XML_CIB_TAG_CONTAINER, pcmk__str_casei)) {
return pcmk_rsc_variant_bundle;
}
return pcmk_rsc_variant_unknown;
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
add_hash_param(user_data, key, value);
}
static void
expand_parents_fixed_nvpairs(pe_resource_t * rsc, pe_rule_eval_data_t * rule_data, GHashTable * meta_hash, pe_working_set_t * data_set)
{
GHashTable *parent_orig_meta = pcmk__strkey_table(free, free);
pe_resource_t *p = rsc->parent;
if (p == NULL) {
return ;
}
/* Search all parent resources, get the fixed value of "meta_attributes" set only in the original xml, and stack it in the hash table. */
/* The fixed value of the lower parent resource takes precedence and is not overwritten. */
while(p != NULL) {
/* A hash table for comparison is generated, including the id-ref. */
pe__unpack_dataset_nvpairs(p->xml, XML_TAG_META_SETS,
rule_data, parent_orig_meta, NULL, FALSE, data_set);
p = p->parent;
}
/* If there is a fixed value of "meta_attributes" of the parent resource, it will be processed. */
if (parent_orig_meta != NULL) {
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
g_hash_table_iter_init(&iter, parent_orig_meta);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
/* Parameters set in the original xml of the parent resource will also try to overwrite the child resource. */
/* Attributes that already exist in the child lease are not updated. */
dup_attr(key, value, meta_hash);
}
}
if (parent_orig_meta != NULL) {
g_hash_table_destroy(parent_orig_meta);
}
return ;
}
void
get_meta_attributes(GHashTable * meta_hash, pe_resource_t * rsc,
pe_node_t * node, pe_working_set_t * data_set)
{
pe_rsc_eval_data_t rsc_rule_data = {
.standard = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS),
.provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER),
.agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE)
};
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = pcmk_role_unknown,
.now = data_set->now,
.match_data = NULL,
.rsc_data = &rsc_rule_data,
.op_data = NULL
};
if (node) {
rule_data.node_hash = node->details->attrs;
}
for (xmlAttrPtr a = pcmk__xe_first_attr(rsc->xml); a != NULL; a = a->next) {
const char *prop_name = (const char *) a->name;
const char *prop_value = pcmk__xml_attr_value(a);
add_hash_param(meta_hash, prop_name, prop_value);
}
pe__unpack_dataset_nvpairs(rsc->xml, XML_TAG_META_SETS, &rule_data,
meta_hash, NULL, FALSE, data_set);
/* Set the "meta_attributes" explicitly set in the parent resource to the hash table of the child resource. */
/* If it is already explicitly set as a child, it will not be overwritten. */
if (rsc->parent != NULL) {
expand_parents_fixed_nvpairs(rsc, &rule_data, meta_hash, data_set);
}
/* check the defaults */
pe__unpack_dataset_nvpairs(data_set->rsc_defaults, XML_TAG_META_SETS,
&rule_data, meta_hash, NULL, FALSE, data_set);
/* If there is "meta_attributes" that the parent resource has not explicitly set, set a value that is not set from rsc_default either. */
/* The values already set up to this point will not be overwritten. */
if (rsc->parent) {
g_hash_table_foreach(rsc->parent->meta, dup_attr, meta_hash);
}
}
void
get_rsc_attributes(GHashTable *meta_hash, const pe_resource_t *rsc,
const pe_node_t *node, pe_working_set_t *data_set)
{
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = pcmk_role_unknown,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
if (node) {
rule_data.node_hash = node->details->attrs;
}
pe__unpack_dataset_nvpairs(rsc->xml, XML_TAG_ATTR_SETS, &rule_data,
meta_hash, NULL, FALSE, data_set);
/* set anything else based on the parent */
if (rsc->parent != NULL) {
get_rsc_attributes(meta_hash, rsc->parent, node, data_set);
} else {
/* and finally check the defaults */
pe__unpack_dataset_nvpairs(data_set->rsc_defaults, XML_TAG_ATTR_SETS,
&rule_data, meta_hash, NULL, FALSE, data_set);
}
}
static char *
template_op_key(xmlNode * op)
{
const char *name = crm_element_value(op, "name");
const char *role = crm_element_value(op, "role");
char *key = NULL;
if ((role == NULL)
|| pcmk__strcase_any_of(role, PCMK__ROLE_STARTED, PCMK__ROLE_UNPROMOTED,
PCMK__ROLE_UNPROMOTED_LEGACY, NULL)) {
role = PCMK__ROLE_UNKNOWN;
}
key = crm_strdup_printf("%s-%s", name, role);
return key;
}
static gboolean
unpack_template(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * data_set)
{
xmlNode *cib_resources = NULL;
xmlNode *template = NULL;
xmlNode *new_xml = NULL;
xmlNode *child_xml = NULL;
xmlNode *rsc_ops = NULL;
xmlNode *template_ops = NULL;
const char *template_ref = NULL;
const char *clone = NULL;
const char *id = NULL;
if (xml_obj == NULL) {
pe_err("No resource object for template unpacking");
return FALSE;
}
template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE);
if (template_ref == NULL) {
return TRUE;
}
id = ID(xml_obj);
if (id == NULL) {
pe_err("'%s' object must have a id", xml_obj->name);
return FALSE;
}
if (pcmk__str_eq(template_ref, id, pcmk__str_none)) {
pe_err("The resource object '%s' should not reference itself", id);
return FALSE;
}
cib_resources = get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE);
if (cib_resources == NULL) {
pe_err("No resources configured");
return FALSE;
}
template = pcmk__xe_match(cib_resources, XML_CIB_TAG_RSC_TEMPLATE,
XML_ATTR_ID, template_ref);
if (template == NULL) {
pe_err("No template named '%s'", template_ref);
return FALSE;
}
new_xml = copy_xml(template);
xmlNodeSetName(new_xml, xml_obj->name);
crm_xml_add(new_xml, XML_ATTR_ID, id);
clone = crm_element_value(xml_obj, XML_RSC_ATTR_INCARNATION);
if(clone) {
crm_xml_add(new_xml, XML_RSC_ATTR_INCARNATION, clone);
}
template_ops = find_xml_node(new_xml, "operations", FALSE);
for (child_xml = pcmk__xe_first_child(xml_obj); child_xml != NULL;
child_xml = pcmk__xe_next(child_xml)) {
xmlNode *new_child = NULL;
new_child = add_node_copy(new_xml, child_xml);
if (pcmk__str_eq((const char *)new_child->name, "operations", pcmk__str_none)) {
rsc_ops = new_child;
}
}
if (template_ops && rsc_ops) {
xmlNode *op = NULL;
GHashTable *rsc_ops_hash = pcmk__strkey_table(free, NULL);
for (op = pcmk__xe_first_child(rsc_ops); op != NULL;
op = pcmk__xe_next(op)) {
char *key = template_op_key(op);
g_hash_table_insert(rsc_ops_hash, key, op);
}
for (op = pcmk__xe_first_child(template_ops); op != NULL;
op = pcmk__xe_next(op)) {
char *key = template_op_key(op);
if (g_hash_table_lookup(rsc_ops_hash, key) == NULL) {
add_node_copy(rsc_ops, op);
}
free(key);
}
if (rsc_ops_hash) {
g_hash_table_destroy(rsc_ops_hash);
}
free_xml(template_ops);
}
/*free_xml(*expanded_xml); */
*expanded_xml = new_xml;
/* Disable multi-level templates for now */
/*if(unpack_template(new_xml, expanded_xml, data_set) == FALSE) {
free_xml(*expanded_xml);
*expanded_xml = NULL;
return FALSE;
} */
return TRUE;
}
static gboolean
add_template_rsc(xmlNode * xml_obj, pe_working_set_t * data_set)
{
const char *template_ref = NULL;
const char *id = NULL;
if (xml_obj == NULL) {
pe_err("No resource object for processing resource list of template");
return FALSE;
}
template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE);
if (template_ref == NULL) {
return TRUE;
}
id = ID(xml_obj);
if (id == NULL) {
pe_err("'%s' object must have a id", xml_obj->name);
return FALSE;
}
if (pcmk__str_eq(template_ref, id, pcmk__str_none)) {
pe_err("The resource object '%s' should not reference itself", id);
return FALSE;
}
if (add_tag_ref(data_set->template_rsc_sets, template_ref, id) == FALSE) {
return FALSE;
}
return TRUE;
}
static bool
detect_promotable(pe_resource_t *rsc)
{
const char *promotable = g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_PROMOTABLE);
if (crm_is_true(promotable)) {
return TRUE;
}
// @COMPAT deprecated since 2.0.0
if (pcmk__xe_is(rsc->xml, PCMK_XE_PROMOTABLE_LEGACY)) {
/* @TODO in some future version, pe_warn_once() here,
* then drop support in even later version
*/
g_hash_table_insert(rsc->meta, strdup(XML_RSC_ATTR_PROMOTABLE),
strdup(XML_BOOLEAN_TRUE));
return TRUE;
}
return FALSE;
}
static void
free_params_table(gpointer data)
{
g_hash_table_destroy((GHashTable *) data);
}
/*!
* \brief Get a table of resource parameters
*
* \param[in,out] rsc Resource to query
* \param[in] node Node for evaluating rules (NULL for defaults)
* \param[in,out] data_set Cluster working set
*
* \return Hash table containing resource parameter names and values
* (or NULL if \p rsc or \p data_set is NULL)
* \note The returned table will be destroyed when the resource is freed, so
* callers should not destroy it.
*/
GHashTable *
pe_rsc_params(pe_resource_t *rsc, const pe_node_t *node,
pe_working_set_t *data_set)
{
GHashTable *params_on_node = NULL;
/* A NULL node is used to request the resource's default parameters
* (not evaluated for node), but we always want something non-NULL
* as a hash table key.
*/
const char *node_name = "";
// Sanity check
if ((rsc == NULL) || (data_set == NULL)) {
return NULL;
}
if ((node != NULL) && (node->details->uname != NULL)) {
node_name = node->details->uname;
}
// Find the parameter table for given node
if (rsc->parameter_cache == NULL) {
rsc->parameter_cache = pcmk__strikey_table(free, free_params_table);
} else {
params_on_node = g_hash_table_lookup(rsc->parameter_cache, node_name);
}
// If none exists yet, create one with parameters evaluated for node
if (params_on_node == NULL) {
params_on_node = pcmk__strkey_table(free, free);
get_rsc_attributes(params_on_node, rsc, node, data_set);
g_hash_table_insert(rsc->parameter_cache, strdup(node_name),
params_on_node);
}
return params_on_node;
}
/*!
* \internal
* \brief Unpack a resource's "requires" meta-attribute
*
* \param[in,out] rsc Resource being unpacked
* \param[in] value Value of "requires" meta-attribute
* \param[in] is_default Whether \p value was selected by default
*/
static void
unpack_requires(pe_resource_t *rsc, const char *value, bool is_default)
{
if (pcmk__str_eq(value, PCMK__VALUE_NOTHING, pcmk__str_casei)) {
} else if (pcmk__str_eq(value, PCMK__VALUE_QUORUM, pcmk__str_casei)) {
pe__set_resource_flags(rsc, pe_rsc_needs_quorum);
} else if (pcmk__str_eq(value, PCMK__VALUE_FENCING, pcmk__str_casei)) {
pe__set_resource_flags(rsc, pe_rsc_needs_fencing);
if (!pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
pcmk__config_warn("%s requires fencing but fencing is disabled",
rsc->id);
}
} else if (pcmk__str_eq(value, PCMK__VALUE_UNFENCING, pcmk__str_casei)) {
if (pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)) {
pcmk__config_warn("Resetting \"" XML_RSC_ATTR_REQUIRES "\" for %s "
"to \"" PCMK__VALUE_QUORUM "\" because fencing "
"devices cannot require unfencing", rsc->id);
unpack_requires(rsc, PCMK__VALUE_QUORUM, true);
return;
} else if (!pcmk_is_set(rsc->cluster->flags,
pcmk_sched_fencing_enabled)) {
pcmk__config_warn("Resetting \"" XML_RSC_ATTR_REQUIRES "\" for %s "
"to \"" PCMK__VALUE_QUORUM "\" because fencing "
"is disabled", rsc->id);
unpack_requires(rsc, PCMK__VALUE_QUORUM, true);
return;
} else {
pe__set_resource_flags(rsc,
pe_rsc_needs_fencing|pe_rsc_needs_unfencing);
}
} else {
const char *orig_value = value;
if (pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)) {
value = PCMK__VALUE_QUORUM;
} else if ((rsc->variant == pcmk_rsc_variant_primitive)
&& xml_contains_remote_node(rsc->xml)) {
value = PCMK__VALUE_QUORUM;
} else if (pcmk_is_set(rsc->cluster->flags,
pcmk_sched_enable_unfencing)) {
value = PCMK__VALUE_UNFENCING;
} else if (pcmk_is_set(rsc->cluster->flags,
pcmk_sched_fencing_enabled)) {
value = PCMK__VALUE_FENCING;
} else if (rsc->cluster->no_quorum_policy == pcmk_no_quorum_ignore) {
value = PCMK__VALUE_NOTHING;
} else {
value = PCMK__VALUE_QUORUM;
}
if (orig_value != NULL) {
pcmk__config_err("Resetting '" XML_RSC_ATTR_REQUIRES "' for %s "
"to '%s' because '%s' is not valid",
rsc->id, value, orig_value);
}
unpack_requires(rsc, value, true);
return;
}
pe_rsc_trace(rsc, "\tRequired to start: %s%s", value,
(is_default? " (default)" : ""));
}
#ifndef PCMK__COMPAT_2_0
static void
warn_about_deprecated_classes(pe_resource_t *rsc)
{
const char *std = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if (pcmk__str_eq(std, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_none)) {
pe_warn_once(pe_wo_upstart,
"Support for Upstart resources (such as %s) is deprecated "
"and will be removed in a future release of Pacemaker",
rsc->id);
} else if (pcmk__str_eq(std, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_none)) {
pe_warn_once(pe_wo_nagios,
"Support for Nagios resources (such as %s) is deprecated "
"and will be removed in a future release of Pacemaker",
rsc->id);
}
}
#endif
/*!
* \internal
* \brief Unpack configuration XML for a given resource
*
* Unpack the XML object containing a resource's configuration into a new
* \c pe_resource_t object.
*
* \param[in] xml_obj XML node containing the resource's configuration
* \param[out] rsc Where to store the unpacked resource information
* \param[in] parent Resource's parent, if any
* \param[in,out] data_set Cluster working set
*
* \return Standard Pacemaker return code
* \note If pcmk_rc_ok is returned, \p *rsc is guaranteed to be non-NULL, and
* the caller is responsible for freeing it using its variant-specific
* free() method. Otherwise, \p *rsc is guaranteed to be NULL.
*/
int
pe__unpack_resource(xmlNode *xml_obj, pe_resource_t **rsc,
pe_resource_t *parent, pe_working_set_t *data_set)
{
xmlNode *expanded_xml = NULL;
xmlNode *ops = NULL;
const char *value = NULL;
const char *id = NULL;
bool guest_node = false;
bool remote_node = false;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = pcmk_role_unknown,
.now = NULL,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
CRM_CHECK(rsc != NULL, return EINVAL);
CRM_CHECK((xml_obj != NULL) && (data_set != NULL),
*rsc = NULL;
return EINVAL);
rule_data.now = data_set->now;
crm_log_xml_trace(xml_obj, "[raw XML]");
id = crm_element_value(xml_obj, XML_ATTR_ID);
if (id == NULL) {
pe_err("Ignoring <%s> configuration without " XML_ATTR_ID,
xml_obj->name);
return pcmk_rc_unpack_error;
}
if (unpack_template(xml_obj, &expanded_xml, data_set) == FALSE) {
return pcmk_rc_unpack_error;
}
*rsc = calloc(1, sizeof(pe_resource_t));
if (*rsc == NULL) {
crm_crit("Unable to allocate memory for resource '%s'", id);
return ENOMEM;
}
(*rsc)->cluster = data_set;
if (expanded_xml) {
crm_log_xml_trace(expanded_xml, "[expanded XML]");
(*rsc)->xml = expanded_xml;
(*rsc)->orig_xml = xml_obj;
} else {
(*rsc)->xml = xml_obj;
(*rsc)->orig_xml = NULL;
}
/* Do not use xml_obj from here on, use (*rsc)->xml in case templates are involved */
(*rsc)->parent = parent;
ops = find_xml_node((*rsc)->xml, "operations", FALSE);
(*rsc)->ops_xml = expand_idref(ops, data_set->input);
(*rsc)->variant = get_resource_type((const char *) (*rsc)->xml->name);
if ((*rsc)->variant == pcmk_rsc_variant_unknown) {
pe_err("Ignoring resource '%s' of unknown type '%s'",
id, (*rsc)->xml->name);
common_free(*rsc);
*rsc = NULL;
return pcmk_rc_unpack_error;
}
#ifndef PCMK__COMPAT_2_0
warn_about_deprecated_classes(*rsc);
#endif
(*rsc)->meta = pcmk__strkey_table(free, free);
(*rsc)->allowed_nodes = pcmk__strkey_table(NULL, free);
(*rsc)->known_on = pcmk__strkey_table(NULL, free);
value = crm_element_value((*rsc)->xml, XML_RSC_ATTR_INCARNATION);
if (value) {
(*rsc)->id = crm_strdup_printf("%s:%s", id, value);
add_hash_param((*rsc)->meta, XML_RSC_ATTR_INCARNATION, value);
} else {
(*rsc)->id = strdup(id);
}
(*rsc)->fns = &resource_class_functions[(*rsc)->variant];
get_meta_attributes((*rsc)->meta, *rsc, NULL, data_set);
(*rsc)->parameters = pe_rsc_params(*rsc, NULL, data_set); // \deprecated
(*rsc)->flags = 0;
- pe__set_resource_flags(*rsc, pe_rsc_runnable|pe_rsc_provisional);
+ pe__set_resource_flags(*rsc, pe_rsc_runnable|pcmk_rsc_unassigned);
if (!pcmk_is_set(data_set->flags, pcmk_sched_in_maintenance)) {
pe__set_resource_flags(*rsc, pcmk_rsc_managed);
}
(*rsc)->rsc_cons = NULL;
(*rsc)->rsc_tickets = NULL;
(*rsc)->actions = NULL;
(*rsc)->role = pcmk_role_stopped;
(*rsc)->next_role = pcmk_role_unknown;
(*rsc)->recovery_type = pcmk_multiply_active_restart;
(*rsc)->stickiness = 0;
(*rsc)->migration_threshold = INFINITY;
(*rsc)->failure_timeout = 0;
value = g_hash_table_lookup((*rsc)->meta, XML_CIB_ATTR_PRIORITY);
(*rsc)->priority = char2score(value);
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_CRITICAL);
if ((value == NULL) || crm_is_true(value)) {
pe__set_resource_flags(*rsc, pe_rsc_critical);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_NOTIFY);
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pcmk_rsc_notify);
}
if (xml_contains_remote_node((*rsc)->xml)) {
(*rsc)->is_remote_node = TRUE;
if (g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_CONTAINER)) {
guest_node = true;
} else {
remote_node = true;
}
}
value = g_hash_table_lookup((*rsc)->meta, XML_OP_ATTR_ALLOW_MIGRATE);
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pe_rsc_allow_migrate);
} else if ((value == NULL) && remote_node) {
/* By default, we want remote nodes to be able
* to float around the cluster without having to stop all the
* resources within the remote-node before moving. Allowing
* migration support enables this feature. If this ever causes
* problems, migration support can be explicitly turned off with
* allow-migrate=false.
*/
pe__set_resource_flags(*rsc, pe_rsc_allow_migrate);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MANAGED);
if (value != NULL && !pcmk__str_eq("default", value, pcmk__str_casei)) {
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pcmk_rsc_managed);
} else {
pe__clear_resource_flags(*rsc, pcmk_rsc_managed);
}
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MAINTENANCE);
if (crm_is_true(value)) {
pe__clear_resource_flags(*rsc, pcmk_rsc_managed);
pe__set_resource_flags(*rsc, pe_rsc_maintenance);
}
if (pcmk_is_set(data_set->flags, pcmk_sched_in_maintenance)) {
pe__clear_resource_flags(*rsc, pcmk_rsc_managed);
pe__set_resource_flags(*rsc, pe_rsc_maintenance);
}
if (pe_rsc_is_clone(pe__const_top_resource(*rsc, false))) {
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE);
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pcmk_rsc_unique);
}
if (detect_promotable(*rsc)) {
pe__set_resource_flags(*rsc, pcmk_rsc_promotable);
}
} else {
pe__set_resource_flags(*rsc, pcmk_rsc_unique);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_RESTART);
if (pcmk__str_eq(value, "restart", pcmk__str_casei)) {
(*rsc)->restart_type = pe_restart_restart;
pe_rsc_trace((*rsc), "%s dependency restart handling: restart",
(*rsc)->id);
pe_warn_once(pe_wo_restart_type,
"Support for restart-type is deprecated and will be removed in a future release");
} else {
(*rsc)->restart_type = pe_restart_ignore;
pe_rsc_trace((*rsc), "%s dependency restart handling: ignore",
(*rsc)->id);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MULTIPLE);
if (pcmk__str_eq(value, "stop_only", pcmk__str_casei)) {
(*rsc)->recovery_type = pcmk_multiply_active_stop;
pe_rsc_trace((*rsc), "%s multiple running resource recovery: stop only",
(*rsc)->id);
} else if (pcmk__str_eq(value, "block", pcmk__str_casei)) {
(*rsc)->recovery_type = pcmk_multiply_active_block;
pe_rsc_trace((*rsc), "%s multiple running resource recovery: block",
(*rsc)->id);
} else if (pcmk__str_eq(value, "stop_unexpected", pcmk__str_casei)) {
(*rsc)->recovery_type = pcmk_multiply_active_unexpected;
pe_rsc_trace((*rsc), "%s multiple running resource recovery: "
"stop unexpected instances",
(*rsc)->id);
} else { // "stop_start"
if (!pcmk__str_eq(value, "stop_start",
pcmk__str_casei|pcmk__str_null_matches)) {
pe_warn("%s is not a valid value for " XML_RSC_ATTR_MULTIPLE
", using default of \"stop_start\"", value);
}
(*rsc)->recovery_type = pcmk_multiply_active_restart;
pe_rsc_trace((*rsc), "%s multiple running resource recovery: "
"stop/start", (*rsc)->id);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_STICKINESS);
if (value != NULL && !pcmk__str_eq("default", value, pcmk__str_casei)) {
(*rsc)->stickiness = char2score(value);
}
value = g_hash_table_lookup((*rsc)->meta, PCMK_META_MIGRATION_THRESHOLD);
if (value != NULL && !pcmk__str_eq("default", value, pcmk__str_casei)) {
(*rsc)->migration_threshold = char2score(value);
if ((*rsc)->migration_threshold < 0) {
/* @TODO We use 1 here to preserve previous behavior, but this
* should probably use the default (INFINITY) or 0 (to disable)
* instead.
*/
pe_warn_once(pe_wo_neg_threshold,
PCMK_META_MIGRATION_THRESHOLD
" must be non-negative, using 1 instead");
(*rsc)->migration_threshold = 1;
}
}
if (pcmk__str_eq(crm_element_value((*rsc)->xml, XML_AGENT_ATTR_CLASS),
PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
pe__set_working_set_flags(data_set, pcmk_sched_have_fencing);
pe__set_resource_flags(*rsc, pcmk_rsc_fence_device);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_REQUIRES);
unpack_requires(*rsc, value, false);
value = g_hash_table_lookup((*rsc)->meta, PCMK_META_FAILURE_TIMEOUT);
if (value != NULL) {
// Stored as seconds
(*rsc)->failure_timeout = (int) (crm_parse_interval_spec(value) / 1000);
}
if (remote_node) {
GHashTable *params = pe_rsc_params(*rsc, NULL, data_set);
/* Grabbing the value now means that any rules based on node attributes
* will evaluate to false, so such rules should not be used with
* reconnect_interval.
*
* @TODO Evaluate per node before using
*/
value = g_hash_table_lookup(params, XML_REMOTE_ATTR_RECONNECT_INTERVAL);
if (value) {
/* reconnect delay works by setting failure_timeout and preventing the
* connection from starting until the failure is cleared. */
(*rsc)->remote_reconnect_ms = crm_parse_interval_spec(value);
/* we want to override any default failure_timeout in use when remote
* reconnect_interval is in use. */
(*rsc)->failure_timeout = (*rsc)->remote_reconnect_ms / 1000;
}
}
get_target_role(*rsc, &((*rsc)->next_role));
pe_rsc_trace((*rsc), "%s desired next state: %s", (*rsc)->id,
(*rsc)->next_role != pcmk_role_unknown? role2text((*rsc)->next_role) : "default");
if ((*rsc)->fns->unpack(*rsc, data_set) == FALSE) {
(*rsc)->fns->free(*rsc);
*rsc = NULL;
return pcmk_rc_unpack_error;
}
if (pcmk_is_set(data_set->flags, pcmk_sched_symmetric_cluster)) {
// This tag must stay exactly the same because it is tested elsewhere
resource_location(*rsc, NULL, 0, "symmetric_default", data_set);
} else if (guest_node) {
/* remote resources tied to a container resource must always be allowed
* to opt-in to the cluster. Whether the connection resource is actually
* allowed to be placed on a node is dependent on the container resource */
resource_location(*rsc, NULL, 0, "remote_connection_default", data_set);
}
pe_rsc_trace((*rsc), "%s action notification: %s", (*rsc)->id,
pcmk_is_set((*rsc)->flags, pcmk_rsc_notify)? "required" : "not required");
(*rsc)->utilization = pcmk__strkey_table(free, free);
pe__unpack_dataset_nvpairs((*rsc)->xml, XML_TAG_UTILIZATION, &rule_data,
(*rsc)->utilization, NULL, FALSE, data_set);
if (expanded_xml) {
if (add_template_rsc(xml_obj, data_set) == FALSE) {
(*rsc)->fns->free(*rsc);
*rsc = NULL;
return pcmk_rc_unpack_error;
}
}
return pcmk_rc_ok;
}
gboolean
is_parent(pe_resource_t *child, pe_resource_t *rsc)
{
pe_resource_t *parent = child;
if (parent == NULL || rsc == NULL) {
return FALSE;
}
while (parent->parent != NULL) {
if (parent->parent == rsc) {
return TRUE;
}
parent = parent->parent;
}
return FALSE;
}
pe_resource_t *
uber_parent(pe_resource_t * rsc)
{
pe_resource_t *parent = rsc;
if (parent == NULL) {
return NULL;
}
while ((parent->parent != NULL)
&& (parent->parent->variant != pcmk_rsc_variant_bundle)) {
parent = parent->parent;
}
return parent;
}
/*!
* \internal
* \brief Get the topmost parent of a resource as a const pointer
*
* \param[in] rsc Resource to check
* \param[in] include_bundle If true, go all the way to bundle
*
* \return \p NULL if \p rsc is NULL, \p rsc if \p rsc has no parent,
* the bundle if \p rsc is bundled and \p include_bundle is true,
* otherwise the topmost parent of \p rsc up to a clone
*/
const pe_resource_t *
pe__const_top_resource(const pe_resource_t *rsc, bool include_bundle)
{
const pe_resource_t *parent = rsc;
if (parent == NULL) {
return NULL;
}
while (parent->parent != NULL) {
if (!include_bundle
&& (parent->parent->variant == pcmk_rsc_variant_bundle)) {
break;
}
parent = parent->parent;
}
return parent;
}
void
common_free(pe_resource_t * rsc)
{
if (rsc == NULL) {
return;
}
pe_rsc_trace(rsc, "Freeing %s %d", rsc->id, rsc->variant);
g_list_free(rsc->rsc_cons);
g_list_free(rsc->rsc_cons_lhs);
g_list_free(rsc->rsc_tickets);
g_list_free(rsc->dangling_migrations);
if (rsc->parameter_cache != NULL) {
g_hash_table_destroy(rsc->parameter_cache);
}
if (rsc->meta != NULL) {
g_hash_table_destroy(rsc->meta);
}
if (rsc->utilization != NULL) {
g_hash_table_destroy(rsc->utilization);
}
if ((rsc->parent == NULL)
&& pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
free_xml(rsc->xml);
rsc->xml = NULL;
free_xml(rsc->orig_xml);
rsc->orig_xml = NULL;
/* if rsc->orig_xml, then rsc->xml is an expanded xml from a template */
} else if (rsc->orig_xml) {
free_xml(rsc->xml);
rsc->xml = NULL;
}
if (rsc->running_on) {
g_list_free(rsc->running_on);
rsc->running_on = NULL;
}
if (rsc->known_on) {
g_hash_table_destroy(rsc->known_on);
rsc->known_on = NULL;
}
if (rsc->actions) {
g_list_free(rsc->actions);
rsc->actions = NULL;
}
if (rsc->allowed_nodes) {
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = NULL;
}
g_list_free(rsc->fillers);
g_list_free(rsc->rsc_location);
pe_rsc_trace(rsc, "Resource freed");
free(rsc->id);
free(rsc->clone_name);
free(rsc->allocated_to);
free(rsc->variant_opaque);
free(rsc->pending_task);
free(rsc);
}
/*!
* \internal
* \brief Count a node and update most preferred to it as appropriate
*
* \param[in] rsc An active resource
* \param[in] node A node that \p rsc is active on
* \param[in,out] active This will be set to \p node if \p node is more
* preferred than the current value
* \param[in,out] count_all If not NULL, this will be incremented
* \param[in,out] count_clean If not NULL, this will be incremented if \p node
* is online and clean
*
* \return true if the count should continue, or false if sufficiently known
*/
bool
pe__count_active_node(const pe_resource_t *rsc, pe_node_t *node,
pe_node_t **active, unsigned int *count_all,
unsigned int *count_clean)
{
bool keep_looking = false;
bool is_happy = false;
CRM_CHECK((rsc != NULL) && (node != NULL) && (active != NULL),
return false);
is_happy = node->details->online && !node->details->unclean;
if (count_all != NULL) {
++*count_all;
}
if ((count_clean != NULL) && is_happy) {
++*count_clean;
}
if ((count_all != NULL) || (count_clean != NULL)) {
keep_looking = true; // We're counting, so go through entire list
}
if (rsc->partial_migration_source != NULL) {
if (node->details == rsc->partial_migration_source->details) {
*active = node; // This is the migration source
} else {
keep_looking = true;
}
} else if (!pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
if (is_happy && ((*active == NULL) || !(*active)->details->online
|| (*active)->details->unclean)) {
*active = node; // This is the first clean node
} else {
keep_looking = true;
}
}
if (*active == NULL) {
*active = node; // This is the first node checked
}
return keep_looking;
}
// Shared implementation of resource_object_functions_t:active_node()
static pe_node_t *
active_node(const pe_resource_t *rsc, unsigned int *count_all,
unsigned int *count_clean)
{
pe_node_t *active = NULL;
if (count_all != NULL) {
*count_all = 0;
}
if (count_clean != NULL) {
*count_clean = 0;
}
if (rsc == NULL) {
return NULL;
}
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
if (!pe__count_active_node(rsc, (pe_node_t *) iter->data, &active,
count_all, count_clean)) {
break; // Don't waste time iterating if we don't have to
}
}
return active;
}
/*!
* \brief
* \internal Find and count active nodes according to "requires"
*
* \param[in] rsc Resource to check
* \param[out] count If not NULL, will be set to count of active nodes
*
* \return An active node (or NULL if resource is not active anywhere)
*
* \note This is a convenience wrapper for active_node() where the count of all
* active nodes or only clean active nodes is desired according to the
* "requires" meta-attribute.
*/
pe_node_t *
pe__find_active_requires(const pe_resource_t *rsc, unsigned int *count)
{
if (rsc == NULL) {
if (count != NULL) {
*count = 0;
}
return NULL;
} else if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
return rsc->fns->active_node(rsc, count, NULL);
} else {
return rsc->fns->active_node(rsc, NULL, count);
}
}
void
pe__count_common(pe_resource_t *rsc)
{
if (rsc->children != NULL) {
for (GList *item = rsc->children; item != NULL; item = item->next) {
((pe_resource_t *) item->data)->fns->count(item->data);
}
} else if (!pcmk_is_set(rsc->flags, pcmk_rsc_removed)
|| (rsc->role > pcmk_role_stopped)) {
rsc->cluster->ninstances++;
if (pe__resource_is_disabled(rsc)) {
rsc->cluster->disabled_resources++;
}
if (pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
rsc->cluster->blocked_resources++;
}
}
}
/*!
* \internal
* \brief Update a resource's next role
*
* \param[in,out] rsc Resource to be updated
* \param[in] role Resource's new next role
* \param[in] why Human-friendly reason why role is changing (for logs)
*/
void
pe__set_next_role(pe_resource_t *rsc, enum rsc_role_e role, const char *why)
{
CRM_ASSERT((rsc != NULL) && (why != NULL));
if (rsc->next_role != role) {
pe_rsc_trace(rsc, "Resetting next role for %s from %s to %s (%s)",
rsc->id, role2text(rsc->next_role), role2text(role), why);
rsc->next_role = role;
}
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Oct 29, 8:55 PM (1 d, 16 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
942631
Default Alt Text
(438 KB)

Event Timeline