Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h
index fe9ea4122c..d0f8322ee3 100644
--- a/include/crm/pengine/internal.h
+++ b/include/crm/pengine/internal.h
@@ -1,601 +1,615 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PE_INTERNAL__H
# define PE_INTERNAL__H
+
# include <stdint.h>
# include <string.h>
# include <crm/pengine/status.h>
# include <crm/pengine/remote_internal.h>
# include <crm/common/internal.h>
# include <crm/common/options_internal.h>
# include <crm/common/output_internal.h>
+enum pe__clone_flags {
+ // Whether instances should be started sequentially
+ pe__clone_ordered = (1 << 0),
+
+ // Whether promotion scores have been added
+ pe__clone_promotion_added = (1 << 1),
+
+ // Whether promotion constraints have been added
+ pe__clone_promotion_constrained = (1 << 2),
+};
+
+bool pe__clone_is_ordered(pe_resource_t *clone);
+int pe__set_clone_flag(pe_resource_t *clone, enum pe__clone_flags flag);
+
+
# define pe_rsc_info(rsc, fmt, args...) crm_log_tag(LOG_INFO, rsc ? rsc->id : "<NULL>", fmt, ##args)
# define pe_rsc_debug(rsc, fmt, args...) crm_log_tag(LOG_DEBUG, rsc ? rsc->id : "<NULL>", fmt, ##args)
# define pe_rsc_trace(rsc, fmt, args...) crm_log_tag(LOG_TRACE, rsc ? rsc->id : "<NULL>", fmt, ##args)
# define pe_err(fmt...) do { \
was_processing_error = TRUE; \
pcmk__config_err(fmt); \
} while (0)
# define pe_warn(fmt...) do { \
was_processing_warning = TRUE; \
pcmk__config_warn(fmt); \
} while (0)
# define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); }
# define pe_proc_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); }
#define pe__set_working_set_flags(working_set, flags_to_set) do { \
(working_set)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Working set", crm_system_name, \
(working_set)->flags, (flags_to_set), #flags_to_set); \
} while (0)
#define pe__clear_working_set_flags(working_set, flags_to_clear) do { \
(working_set)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Working set", crm_system_name, \
(working_set)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
#define pe__set_resource_flags(resource, flags_to_set) do { \
(resource)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Resource", (resource)->id, (resource)->flags, \
(flags_to_set), #flags_to_set); \
} while (0)
#define pe__clear_resource_flags(resource, flags_to_clear) do { \
(resource)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Resource", (resource)->id, (resource)->flags, \
(flags_to_clear), #flags_to_clear); \
} while (0)
#define pe__set_action_flags(action, flags_to_set) do { \
(action)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Action", (action)->uuid, \
(action)->flags, \
(flags_to_set), \
#flags_to_set); \
} while (0)
#define pe__clear_action_flags(action, flags_to_clear) do { \
(action)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Action", (action)->uuid, \
(action)->flags, \
(flags_to_clear), \
#flags_to_clear); \
} while (0)
#define pe__set_raw_action_flags(action_flags, action_name, flags_to_set) do { \
action_flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Action", action_name, \
(action_flags), \
(flags_to_set), #flags_to_set); \
} while (0)
#define pe__clear_raw_action_flags(action_flags, action_name, flags_to_clear) do { \
action_flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Action", action_name, \
(action_flags), \
(flags_to_clear), \
#flags_to_clear); \
} while (0)
#define pe__set_action_flags_as(function, line, action, flags_to_set) do { \
(action)->flags = pcmk__set_flags_as((function), (line), \
LOG_TRACE, \
"Action", (action)->uuid, \
(action)->flags, \
(flags_to_set), \
#flags_to_set); \
} while (0)
#define pe__clear_action_flags_as(function, line, action, flags_to_clear) do { \
(action)->flags = pcmk__clear_flags_as((function), (line), \
LOG_TRACE, \
"Action", (action)->uuid, \
(action)->flags, \
(flags_to_clear), \
#flags_to_clear); \
} while (0)
#define pe__set_order_flags(order_flags, flags_to_set) do { \
order_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Ordering", "constraint", \
order_flags, (flags_to_set), \
#flags_to_set); \
} while (0)
#define pe__clear_order_flags(order_flags, flags_to_clear) do { \
order_flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Ordering", "constraint", \
order_flags, (flags_to_clear), \
#flags_to_clear); \
} while (0)
#define pe__set_graph_flags(graph_flags, gr_action, flags_to_set) do { \
graph_flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Graph", \
(gr_action)->uuid, graph_flags, \
(flags_to_set), #flags_to_set); \
} while (0)
#define pe__clear_graph_flags(graph_flags, gr_action, flags_to_clear) do { \
graph_flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Graph", \
(gr_action)->uuid, graph_flags, \
(flags_to_clear), #flags_to_clear); \
} while (0)
// Some warnings we don't want to print every transition
enum pe_warn_once_e {
pe_wo_blind = (1 << 0),
pe_wo_restart_type = (1 << 1),
pe_wo_role_after = (1 << 2),
pe_wo_poweroff = (1 << 3),
pe_wo_require_all = (1 << 4),
pe_wo_order_score = (1 << 5),
pe_wo_neg_threshold = (1 << 6),
pe_wo_remove_after = (1 << 7),
pe_wo_ping_node = (1 << 8),
};
extern uint32_t pe_wo;
#define pe_warn_once(pe_wo_bit, fmt...) do { \
if (!pcmk_is_set(pe_wo, pe_wo_bit)) { \
if (pe_wo_bit == pe_wo_blind) { \
crm_warn(fmt); \
} else { \
pe_warn(fmt); \
} \
pe_wo = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Warn-once", "logging", pe_wo, \
(pe_wo_bit), #pe_wo_bit); \
} \
} while (0);
typedef struct pe__location_constraint_s {
char *id; // Constraint XML ID
pe_resource_t *rsc_lh; // Resource being located
enum rsc_role_e role_filter; // Role to locate
enum pe_discover_e discover_mode; // Resource discovery
GList *node_list_rh; // List of pe_node_t*
} pe__location_t;
typedef struct pe__order_constraint_s {
int id;
enum pe_ordering type;
void *lh_opaque;
pe_resource_t *lh_rsc;
pe_action_t *lh_action;
char *lh_action_task;
void *rh_opaque;
pe_resource_t *rh_rsc;
pe_action_t *rh_action;
char *rh_action_task;
} pe__ordering_t;
typedef struct notify_data_s {
GSList *keys; // Environment variable name/value pairs
const char *action;
pe_action_t *pre;
pe_action_t *post;
pe_action_t *pre_done;
pe_action_t *post_done;
GList *active; /* notify_entry_t* */
GList *inactive; /* notify_entry_t* */
GList *start; /* notify_entry_t* */
GList *stop; /* notify_entry_t* */
GList *demote; /* notify_entry_t* */
GList *promote; /* notify_entry_t* */
GList *promoted; /* notify_entry_t* */
GList *unpromoted; /* notify_entry_t* */
GHashTable *allowed_nodes;
} notify_data_t;
bool pe_can_fence(pe_working_set_t *data_set, pe_node_t *node);
void add_hash_param(GHashTable * hash, const char *name, const char *value);
char *native_parameter(pe_resource_t * rsc, pe_node_t * node, gboolean create, const char *name,
pe_working_set_t * data_set);
pe_node_t *native_location(const pe_resource_t *rsc, GList **list, int current);
void pe_metadata(pcmk__output_t *out);
void verify_pe_options(GHashTable * options);
void common_update_score(pe_resource_t * rsc, const char *id, int score);
void native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set, gboolean failed);
gboolean native_unpack(pe_resource_t * rsc, pe_working_set_t * data_set);
gboolean group_unpack(pe_resource_t * rsc, pe_working_set_t * data_set);
gboolean clone_unpack(pe_resource_t * rsc, pe_working_set_t * data_set);
gboolean pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set);
pe_resource_t *native_find_rsc(pe_resource_t *rsc, const char *id, const pe_node_t *node,
int flags);
gboolean native_active(pe_resource_t * rsc, gboolean all);
gboolean group_active(pe_resource_t * rsc, gboolean all);
gboolean clone_active(pe_resource_t * rsc, gboolean all);
gboolean pe__bundle_active(pe_resource_t *rsc, gboolean all);
void native_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data);
void group_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data);
void clone_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data);
void pe__print_bundle(pe_resource_t *rsc, const char *pre_text, long options,
void *print_data);
gchar * pcmk__native_output_string(pe_resource_t *rsc, const char *name, pe_node_t *node,
uint32_t show_opts, const char *target_role, bool show_nodes);
int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name
, size_t pairs_count, ...);
char *pe__node_display_name(pe_node_t *node, bool print_detail);
static inline const char *
pe__rsc_bool_str(pe_resource_t *rsc, uint64_t rsc_flag)
{
return pcmk__btoa(pcmk_is_set(rsc->flags, rsc_flag));
}
int pe__clone_xml(pcmk__output_t *out, va_list args);
int pe__clone_default(pcmk__output_t *out, va_list args);
int pe__group_xml(pcmk__output_t *out, va_list args);
int pe__group_default(pcmk__output_t *out, va_list args);
int pe__bundle_xml(pcmk__output_t *out, va_list args);
int pe__bundle_html(pcmk__output_t *out, va_list args);
int pe__bundle_text(pcmk__output_t *out, va_list args);
int pe__node_html(pcmk__output_t *out, va_list args);
int pe__node_text(pcmk__output_t *out, va_list args);
int pe__node_xml(pcmk__output_t *out, va_list args);
int pe__resource_xml(pcmk__output_t *out, va_list args);
int pe__resource_html(pcmk__output_t *out, va_list args);
int pe__resource_text(pcmk__output_t *out, va_list args);
void native_free(pe_resource_t * rsc);
void group_free(pe_resource_t * rsc);
void clone_free(pe_resource_t * rsc);
void pe__free_bundle(pe_resource_t *rsc);
enum rsc_role_e native_resource_state(const pe_resource_t * rsc, gboolean current);
enum rsc_role_e group_resource_state(const pe_resource_t * rsc, gboolean current);
enum rsc_role_e clone_resource_state(const pe_resource_t * rsc, gboolean current);
enum rsc_role_e pe__bundle_resource_state(const pe_resource_t *rsc,
gboolean current);
void pe__count_common(pe_resource_t *rsc);
void pe__count_bundle(pe_resource_t *rsc);
gboolean common_unpack(xmlNode * xml_obj, pe_resource_t ** rsc, pe_resource_t * parent,
pe_working_set_t * data_set);
void common_free(pe_resource_t * rsc);
pe_node_t *pe__copy_node(const pe_node_t *this_node);
extern time_t get_effective_time(pe_working_set_t * data_set);
/* Failure handling utilities (from failcounts.c) */
// bit flags for fail count handling options
enum pe_fc_flags_e {
pe_fc_default = (1 << 0),
pe_fc_effective = (1 << 1), // don't count expired failures
pe_fc_fillers = (1 << 2), // if container, include filler failures in count
};
int pe_get_failcount(pe_node_t *node, pe_resource_t *rsc, time_t *last_failure,
uint32_t flags, xmlNode *xml_op,
pe_working_set_t *data_set);
pe_action_t *pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node,
const char *reason,
pe_working_set_t *data_set);
/* Functions for finding/counting a resource's active nodes */
pe_node_t *pe__find_active_on(const pe_resource_t *rsc,
unsigned int *count_all,
unsigned int *count_clean);
pe_node_t *pe__find_active_requires(const pe_resource_t *rsc,
unsigned int *count);
static inline pe_node_t *
pe__current_node(const pe_resource_t *rsc)
{
return pe__find_active_on(rsc, NULL, NULL);
}
/* Binary like operators for lists of nodes */
extern void node_list_exclude(GHashTable * list, GList *list2, gboolean merge_scores);
GHashTable *pe__node_list2table(GList *list);
static inline gpointer
pe_hash_table_lookup(GHashTable * hash, gconstpointer key)
{
if (hash) {
return g_hash_table_lookup(hash, key);
}
return NULL;
}
extern pe_action_t *get_pseudo_op(const char *name, pe_working_set_t * data_set);
extern gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order);
/* Printing functions for debug */
extern void print_str_str(gpointer key, gpointer value, gpointer user_data);
extern void pe__output_node(pe_node_t * node, gboolean details, pcmk__output_t *out);
void pe__show_node_weights_as(const char *file, const char *function,
int line, bool to_log, pe_resource_t *rsc,
const char *comment, GHashTable *nodes,
pe_working_set_t *data_set);
#define pe__show_node_weights(level, rsc, text, nodes, data_set) \
pe__show_node_weights_as(__FILE__, __func__, __LINE__, \
(level), (rsc), (text), (nodes), (data_set))
-/* Sorting functions */
extern gint sort_rsc_priority(gconstpointer a, gconstpointer b);
-extern gint sort_rsc_index(gconstpointer a, gconstpointer b);
extern xmlNode *find_rsc_op_entry(pe_resource_t * rsc, const char *key);
extern pe_action_t *custom_action(pe_resource_t * rsc, char *key, const char *task, pe_node_t * on_node,
gboolean optional, gboolean foo, pe_working_set_t * data_set);
# define delete_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DELETE, 0)
# define delete_action(rsc, node, optional) custom_action( \
rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \
optional, TRUE, data_set);
# define stopped_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOPPED, 0)
# define stopped_action(rsc, node, optional) custom_action( \
rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \
optional, TRUE, data_set);
# define stop_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOP, 0)
# define stop_action(rsc, node, optional) custom_action( \
rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \
optional, TRUE, data_set);
# define reload_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_RELOAD_AGENT, 0)
# define start_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_START, 0)
# define start_action(rsc, node, optional) custom_action( \
rsc, start_key(rsc), CRMD_ACTION_START, node, \
optional, TRUE, data_set)
# define started_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STARTED, 0)
# define started_action(rsc, node, optional) custom_action( \
rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \
optional, TRUE, data_set)
# define promote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTE, 0)
# define promote_action(rsc, node, optional) custom_action( \
rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \
optional, TRUE, data_set)
# define promoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTED, 0)
# define promoted_action(rsc, node, optional) custom_action( \
rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \
optional, TRUE, data_set)
# define demote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTE, 0)
# define demote_action(rsc, node, optional) custom_action( \
rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \
optional, TRUE, data_set)
# define demoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTED, 0)
# define demoted_action(rsc, node, optional) custom_action( \
rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \
optional, TRUE, data_set)
extern int pe_get_configured_timeout(pe_resource_t *rsc, const char *action,
pe_working_set_t *data_set);
extern pe_action_t *find_first_action(GList *input, const char *uuid, const char *task,
pe_node_t * on_node);
extern enum action_tasks get_complex_task(pe_resource_t * rsc, const char *name,
gboolean allow_non_atomic);
extern GList *find_actions(GList *input, const char *key, const pe_node_t *on_node);
GList *find_actions_exact(GList *input, const char *key,
const pe_node_t *on_node);
extern GList *find_recurring_actions(GList *input, pe_node_t * not_on_node);
GList *pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node,
const char *task, bool require_node);
extern void pe_free_action(pe_action_t * action);
extern void resource_location(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag,
pe_working_set_t * data_set);
extern int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b,
bool same_node_default);
extern gint sort_op_by_callid(gconstpointer a, gconstpointer b);
extern gboolean get_target_role(pe_resource_t * rsc, enum rsc_role_e *role);
void pe__set_next_role(pe_resource_t *rsc, enum rsc_role_e role,
const char *why);
extern pe_resource_t *find_clone_instance(pe_resource_t * rsc, const char *sub_id,
pe_working_set_t * data_set);
extern void destroy_ticket(gpointer data);
extern pe_ticket_t *ticket_new(const char *ticket_id, pe_working_set_t * data_set);
// Resources for manipulating resource names
const char *pe_base_name_end(const char *id);
char *clone_strip(const char *last_rsc_id);
char *clone_zero(const char *last_rsc_id);
static inline bool
pe_base_name_eq(pe_resource_t *rsc, const char *id)
{
if (id && rsc && rsc->id) {
// Number of characters in rsc->id before any clone suffix
size_t base_len = pe_base_name_end(rsc->id) - rsc->id + 1;
return (strlen(id) == base_len) && !strncmp(id, rsc->id, base_len);
}
return false;
}
int pe__target_rc_from_xml(xmlNode *xml_op);
gint sort_node_uname(gconstpointer a, gconstpointer b);
bool is_set_recursive(pe_resource_t * rsc, long long flag, bool any);
enum rsc_digest_cmp_val {
/*! Digests are the same */
RSC_DIGEST_MATCH = 0,
/*! Params that require a restart changed */
RSC_DIGEST_RESTART,
/*! Some parameter changed. */
RSC_DIGEST_ALL,
/*! rsc op didn't have a digest associated with it, so
* it is unknown if parameters changed or not. */
RSC_DIGEST_UNKNOWN,
};
typedef struct op_digest_cache_s {
enum rsc_digest_cmp_val rc;
xmlNode *params_all;
xmlNode *params_secure;
xmlNode *params_restart;
char *digest_all_calc;
char *digest_secure_calc;
char *digest_restart_calc;
} op_digest_cache_t;
op_digest_cache_t *pe__calculate_digests(pe_resource_t *rsc, const char *task,
guint *interval_ms, pe_node_t *node,
xmlNode *xml_op, GHashTable *overrides,
bool calc_secure,
pe_working_set_t *data_set);
void pe__free_digests(gpointer ptr);
op_digest_cache_t *rsc_action_digest_cmp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * node,
pe_working_set_t * data_set);
pe_action_t *pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t * data_set);
void trigger_unfencing(
pe_resource_t * rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t * data_set);
char *pe__action2reason(pe_action_t *action, enum pe_action_flags flag);
void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite);
void pe__set_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags);
void pe__clear_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags);
void pe__clear_resource_flags_on_all(pe_working_set_t *data_set, uint64_t flag);
gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref);
void print_rscs_brief(GList *rsc_list, const char * pre_text, long options,
void * print_data, gboolean print_all);
int pe__rscs_brief_output(pcmk__output_t *out, GList *rsc_list, unsigned int options);
void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay);
pe_node_t *pe_create_node(const char *id, const char *uname, const char *type,
const char *score, pe_working_set_t * data_set);
void common_print(pe_resource_t * rsc, const char *pre_text, const char *name, pe_node_t *node, long options, void *print_data);
int pe__common_output_text(pcmk__output_t *out, pe_resource_t * rsc, const char *name, pe_node_t *node, unsigned int options);
int pe__common_output_html(pcmk__output_t *out, pe_resource_t * rsc, const char *name, pe_node_t *node, unsigned int options);
pe_resource_t *pe__find_bundle_replica(const pe_resource_t *bundle,
const pe_node_t *node);
bool pe__bundle_needs_remote_name(pe_resource_t *rsc,
pe_working_set_t *data_set);
const char *pe__add_bundle_remote_name(pe_resource_t *rsc,
pe_working_set_t *data_set,
xmlNode *xml, const char *field);
const char *pe_node_attribute_calculated(const pe_node_t *node,
const char *name,
const pe_resource_t *rsc);
const char *pe_node_attribute_raw(pe_node_t *node, const char *name);
bool pe__is_universal_clone(pe_resource_t *rsc,
pe_working_set_t *data_set);
void pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node,
enum pe_check_parameters, pe_working_set_t *data_set);
void pe__foreach_param_check(pe_working_set_t *data_set,
void (*cb)(pe_resource_t*, pe_node_t*, xmlNode*,
enum pe_check_parameters,
pe_working_set_t*));
void pe__free_param_checks(pe_working_set_t *data_set);
bool pe__shutdown_requested(pe_node_t *node);
void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set);
/*!
* \internal
* \brief Register xml formatting message functions.
*/
void pe__register_messages(pcmk__output_t *out);
void pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name,
pe_rule_eval_data_t *rule_data, GHashTable *hash,
const char *always_first, gboolean overwrite,
pe_working_set_t *data_set);
bool pe__resource_is_disabled(pe_resource_t *rsc);
pe_action_t *pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set);
GList *pe__rscs_with_tag(pe_working_set_t *data_set, const char *tag_name);
GList *pe__unames_with_tag(pe_working_set_t *data_set, const char *tag_name);
bool pe__rsc_has_tag(pe_working_set_t *data_set, const char *rsc, const char *tag);
bool pe__uname_has_tag(pe_working_set_t *data_set, const char *node, const char *tag);
bool pe__rsc_running_on_any(pe_resource_t *rsc, GList *node_list);
GList *pe__filter_rsc_list(GList *rscs, GList *filter);
GList * pe__build_node_name_list(pe_working_set_t *data_set, const char *s);
GList * pe__build_rsc_list(pe_working_set_t *data_set, const char *s);
bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GList *only_node);
gboolean pe__bundle_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent);
gboolean pe__clone_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent);
gboolean pe__group_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent);
gboolean pe__native_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent);
xmlNode *pe__failed_probe_for_rsc(pe_resource_t *rsc, const char *name);
const char *pe__clone_child_id(pe_resource_t *rsc);
int pe__sum_node_health_scores(const pe_node_t *node, int base_health);
int pe__node_health(pe_node_t *node);
static inline enum pcmk__health_strategy
pe__health_strategy(pe_working_set_t *data_set)
{
return pcmk__parse_health_strategy(pe_pref(data_set->config_hash,
PCMK__OPT_NODE_HEALTH_STRATEGY));
}
static inline int
pe__health_score(const char *option, pe_working_set_t *data_set)
{
return char2score(pe_pref(data_set->config_hash, option));
}
#endif
diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h
index f959fbc58b..3585703070 100644
--- a/lib/pacemaker/libpacemaker_private.h
+++ b/lib/pacemaker/libpacemaker_private.h
@@ -1,429 +1,430 @@
/*
* Copyright 2021-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__LIBPACEMAKER_PRIVATE__H
# define PCMK__LIBPACEMAKER_PRIVATE__H
/* This header is for the sole use of libpacemaker, so that functions can be
* declared with G_GNUC_INTERNAL for efficiency.
*/
#include <crm/pengine/pe_types.h> // pe_action_t, pe_node_t, pe_working_set_t
// Actions (pcmk_sched_actions.c)
G_GNUC_INTERNAL
void pcmk__update_action_for_orderings(pe_action_t *action,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__log_action(const char *pre_text, pe_action_t *action, bool details);
G_GNUC_INTERNAL
pe_action_t *pcmk__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task,
bool optional, bool runnable);
G_GNUC_INTERNAL
pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name,
guint interval_ms, pe_node_t *node);
G_GNUC_INTERNAL
pe_action_t *pcmk__new_shutdown_action(pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__action_locks_rsc_to_node(const pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__deduplicate_action_inputs(pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__output_actions(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node,
xmlNode *xml_op);
G_GNUC_INTERNAL
void pcmk__handle_rsc_config_changes(pe_working_set_t *data_set);
// Producing transition graphs (pcmk_graph_producer.c)
G_GNUC_INTERNAL
bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action,
pe_action_wrapper_t *input);
G_GNUC_INTERNAL
void pcmk__add_action_to_graph(pe_action_t *action, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__create_graph(pe_working_set_t *data_set);
// Fencing (pcmk_sched_fencing.c)
G_GNUC_INTERNAL
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *action, enum pe_ordering order,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__fence_guest(pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__node_unfenced(pe_node_t *node);
// Injected scheduler inputs (pcmk_sched_injections.c)
void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib,
pcmk_injections_t *injections);
// Constraints of any type (pcmk_sched_constraints.c)
G_GNUC_INTERNAL
pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id);
G_GNUC_INTERNAL
xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id,
pe_resource_t **rsc, pe_tag_t **tag);
G_GNUC_INTERNAL
bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr,
bool convert_rsc, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__create_internal_constraints(pe_working_set_t *data_set);
// Location constraints
G_GNUC_INTERNAL
void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc,
int node_weight, const char *discover_mode,
pe_node_t *foo_node,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_locations(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_location(pe__location_t *constraint, pe_resource_t *rsc);
// Colocation constraints
enum pcmk__coloc_affects {
pcmk__coloc_affects_nothing = 0,
pcmk__coloc_affects_location,
pcmk__coloc_affects_role,
};
G_GNUC_INTERNAL
enum pcmk__coloc_affects pcmk__colocation_affects(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint,
bool preview);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_weights(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint);
G_GNUC_INTERNAL
void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__new_colocation(const char *id, const char *node_attr, int score,
pe_resource_t *dependent, pe_resource_t *primary,
const char *dependent_role, const char *primary_role,
bool influence, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__block_colocated_starts(pe_action_t *action,
pe_working_set_t *data_set);
/*!
* \internal
* \brief Check whether colocation's left-hand preferences should be considered
*
* \param[in] colocation Colocation constraint
* \param[in] rsc Right-hand instance (normally this will be
* colocation->primary, which NULL will be treated as,
* but for clones or bundles with multiple instances
* this can be a particular instance)
*
* \return true if colocation influence should be effective, otherwise false
*/
static inline bool
pcmk__colocation_has_influence(const pcmk__colocation_t *colocation,
const pe_resource_t *rsc)
{
if (rsc == NULL) {
rsc = colocation->primary;
}
/* The left hand of a colocation influences the right hand's location
* if the influence option is true, or the right hand is not yet active.
*/
return colocation->influence || (rsc->running_on == NULL);
}
// Ordering constraints (pcmk_sched_ordering.c)
G_GNUC_INTERNAL
void pcmk__new_ordering(pe_resource_t *lh_rsc, char *lh_task,
pe_action_t *lh_action, pe_resource_t *rh_rsc,
char *rh_task, pe_action_t *rh_action,
enum pe_ordering type, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__disable_invalid_orderings(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_stops_before_shutdown(pe_node_t *node,
pe_action_t *shutdown_op,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_orderings(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_after_each(pe_action_t *after, GList *list);
/*!
* \internal
* \brief Create a new ordering between two resource actions
*
* \param[in] lh_rsc Resource for 'first' action
* \param[in] rh_rsc Resource for 'then' action
* \param[in] lh_task Action key for 'first' action
* \param[in] rh_task Action key for 'then' action
* \param[in] flags Bitmask of enum pe_ordering flags
* \param[in] data_set Cluster working set to add ordering to
*/
#define pcmk__order_resource_actions(lh_rsc, lh_task, rh_rsc, rh_task, \
flags, data_set) \
pcmk__new_ordering((lh_rsc), pcmk__op_key((lh_rsc)->id, (lh_task), 0), \
NULL, \
(rh_rsc), pcmk__op_key((rh_rsc)->id, (rh_task), 0), \
NULL, (flags), (data_set))
#define pcmk__order_starts(rsc1, rsc2, type, data_set) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \
(rsc2), CRMD_ACTION_START, (type), (data_set))
#define pcmk__order_stops(rsc1, rsc2, type, data_set) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \
(rsc2), CRMD_ACTION_STOP, (type), (data_set))
G_GNUC_INTERNAL
void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__require_promotion_tickets(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__is_failed_remote_node(pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_remote_connection_actions(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node);
G_GNUC_INTERNAL
pe_node_t *pcmk__connection_host_for_action(pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params);
G_GNUC_INTERNAL
void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action);
// Groups (pcmk_sched_group.c)
G_GNUC_INTERNAL
GList *pcmk__group_colocated_resources(pe_resource_t *rsc,
pe_resource_t *orig_rsc,
GList *colocated_rscs);
// Bundles (pcmk_sched_bundle.c)
G_GNUC_INTERNAL
void pcmk__output_bundle_actions(pe_resource_t *rsc);
// Injections (pcmk_injections.c)
G_GNUC_INTERNAL
xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node,
bool up);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node,
const char *resource,
const char *lrm_name,
const char *rclass,
const char *rtype,
const char *rprovider);
G_GNUC_INTERNAL
void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node,
const char *resource, const char *task,
guint interval_ms, int rc);
G_GNUC_INTERNAL
xmlNode *pcmk__inject_action_result(xmlNode *cib_resource,
lrmd_event_data_t *op, int target_rc);
// Nodes (pcmk_sched_nodes.c)
G_GNUC_INTERNAL
-bool pcmk__node_available(const pe_node_t *node);
+bool pcmk__node_available(const pe_node_t *node, bool consider_score,
+ bool consider_guest);
G_GNUC_INTERNAL
bool pcmk__any_node_available(GHashTable *nodes);
G_GNUC_INTERNAL
GHashTable *pcmk__copy_node_table(GHashTable *nodes);
G_GNUC_INTERNAL
GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_node_health(pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe_node_t *pcmk__top_allowed_node(const pe_resource_t *rsc,
const pe_node_t *node);
// Clone notifictions (pcmk_sched_notif.c)
G_GNUC_INTERNAL
void pcmk__create_notifications(pe_resource_t *rsc, notify_data_t *n_data);
G_GNUC_INTERNAL
notify_data_t *pcmk__clone_notif_pseudo_ops(pe_resource_t *rsc,
const char *task,
pe_action_t *action,
pe_action_t *complete);
G_GNUC_INTERNAL
void pcmk__free_notification_data(notify_data_t *n_data);
G_GNUC_INTERNAL
void pcmk__order_notifs_after_fencing(pe_action_t *action, pe_resource_t *rsc,
pe_action_t *stonith_op);
// Functions applying to more than one variant (pcmk_sched_resource.c)
G_GNUC_INTERNAL
void pcmk__set_allocation_methods(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *rsc_entry, bool active_on_node);
G_GNUC_INTERNAL
GList *pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set);
G_GNUC_INTERNAL
GList *pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs);
G_GNUC_INTERNAL
void pcmk__output_resource_actions(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force);
G_GNUC_INTERNAL
bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force);
G_GNUC_INTERNAL
void pcmk__unassign_resource(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
pe_resource_t **failed);
G_GNUC_INTERNAL
void pcmk__sort_resources(pe_working_set_t *data_set);
G_GNUC_INTERNAL
gint pcmk__cmp_instance(gconstpointer a, gconstpointer b);
G_GNUC_INTERNAL
gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b);
// Functions related to probes (pcmk_sched_probes.c)
G_GNUC_INTERNAL
void pcmk__order_probes(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__schedule_probes(pe_working_set_t *data_set);
// Functions related to node utilization (pcmk_sched_utilization.c)
G_GNUC_INTERNAL
int pcmk__compare_node_capacities(const pe_node_t *node1,
const pe_node_t *node2);
G_GNUC_INTERNAL
void pcmk__consume_node_capacity(GHashTable *current_utilization,
pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__release_node_capacity(GHashTable *current_utilization,
pe_resource_t *rsc);
G_GNUC_INTERNAL
void pcmk__ban_insufficient_capacity(pe_resource_t *rsc, pe_node_t **prefer);
G_GNUC_INTERNAL
void pcmk__create_utilization_constraints(pe_resource_t *rsc,
GList *allowed_nodes);
G_GNUC_INTERNAL
void pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set);
#endif // PCMK__LIBPACEMAKER_PRIVATE__H
diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c
index 7f98ef377a..dacd8c5f0f 100644
--- a/lib/pacemaker/pcmk_sched_actions.c
+++ b/lib/pacemaker/pcmk_sched_actions.c
@@ -1,1761 +1,1763 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <sys/param.h>
#include <glib.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
extern gboolean DeleteRsc(pe_resource_t *rsc, pe_node_t *node,
gboolean optional, pe_working_set_t *data_set);
/*!
* \internal
* \brief Get the action flags relevant to ordering constraints
*
* \param[in] action Action to check
* \param[in] node Node that *other* action in the ordering is on
* (used only for clone resource actions)
*
* \return Action flags that should be used for orderings
*/
static enum pe_action_flags
action_flags_for_ordering(pe_action_t *action, pe_node_t *node)
{
bool runnable = false;
enum pe_action_flags flags;
// For non-resource actions, return the action flags
if (action->rsc == NULL) {
return action->flags;
}
/* For non-clone resources, or a clone action not assigned to a node,
* return the flags as determined by the resource method without a node
* specified.
*/
flags = action->rsc->cmds->action_flags(action, NULL);
if ((node == NULL) || !pe_rsc_is_clone(action->rsc)) {
return flags;
}
/* Otherwise (i.e., for clone resource actions on a specific node), first
* remember whether the non-node-specific action is runnable.
*/
runnable = pcmk_is_set(flags, pe_action_runnable);
// Then recheck the resource method with the node
flags = action->rsc->cmds->action_flags(action, node);
/* For clones in ordering constraints, the node-specific "runnable" doesn't
* matter, just the non-node-specific setting (i.e., is the action runnable
* anywhere).
*
* This applies only to runnable, and only for ordering constraints. This
* function shouldn't be used for other types of constraints without
* changes. Not very satisfying, but it's logical and appears to work well.
*/
if (runnable && !pcmk_is_set(flags, pe_action_runnable)) {
pe__set_raw_action_flags(flags, action->rsc->id,
pe_action_runnable);
}
return flags;
}
/*!
* \internal
* \brief Get action UUID that should be used with a resource ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the UUID and resource of the first action in an
* ordering, this returns the UUID of the action that should actually be used
* for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0").
*
* \param[in] first_uuid UUID of first action in ordering
* \param[in] first_rsc Resource of first action in ordering
*
* \return Newly allocated copy of UUID to use with ordering
* \note It is the caller's responsibility to free the return value.
*/
static char *
action_uuid_for_ordering(const char *first_uuid, pe_resource_t *first_rsc)
{
guint interval_ms = 0;
char *uuid = NULL;
char *rid = NULL;
char *first_task_str = NULL;
enum action_tasks first_task = no_action;
enum action_tasks remapped_task = no_action;
// Only non-notify actions for collective resources need remapping
if ((strstr(first_uuid, "notify") != NULL)
|| (first_rsc->variant < pe_group)) {
goto done;
}
// Only non-recurring actions need remapping
CRM_ASSERT(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms));
if (interval_ms > 0) {
goto done;
}
first_task = text2task(first_task_str);
switch (first_task) {
case stop_rsc:
case start_rsc:
case action_notify:
case action_promote:
case action_demote:
remapped_task = first_task + 1;
break;
case stopped_rsc:
case started_rsc:
case action_notified:
case action_promoted:
case action_demoted:
remapped_task = first_task;
break;
case monitor_rsc:
case shutdown_crm:
case stonith_node:
break;
default:
crm_err("Unknown action '%s' in ordering", first_task_str);
break;
}
if (remapped_task != no_action) {
/* If a (clone) resource has notifications enabled, we want to order
* relative to when all notifications have been sent for the remapped
* task. Only outermost resources or those in bundles have
* notifications.
*/
if (pcmk_is_set(first_rsc->flags, pe_rsc_notify)
&& ((first_rsc->parent == NULL)
|| (pe_rsc_is_clone(first_rsc)
&& (first_rsc->parent->variant == pe_container)))) {
uuid = pcmk__notify_key(rid, "confirmed-post",
task2text(remapped_task));
} else {
uuid = pcmk__op_key(rid, task2text(remapped_task), 0);
}
pe_rsc_trace(first_rsc,
"Remapped action UUID %s to %s for ordering purposes",
first_uuid, uuid);
}
done:
if (uuid == NULL) {
uuid = strdup(first_uuid);
CRM_ASSERT(uuid != NULL);
}
free(first_task_str);
free(rid);
return uuid;
}
/*!
* \internal
* \brief Get actual action that should be used with an ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the first action in an ordering, this returns the
* the action that should actually be used for ordering (for example, the
* started action instead of the start action).
*
* \param[in] action First action in an ordering
*
* \return Actual action that should be used for the ordering
*/
static pe_action_t *
action_for_ordering(pe_action_t *action)
{
pe_action_t *result = action;
pe_resource_t *rsc = action->rsc;
if ((rsc != NULL) && (rsc->variant >= pe_group) && (action->uuid != NULL)) {
char *uuid = action_uuid_for_ordering(action->uuid, rsc);
result = find_first_action(rsc->actions, uuid, NULL, NULL);
if (result == NULL) {
crm_warn("Not remapping %s to %s because %s does not have "
"remapped action", action->uuid, uuid, rsc->id);
result = action;
}
free(uuid);
}
return result;
}
/*!
* \internal
* \brief Update flags for ordering's actions appropriately for ordering's flags
*
* \param[in] first First action in an ordering
* \param[in] then Then action in an ordering
* \param[in] first_flags Action flags for \p first for ordering purposes
* \param[in] then_flags Action flags for \p then for ordering purposes
* \param[in] order Action wrapper for \p first in ordering
* \param[in] data_set Cluster working set
*
* \return Mask of pe_graph_updated_first and/or pe_graph_updated_then
*/
static enum pe_graph_flags
update_action_for_ordering_flags(pe_action_t *first, pe_action_t *then,
enum pe_action_flags first_flags,
enum pe_action_flags then_flags,
pe_action_wrapper_t *order,
pe_working_set_t *data_set)
{
enum pe_graph_flags changed = pe_graph_none;
/* The node will only be used for clones. If interleaved, node will be NULL,
* otherwise the ordering scope will be limited to the node. Normally, the
* whole 'then' clone should restart if 'first' is restarted, so then->node
* is needed.
*/
pe_node_t *node = then->node;
if (pcmk_is_set(order->type, pe_order_implies_then_on_node)) {
/* For unfencing, only instances of 'then' on the same node as 'first'
* (the unfencing operation) should restart, so reset node to
* first->node, at which point this case is handled like a normal
* pe_order_implies_then.
*/
pe__clear_order_flags(order->type, pe_order_implies_then_on_node);
pe__set_order_flags(order->type, pe_order_implies_then);
node = first->node;
pe_rsc_trace(then->rsc,
"%s then %s: mapped pe_order_implies_then_on_node to "
"pe_order_implies_then on %s",
first->uuid, then->uuid, node->details->uname);
}
if (pcmk_is_set(order->type, pe_order_implies_then)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags & pe_action_optional,
pe_action_optional,
pe_order_implies_then,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_optional)
&& pcmk_is_set(then->flags, pe_action_optional)) {
pe__clear_action_flags(then, pe_action_optional);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_then",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_restart) && (then->rsc != NULL)) {
enum pe_action_flags restart = pe_action_optional|pe_action_runnable;
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, restart,
pe_order_restart, data_set);
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_restart",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_implies_first)) {
if (first->rsc != NULL) {
changed |= first->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_optional,
pe_order_implies_first,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_optional)
&& pcmk_is_set(first->flags, pe_action_runnable)) {
pe__clear_action_flags(first, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_first);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_promoted_implies_first)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags & pe_action_optional,
pe_action_optional,
pe_order_promoted_implies_first,
data_set);
}
pe_rsc_trace(then->rsc,
"%s then %s: %s after pe_order_promoted_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_one_or_more)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_one_or_more,
data_set);
} else if (pcmk_is_set(first_flags, pe_action_runnable)) {
// We have another runnable instance of "first"
then->runnable_before++;
/* Mark "then" as runnable if it requires a certain number of
* "before" instances to be runnable, and they now are.
*/
if ((then->runnable_before >= then->required_runnable_before)
&& !pcmk_is_set(then->flags, pe_action_runnable)) {
pe__set_action_flags(then, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_one_or_more",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_probe) && (then->rsc != NULL)) {
if (!pcmk_is_set(first_flags, pe_action_runnable)
&& (first->rsc->running_on != NULL)) {
pe_rsc_trace(then->rsc,
"%s then %s: ignoring because first is stopping",
first->uuid, then->uuid);
order->type = pe_order_none;
} else {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_runnable_left,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_probe",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_runnable_left)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_runnable_left,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_runnable)
&& pcmk_is_set(then->flags, pe_action_runnable)) {
pe__clear_action_flags(then, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_runnable_left",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_implies_first_migratable)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_optional,
pe_order_implies_first_migratable, data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after "
"pe_order_implies_first_migratable",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_pseudo_left)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_optional,
pe_order_pseudo_left,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_pseudo_left",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_optional)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_optional,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_optional",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_asymmetrical)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_asymmetrical,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_asymmetrical",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(first->flags, pe_action_runnable)
&& pcmk_is_set(order->type, pe_order_implies_then_printed)
&& !pcmk_is_set(first_flags, pe_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
then->uuid, first->uuid);
pe__set_action_flags(then, pe_action_print_always);
// Don't bother marking 'then' as changed just for this
}
if (pcmk_is_set(order->type, pe_order_implies_first_printed)
&& !pcmk_is_set(then_flags, pe_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
first->uuid, then->uuid);
pe__set_action_flags(first, pe_action_print_always);
// Don't bother marking 'first' as changed just for this
}
if (pcmk_any_flags_set(order->type, pe_order_implies_then
|pe_order_implies_first
|pe_order_restart)
&& (first->rsc != NULL)
&& !pcmk_is_set(first->rsc->flags, pe_rsc_managed)
&& pcmk_is_set(first->rsc->flags, pe_rsc_block)
&& !pcmk_is_set(first->flags, pe_action_runnable)
&& pcmk__str_eq(first->task, RSC_STOP, pcmk__str_casei)) {
if (pcmk_is_set(then->flags, pe_action_runnable)) {
pe__clear_action_flags(then, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after checking whether first "
"is blocked, unmanaged, unrunnable stop",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
return changed;
}
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pe_action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->details->uname)
/*!
* \internal
* \brief Update an action's flags for all orderings where it is "then"
*
* \param[in] then Action to update
* \param[in] data_set Cluster working set
*/
void
pcmk__update_action_for_orderings(pe_action_t *then, pe_working_set_t *data_set)
{
GList *lpc = NULL;
enum pe_graph_flags changed = pe_graph_none;
int last_flags = then->flags;
pe_rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s",
action_type_str(then->flags), then->uuid,
action_optional_str(then->flags),
action_runnable_str(then->flags), action_node_str(then));
if (pcmk_is_set(then->flags, pe_action_requires_any)) {
/* Initialize current known "runnable before" actions. As
* update_action_for_ordering_flags() is called for each of then's
* before actions, this number will increment as runnable 'first'
* actions are encountered.
*/
then->runnable_before = 0;
if (then->required_runnable_before == 0) {
/* @COMPAT This ordering constraint uses the deprecated
* "require-all=false" attribute. Treat it like "clone-min=1".
*/
then->required_runnable_before = 1;
}
/* The pe_order_one_or_more clause of update_action_for_ordering_flags()
* (called below) will reset runnable if appropriate.
*/
pe__clear_action_flags(then, pe_action_runnable);
}
for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pe_action_t *first = other->action;
pe_node_t *then_node = then->node;
pe_node_t *first_node = first->node;
if ((first->rsc != NULL)
&& (first->rsc->variant == pe_group)
&& pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) {
first_node = first->rsc->fns->location(first->rsc, NULL, FALSE);
if (first_node != NULL) {
pe_rsc_trace(first->rsc, "Found node %s for 'first' %s",
first_node->details->uname, first->uuid);
}
}
if ((then->rsc != NULL)
&& (then->rsc->variant == pe_group)
&& pcmk__str_eq(then->task, RSC_START, pcmk__str_casei)) {
then_node = then->rsc->fns->location(then->rsc, NULL, FALSE);
if (then_node != NULL) {
pe_rsc_trace(then->rsc, "Found node %s for 'then' %s",
then_node->details->uname, then->uuid);
}
}
// Disable constraint if it only applies when on same node, but isn't
if (pcmk_is_set(other->type, pe_order_same_node)
&& (first_node != NULL) && (then_node != NULL)
&& (first_node->details != then_node->details)) {
pe_rsc_trace(then->rsc,
"Disabled ordering %s on %s then %s on %s: not same node",
other->action->uuid, first_node->details->uname,
then->uuid, then_node->details->uname);
other->type = pe_order_none;
continue;
}
pe__clear_graph_flags(changed, then, pe_graph_updated_first);
if ((first->rsc != NULL)
&& pcmk_is_set(other->type, pe_order_then_cancels_first)
&& !pcmk_is_set(then->flags, pe_action_optional)) {
/* 'then' is required, so we must abandon 'first'
* (e.g. a required stop cancels any agent reload).
*/
pe__set_action_flags(other->action, pe_action_optional);
if (!strcmp(first->task, CRMD_ACTION_RELOAD_AGENT)) {
pe__clear_resource_flags(first->rsc, pe_rsc_reload);
}
}
if ((first->rsc != NULL) && (then->rsc != NULL)
&& (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) {
first = action_for_ordering(first);
}
if (first != other->action) {
pe_rsc_trace(then->rsc, "Ordering %s after %s instead of %s",
then->uuid, first->uuid, other->action->uuid);
}
pe_rsc_trace(then->rsc,
"%s (%#.6x) then %s (%#.6x): type=%#.6x node=%s",
first->uuid, first->flags, then->uuid, then->flags,
other->type, action_node_str(first));
if (first == other->action) {
/* 'first' was not remapped (e.g. from 'start' to 'running'), which
* could mean it is a non-resource action, a primitive resource
* action, or already expanded.
*/
enum pe_action_flags first_flags, then_flags;
first_flags = action_flags_for_ordering(first, then_node);
then_flags = action_flags_for_ordering(then, first_node);
changed |= update_action_for_ordering_flags(first, then,
first_flags, then_flags,
other, data_set);
/* 'first' was for a complex resource (clone, group, etc),
* create a new dependency if necessary
*/
} else if (order_actions(first, then, other->type)) {
/* This was the first time 'first' and 'then' were associated,
* start again to get the new actions_before list
*/
pe__set_graph_flags(changed, then,
pe_graph_updated_then|pe_graph_disable);
}
if (pcmk_is_set(changed, pe_graph_disable)) {
pe_rsc_trace(then->rsc,
"Disabled ordering %s then %s in favor of %s then %s",
other->action->uuid, then->uuid, first->uuid,
then->uuid);
pe__clear_graph_flags(changed, then, pe_graph_disable);
other->type = pe_order_none;
}
if (pcmk_is_set(changed, pe_graph_updated_first)) {
crm_trace("Re-processing %s and its 'after' actions "
"because it changed", first->uuid);
for (GList *lpc2 = first->actions_after; lpc2 != NULL;
lpc2 = lpc2->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc2->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
pcmk__update_action_for_orderings(first, data_set);
}
}
if (pcmk_is_set(then->flags, pe_action_requires_any)) {
if (last_flags == then->flags) {
pe__clear_graph_flags(changed, then, pe_graph_updated_then);
} else {
pe__set_graph_flags(changed, then, pe_graph_updated_then);
}
}
if (pcmk_is_set(changed, pe_graph_updated_then)) {
crm_trace("Re-processing %s and its 'after' actions because it changed",
then->uuid);
if (pcmk_is_set(last_flags, pe_action_runnable)
&& !pcmk_is_set(then->flags, pe_action_runnable)) {
pcmk__block_colocated_starts(then, data_set);
}
pcmk__update_action_for_orderings(then, data_set);
for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
}
}
/*!
* \internal
* \brief Trace-log an action (optionally with its dependent actions)
*
* \param[in] pre_text If not NULL, prefix the log with this plus ": "
* \param[in] action Action to log
* \param[in] details If true, recursively log dependent actions
*/
void
pcmk__log_action(const char *pre_text, pe_action_t *action, bool details)
{
const char *node_uname = NULL;
const char *node_uuid = NULL;
const char *desc = NULL;
CRM_CHECK(action != NULL, return);
if (!pcmk_is_set(action->flags, pe_action_pseudo)) {
if (action->node != NULL) {
node_uname = action->node->details->uname;
node_uuid = action->node->details->id;
} else {
node_uname = "<none>";
}
}
switch (text2task(action->task)) {
case stonith_node:
case shutdown_crm:
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
desc = "Pseudo ";
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
desc = "Optional ";
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
desc = "!!Non-Startable!! ";
} else if (pcmk_is_set(action->flags, pe_action_processed)) {
desc = "";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
default:
if (pcmk_is_set(action->flags, pe_action_optional)) {
desc = "Optional ";
} else if (pcmk_is_set(action->flags, pe_action_pseudo)) {
desc = "Pseudo ";
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
desc = "!!Non-Startable!! ";
} else if (pcmk_is_set(action->flags, pe_action_processed)) {
desc = "";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(action->rsc? action->rsc->id : "<none>"),
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
}
if (details) {
GList *iter = NULL;
crm_trace("\t\t====== Preceding Actions");
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== Subsequent Actions");
for (iter = action->actions_after; iter != NULL; iter = iter->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== End");
} else {
crm_trace("\t\t(before=%d, after=%d)",
g_list_length(action->actions_before),
g_list_length(action->actions_after));
}
}
/*!
* \internal
* \brief Create a new pseudo-action for a resource
*
* \param[in] rsc Resource to create action for
* \param[in] task Action name
* \param[in] optional Whether action should be considered optional
* \param[in] runnable Whethe action should be considered runnable
*
* \return New action object corresponding to arguments
*/
pe_action_t *
pcmk__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task,
bool optional, bool runnable)
{
pe_action_t *action = NULL;
CRM_ASSERT((rsc != NULL) && (task != NULL));
action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0), task, NULL,
optional, TRUE, rsc->cluster);
pe__set_action_flags(action, pe_action_pseudo);
if (runnable) {
pe__set_action_flags(action, pe_action_runnable);
}
return action;
}
/*!
* \internal
* \brief Create an executor cancel action
*
* \param[in] rsc Resource of action to cancel
* \param[in] task Name of action to cancel
* \param[in] interval_ms Interval of action to cancel
* \param[in] node Node of action to cancel
* \param[in] data_set Working set of cluster
*
* \return Created op
*/
pe_action_t *
pcmk__new_cancel_action(pe_resource_t *rsc, const char *task, guint interval_ms,
pe_node_t *node)
{
pe_action_t *cancel_op = NULL;
char *key = NULL;
char *interval_ms_s = NULL;
CRM_ASSERT((rsc != NULL) && (task != NULL) && (node != NULL));
// @TODO dangerous if possible to schedule another action with this key
key = pcmk__op_key(rsc->id, task, interval_ms);
cancel_op = custom_action(rsc, key, RSC_CANCEL, node, FALSE, TRUE,
rsc->cluster);
pcmk__str_update(&cancel_op->task, RSC_CANCEL);
pcmk__str_update(&cancel_op->cancel_task, task);
interval_ms_s = crm_strdup_printf("%u", interval_ms);
add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task);
add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL_MS, interval_ms_s);
free(interval_ms_s);
return cancel_op;
}
/*!
* \internal
* \brief Create a new shutdown action for a node
*
* \param[in] node Node being shut down
*
* \return Newly created shutdown action for \p node
*/
pe_action_t *
pcmk__new_shutdown_action(pe_node_t *node)
{
char *shutdown_id = NULL;
pe_action_t *shutdown_op = NULL;
CRM_ASSERT(node != NULL);
shutdown_id = crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN,
node->details->uname);
shutdown_op = custom_action(NULL, shutdown_id, CRM_OP_SHUTDOWN, node, FALSE,
TRUE, node->details->data_set);
pcmk__order_stops_before_shutdown(node, shutdown_op,
node->details->data_set);
add_hash_param(shutdown_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
return shutdown_op;
}
/*!
* \internal
* \brief Calculate and add an operation digest to XML
*
* Calculate an operation digest, which enables us to later determine when a
* restart is needed due to the resource's parameters being changed, and add it
* to given XML.
*
* \param[in] op Operation result from executor
* \param[in] update XML to add digest to
*/
static void
add_op_digest_to_xml(lrmd_event_data_t *op, xmlNode *update)
{
char *digest = NULL;
xmlNode *args_xml = NULL;
if (op->params == NULL) {
return;
}
args_xml = create_xml_node(NULL, XML_TAG_PARAMS);
g_hash_table_foreach(op->params, hash2field, args_xml);
pcmk__filter_op_for_digest(args_xml);
digest = calculate_operation_digest(args_xml, NULL);
crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest);
free_xml(args_xml);
free(digest);
}
#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
/*!
* \internal
* \brief Create XML for resource operation history update
*
* \param[in,out] parent Parent XML node to add to
* \param[in,out] op Operation event data
* \param[in] caller_version DC feature set
* \param[in] target_rc Expected result of operation
* \param[in] node Name of node on which operation was performed
* \param[in] origin Arbitrary description of update source
*
* \return Newly created XML node for history update
*/
xmlNode *
pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
const char *caller_version, int target_rc,
const char *node, const char *origin)
{
char *key = NULL;
char *magic = NULL;
char *op_id = NULL;
char *op_id_additional = NULL;
char *local_user_data = NULL;
const char *exit_reason = NULL;
xmlNode *xml_op = NULL;
const char *task = NULL;
CRM_CHECK(op != NULL, return NULL);
crm_trace("Creating history XML for %s-interval %s action for %s on %s "
"(DC version: %s, origin: %s)",
pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
((node == NULL)? "no node" : node), caller_version, origin);
task = op->op_type;
/* Record a successful agent reload as a start, and a failed one as a
* monitor, to make life easier for the scheduler when determining the
* current state.
*
* @COMPAT We should check "reload" here only if the operation was for a
* pre-OCF-1.1 resource agent, but we don't know that here, and we should
* only ever get results for actions scheduled by us, so we can reasonably
* assume any "reload" is actually a pre-1.1 agent reload.
*/
if (pcmk__str_any_of(task, CRMD_ACTION_RELOAD, CRMD_ACTION_RELOAD_AGENT,
NULL)) {
if (op->op_status == PCMK_EXEC_DONE) {
task = CRMD_ACTION_START;
} else {
task = CRMD_ACTION_STATUS;
}
}
key = pcmk__op_key(op->rsc_id, task, op->interval_ms);
if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_none)) {
const char *n_type = crm_meta_value(op->params, "notify_type");
const char *n_task = crm_meta_value(op->params, "notify_operation");
CRM_LOG_ASSERT(n_type != NULL);
CRM_LOG_ASSERT(n_task != NULL);
op_id = pcmk__notify_key(op->rsc_id, n_type, n_task);
if (op->op_status != PCMK_EXEC_PENDING) {
/* Ignore notify errors.
*
* @TODO It might be better to keep the correct result here, and
* ignore it in process_graph_event().
*/
lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
}
/* Migration history is preserved separately, which usually matters for
* multiple nodes and is important for future cluster transitions.
*/
} else if (pcmk__str_any_of(op->op_type, CRMD_ACTION_MIGRATE,
CRMD_ACTION_MIGRATED, NULL)) {
op_id = strdup(key);
} else if (did_rsc_op_fail(op, target_rc)) {
op_id = pcmk__op_key(op->rsc_id, "last_failure", 0);
if (op->interval_ms == 0) {
// Ensure 'last' gets updated, in case record-pending is true
op_id_additional = pcmk__op_key(op->rsc_id, "last", 0);
}
exit_reason = op->exit_reason;
} else if (op->interval_ms > 0) {
op_id = strdup(key);
} else {
op_id = pcmk__op_key(op->rsc_id, "last", 0);
}
again:
xml_op = pcmk__xe_match(parent, XML_LRM_TAG_RSC_OP, XML_ATTR_ID, op_id);
if (xml_op == NULL) {
xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP);
}
if (op->user_data == NULL) {
crm_debug("Generating fake transition key for: " PCMK__OP_FMT
" %d from %s", op->rsc_id, op->op_type, op->interval_ms,
op->call_id, origin);
local_user_data = pcmk__transition_key(-1, op->call_id, target_rc,
FAKE_TE_ID);
op->user_data = local_user_data;
}
if (magic == NULL) {
magic = crm_strdup_printf("%d:%d;%s", op->op_status, op->rc,
(const char *) op->user_data);
}
crm_xml_add(xml_op, XML_ATTR_ID, op_id);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task);
crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin);
crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic);
crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason == NULL ? "" : exit_reason);
crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */
crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc);
crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status);
crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms);
if (compare_version("2.1", caller_version) <= 0) {
if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) {
crm_trace("Timing data (" PCMK__OP_FMT
"): last=%u change=%u exec=%u queue=%u",
op->rsc_id, op->op_type, op->interval_ms,
op->t_run, op->t_rcchange, op->exec_time, op->queue_time);
if ((op->interval_ms != 0) && (op->t_rcchange != 0)) {
// Recurring ops may have changed rc after initial run
crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
(long long) op->t_rcchange);
} else {
crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
(long long) op->t_run);
}
crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time);
crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time);
}
}
if (pcmk__str_any_of(op->op_type, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
/*
* Record migrate_source and migrate_target always for migrate ops.
*/
const char *name = XML_LRM_ATTR_MIGRATE_SOURCE;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
name = XML_LRM_ATTR_MIGRATE_TARGET;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
}
add_op_digest_to_xml(op, xml_op);
if (op_id_additional) {
free(op_id);
op_id = op_id_additional;
op_id_additional = NULL;
goto again;
}
if (local_user_data) {
free(local_user_data);
op->user_data = NULL;
}
free(magic);
free(op_id);
free(key);
return xml_op;
}
/*!
* \internal
* \brief Check whether an action shutdown-locks a resource to a node
*
* If the shutdown-lock cluster property is set, resources will not be recovered
* on a different node if cleanly stopped, and may start only on that same node.
* This function checks whether that applies to a given action, so that the
* transition graph can be marked appropriately.
*
* \param[in] action Action to check
*
* \return true if \p action locks its resource to the action's node,
* otherwise false
*/
bool
pcmk__action_locks_rsc_to_node(const pe_action_t *action)
{
// Only resource actions taking place on resource's lock node are locked
if ((action == NULL) || (action->rsc == NULL)
|| (action->rsc->lock_node == NULL) || (action->node == NULL)
|| (action->node->details != action->rsc->lock_node->details)) {
return false;
}
/* During shutdown, only stops are locked (otherwise, another action such as
* a demote would cause the controller to clear the lock)
*/
if (action->node->details->shutdown && (action->task != NULL)
&& (strcmp(action->task, RSC_STOP) != 0)) {
return false;
}
return true;
}
/* lowest to highest */
static gint
sort_action_id(gconstpointer a, gconstpointer b)
{
const pe_action_wrapper_t *action_wrapper2 = (const pe_action_wrapper_t *)a;
const pe_action_wrapper_t *action_wrapper1 = (const pe_action_wrapper_t *)b;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (action_wrapper1->action->id < action_wrapper2->action->id) {
return 1;
}
if (action_wrapper1->action->id > action_wrapper2->action->id) {
return -1;
}
return 0;
}
/*!
* \internal
* \brief Remove any duplicate action inputs, merging action flags
*
* \param[in] action Action whose inputs should be checked
*/
void
pcmk__deduplicate_action_inputs(pe_action_t *action)
{
GList *item = NULL;
GList *next = NULL;
pe_action_wrapper_t *last_input = NULL;
action->actions_before = g_list_sort(action->actions_before,
sort_action_id);
for (item = action->actions_before; item != NULL; item = next) {
pe_action_wrapper_t *input = (pe_action_wrapper_t *) item->data;
next = item->next;
if ((last_input != NULL)
&& (input->action->id == last_input->action->id)) {
crm_trace("Input %s (%d) duplicate skipped for action %s (%d)",
input->action->uuid, input->action->id,
action->uuid, action->id);
/* For the purposes of scheduling, the ordering flags no longer
* matter, but crm_simulate looks at certain ones when creating a
* dot graph. Combining the flags is sufficient for that purpose.
*/
last_input->type |= input->type;
if (input->state == pe_link_dumped) {
last_input->state = pe_link_dumped;
}
free(item->data);
action->actions_before = g_list_delete_link(action->actions_before,
item);
} else {
last_input = input;
input->state = pe_link_not_dumped;
}
}
}
/*!
* \internal
* \brief Output all scheduled actions
*
* \param[in] data_set Cluster working set
*/
void
pcmk__output_actions(pe_working_set_t *data_set)
{
pcmk__output_t *out = data_set->priv;
// Output node (non-resource) actions
for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) {
char *node_name = NULL;
char *task = NULL;
pe_action_t *action = (pe_action_t *) iter->data;
if (action->rsc != NULL) {
continue; // Resource actions will be output later
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
continue; // This action was not scheduled
}
if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
task = strdup("Shutdown");
} else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
const char *op = g_hash_table_lookup(action->meta, "stonith_action");
task = crm_strdup_printf("Fence (%s)", op);
} else {
continue; // Don't display other node action types
}
if (pe__is_guest_node(action->node)) {
node_name = crm_strdup_printf("%s (resource: %s)",
action->node->details->uname,
action->node->details->remote_rsc->container->id);
} else if (action->node != NULL) {
node_name = crm_strdup_printf("%s", action->node->details->uname);
}
out->message(out, "node-action", task, node_name, action->reason);
free(node_name);
free(task);
}
// Output resource actions
for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
rsc->cmds->output_actions(rsc);
}
}
/*!
* \internal
* \brief Schedule cancellation of a recurring action
*
* \param[in] rsc Resource that action is for
* \param[in] call_id Action's call ID from history
* \param[in] task Action name
* \param[in] interval_ms Action interval
* \param[in] node Node that history entry is for
* \param[in] reason Short description of why action is being cancelled
*/
static void
schedule_cancel(pe_resource_t *rsc, const char *call_id, const char *task,
guint interval_ms, pe_node_t *node, const char *reason)
{
pe_action_t *cancel = NULL;
CRM_CHECK((rsc != NULL) && (task != NULL)
&& (node != NULL) && (reason != NULL),
return);
crm_info("Recurring %s-interval %s for %s will be stopped on %s: %s",
pcmk__readable_interval(interval_ms), task, rsc->id,
pcmk__s(node->details->uname, "unknown node"), reason);
cancel = pcmk__new_cancel_action(rsc, task, interval_ms, node);
add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
// Cancellations happen after stops
pcmk__new_ordering(rsc, stop_key(rsc), NULL, rsc, NULL, cancel,
pe_order_optional, rsc->cluster);
}
/*!
* \internal
* \brief Check whether action from resource history is still in configuration
*
* \param[in] rsc Resource that action is for
* \param[in] task Action's name
* \param[in] interval_ms Action's interval (in milliseconds)
*
* \return true if action is still in resource configuration, otherwise false
*/
static bool
action_in_config(pe_resource_t *rsc, const char *task, guint interval_ms)
{
char *key = pcmk__op_key(rsc->id, task, interval_ms);
bool config = (find_rsc_op_entry(rsc, key) != NULL);
free(key);
return config;
}
/*!
* \internal
* \brief Get action name needed to compare digest for configuration changes
*
* \param[in] task Action name from history
* \param[in] interval_ms Action interval (in milliseconds)
*
* \return Action name whose digest should be compared
*/
static const char *
task_for_digest(const char *task, guint interval_ms)
{
/* Certain actions need to be compared against the parameters used to start
* the resource.
*/
if ((interval_ms == 0)
&& pcmk__str_any_of(task, RSC_STATUS, RSC_MIGRATED, RSC_PROMOTE, NULL)) {
task = RSC_START;
}
return task;
}
/*!
* \internal
* \brief Check whether only sanitized parameters to an action changed
*
* When collecting CIB files for troubleshooting, crm_report will mask
* sensitive resource parameters. If simulations were run using that, affected
* resources would appear to need a restart, which would complicate
* troubleshooting. To avoid that, we save a "secure digest" of non-sensitive
* parameters. This function used that digest to check whether only masked
* parameters are different.
*
* \param[in] xml_op Resource history entry with secure digest
* \param[in] digest_data Operation digest information being compared
* \param[in] data_set Cluster working set
*
* \return true if only sanitized parameters changed, otherwise false
*/
static bool
only_sanitized_changed(xmlNode *xml_op, const op_digest_cache_t *digest_data,
pe_working_set_t *data_set)
{
const char *digest_secure = NULL;
if (!pcmk_is_set(data_set->flags, pe_flag_sanitized)) {
// The scheduler is not being run as a simulation
return false;
}
digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
return (digest_data->rc != RSC_DIGEST_MATCH) && (digest_secure != NULL)
&& (digest_data->digest_secure_calc != NULL)
&& (strcmp(digest_data->digest_secure_calc, digest_secure) == 0);
}
/*!
* \internal
* \brief Force a restart due to a configuration change
*
* \param[in] rsc Resource that action is for
* \param[in] task Name of action whose configuration changed
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] node Node where resource should be restarted
*/
static void
force_restart(pe_resource_t *rsc, const char *task, guint interval_ms,
pe_node_t *node)
{
char *key = pcmk__op_key(rsc->id, task, interval_ms);
pe_action_t *required = custom_action(rsc, key, task, NULL, FALSE, TRUE,
rsc->cluster);
pe_action_set_reason(required, "resource definition change", true);
trigger_unfencing(rsc, node, "Device parameters changed", NULL,
rsc->cluster);
}
/*!
* \internal
* \brief Reschedule a recurring action
*
* \param[in] rsc Resource that action is for
* \param[in] task Name of action being rescheduled
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] node Node where action should be rescheduled
*/
static void
reschedule_recurring(pe_resource_t *rsc, const char *task, guint interval_ms,
pe_node_t *node)
{
pe_action_t *op = NULL;
trigger_unfencing(rsc, node, "Device parameters changed (reschedule)",
NULL, rsc->cluster);
op = custom_action(rsc, pcmk__op_key(rsc->id, task, interval_ms),
task, node, TRUE, TRUE, rsc->cluster);
pe__set_action_flags(op, pe_action_reschedule);
}
/*!
* \internal
* \brief Schedule a reload of a resource on a node
*
* \param[in] rsc Resource to reload
* \param[in] node Where resource should be reloaded
*/
static void
schedule_reload(pe_resource_t *rsc, pe_node_t *node)
{
pe_action_t *reload = NULL;
// For collective resources, just call recursively for children
if (rsc->variant > pe_native) {
g_list_foreach(rsc->children, (GFunc) schedule_reload, node);
return;
}
// Skip the reload in certain situations
if ((node == NULL)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)
|| pcmk_is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "Skip reload of %s:%s%s %s",
rsc->id,
pcmk_is_set(rsc->flags, pe_rsc_managed)? "" : " unmanaged",
pcmk_is_set(rsc->flags, pe_rsc_failed)? " failed" : "",
(node == NULL)? "inactive" : node->details->uname);
return;
}
/* If a resource's configuration changed while a start was pending,
* force a full restart instead of a reload.
*/
if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
pe_rsc_trace(rsc, "%s: preventing agent reload because start pending",
rsc->id);
custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE,
rsc->cluster);
return;
}
// Schedule the reload
pe__set_resource_flags(rsc, pe_rsc_reload);
reload = custom_action(rsc, reload_key(rsc), CRMD_ACTION_RELOAD_AGENT, node,
FALSE, TRUE, rsc->cluster);
pe_action_set_reason(reload, "resource definition change", FALSE);
// Set orderings so that a required stop or demote cancels the reload
pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL,
pe_order_optional|pe_order_then_cancels_first,
rsc->cluster);
pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL,
pe_order_optional|pe_order_then_cancels_first,
rsc->cluster);
}
/*!
* \internal
* \brief Handle any configuration change for an action
*
* Given an action from resource history, if the resource's configuration
* changed since the action was done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, etc.).
*
* \param[in] rsc Resource that action is for
* \param[in] node Node that action was on
* \param[in] xml_op Action XML from resource history
*
* \return true if action configuration changed, otherwise false
*/
bool
pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op)
{
guint interval_ms = 0;
const char *task = NULL;
const op_digest_cache_t *digest_data = NULL;
CRM_CHECK((rsc != NULL) && (node != NULL) && (xml_op != NULL),
return false);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
CRM_CHECK(task != NULL, return false);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
// If this is a recurring action, check whether it has been orphaned
if (interval_ms > 0) {
if (action_in_config(rsc, task, interval_ms)) {
pe_rsc_trace(rsc, "%s-interval %s for %s on %s is in configuration",
pcmk__readable_interval(interval_ms), task, rsc->id,
node->details->uname);
} else if (pcmk_is_set(rsc->cluster->flags,
pe_flag_stop_action_orphans)) {
schedule_cancel(rsc,
crm_element_value(xml_op, XML_LRM_ATTR_CALLID),
task, interval_ms, node, "orphan");
return true;
} else {
pe_rsc_debug(rsc, "%s-interval %s for %s on %s is orphaned",
pcmk__readable_interval(interval_ms), task, rsc->id,
node->details->uname);
return true;
}
}
crm_trace("Checking %s-interval %s for %s on %s for configuration changes",
pcmk__readable_interval(interval_ms), task, rsc->id,
node->details->uname);
task = task_for_digest(task, interval_ms);
digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster);
if (only_sanitized_changed(xml_op, digest_data, rsc->cluster)) {
if (!pcmk__is_daemon && (rsc->cluster->priv != NULL)) {
pcmk__output_t *out = rsc->cluster->priv;
out->info(out,
"Only 'private' parameters to %s-interval %s for %s "
"on %s changed: %s",
pcmk__readable_interval(interval_ms), task, rsc->id,
node->details->uname,
crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
}
return false;
}
switch (digest_data->rc) {
case RSC_DIGEST_RESTART:
crm_log_xml_debug(digest_data->params_restart, "params:restart");
force_restart(rsc, task, interval_ms, node);
return true;
case RSC_DIGEST_ALL:
case RSC_DIGEST_UNKNOWN:
// Changes that can potentially be handled by an agent reload
if (interval_ms > 0) {
/* Recurring actions aren't reloaded per se, they are just
* re-scheduled so the next run uses the new parameters.
* The old instance will be cancelled automatically.
*/
crm_log_xml_debug(digest_data->params_all, "params:reschedule");
reschedule_recurring(rsc, task, interval_ms, node);
} else if (crm_element_value(xml_op,
XML_LRM_ATTR_RESTART_DIGEST) != NULL) {
// Agent supports reload, so use it
trigger_unfencing(rsc, node,
"Device parameters changed (reload)", NULL,
rsc->cluster);
crm_log_xml_debug(digest_data->params_all, "params:reload");
schedule_reload(rsc, node);
} else {
pe_rsc_trace(rsc,
"Restarting %s because agent doesn't support reload",
rsc->id);
crm_log_xml_debug(digest_data->params_restart,
"params:restart");
force_restart(rsc, task, interval_ms, node);
}
return true;
default:
break;
}
return false;
}
/*!
* \internal
* \brief Create a list of resource's action history entries, sorted by call ID
*
* \param[in] rsc Resource whose history should be checked
* \param[in] rsc_entry Resource's <lrm_rsc_op> status XML
* \param[out] start_index Where to store index of start-like action, if any
* \param[out] stop_index Where to store index of stop action, if any
*/
static GList *
rsc_history_as_list(pe_resource_t *rsc, xmlNode *rsc_entry,
int *start_index, int *stop_index)
{
GList *ops = NULL;
for (xmlNode *rsc_op = first_named_child(rsc_entry, XML_LRM_TAG_RSC_OP);
rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
ops = g_list_prepend(ops, rsc_op);
}
ops = g_list_sort(ops, sort_op_by_callid);
calculate_active_ops(ops, start_index, stop_index);
return ops;
}
/*!
* \internal
* \brief Process a resource's action history from the CIB status
*
* Given a resource's action history, if the resource's configuration
* changed since the actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in] rsc_entry Resource's <lrm_rsc_op> status XML
* \param[in] rsc Resource whose history is being processed
* \param[in] node Node whose history is being processed
*/
static void
process_rsc_history(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node)
{
int offset = -1;
int stop_index = 0;
int start_index = 0;
GList *sorted_op_list = NULL;
if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
if (pe_rsc_is_anon_clone(uber_parent(rsc))) {
pe_rsc_trace(rsc,
"Skipping configuration check "
"for orphaned clone instance %s",
rsc->id);
} else {
pe_rsc_trace(rsc,
"Skipping configuration check and scheduling clean-up "
"for orphaned resource %s", rsc->id);
DeleteRsc(rsc, node, FALSE, rsc->cluster);
}
return;
}
if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, false)) {
DeleteRsc(rsc, node, FALSE, rsc->cluster);
}
pe_rsc_trace(rsc,
"Skipping configuration check for %s "
"because no longer active on %s",
rsc->id, node->details->uname);
return;
}
pe_rsc_trace(rsc, "Checking for configuration changes for %s on %s",
rsc->id, node->details->uname);
if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, true)) {
DeleteRsc(rsc, node, FALSE, rsc->cluster);
}
sorted_op_list = rsc_history_as_list(rsc, rsc_entry, &start_index,
&stop_index);
if (start_index < stop_index) {
return; // Resource is stopped
}
for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
xmlNode *rsc_op = (xmlNode *) iter->data;
const char *task = NULL;
guint interval_ms = 0;
if (++offset < start_index) {
// Skip actions that happened before a start
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if ((interval_ms > 0)
&& (pcmk_is_set(rsc->flags, pe_rsc_maintenance)
|| node->details->maintenance)) {
// Maintenance mode cancels recurring operations
schedule_cancel(rsc,
crm_element_value(rsc_op, XML_LRM_ATTR_CALLID),
task, interval_ms, node, "maintenance mode");
} else if ((interval_ms > 0)
|| pcmk__strcase_any_of(task, RSC_STATUS, RSC_START,
RSC_PROMOTE, RSC_MIGRATED, NULL)) {
/* If a resource operation failed, and the operation's definition
* has changed, clear any fail count so they can be retried fresh.
*/
if (pe__bundle_needs_remote_name(rsc, rsc->cluster)) {
/* We haven't allocated resources to nodes yet, so if the
* REMOTE_CONTAINER_HACK is used, we may calculate the digest
* based on the literal "#uname" value rather than the properly
* substituted value. That would mistakenly make the action
* definition appear to have been changed. Defer the check until
* later in this case.
*/
pe__add_param_check(rsc_op, rsc, node, pe_check_active,
rsc->cluster);
} else if (pcmk__check_action_config(rsc, node, rsc_op)
&& (pe_get_failcount(node, rsc, NULL, pe_fc_effective,
NULL, rsc->cluster) != 0)) {
pe__clear_failcount(rsc, node, "action definition changed",
rsc->cluster);
}
}
}
g_list_free(sorted_op_list);
}
/*!
* \internal
* \brief Process a node's action history from the CIB status
*
* Given a node's resource history, if the resource's configuration changed
* since the actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in] node Node whose history is being processed
* \param[in] lrm_rscs Node's <lrm_resources> from CIB status XML
* \param[in] data_set Cluster working set
*/
static void
process_node_history(pe_node_t *node, xmlNode *lrm_rscs, pe_working_set_t *data_set)
{
crm_trace("Processing history for node %s", node->details->uname);
for (xmlNode *rsc_entry = first_named_child(lrm_rscs, XML_LRM_TAG_RESOURCE);
rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
if (xml_has_children(rsc_entry)) {
GList *result = pcmk__rscs_matching_id(ID(rsc_entry), data_set);
for (GList *iter = result; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
if (rsc->variant == pe_native) {
process_rsc_history(rsc_entry, rsc, node);
}
}
g_list_free(result);
}
}
}
// XPath to find a node's resource history
#define XPATH_NODE_HISTORY "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES
/*!
* \internal
* \brief Process any resource configuration changes in the CIB status
*
* Go through all nodes' resource history, and if a resource's configuration
* changed since its actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in] data_set Cluster working set
*/
void
pcmk__handle_rsc_config_changes(pe_working_set_t *data_set)
{
crm_trace("Check resource and action configuration for changes");
/* Rather than iterate through the status section, iterate through the nodes
* and search for the appropriate status subsection for each. This skips
* orphaned nodes and lets us eliminate some cases before searching the XML.
*/
for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
/* Don't bother checking actions for a node that can't run actions ...
* unless it's in maintenance mode, in which case we still need to
* cancel any existing recurring monitors.
*/
- if (node->details->maintenance || pcmk__node_available(node)) {
+ if (node->details->maintenance
+ || pcmk__node_available(node, false, false)) {
+
char *xpath = NULL;
xmlNode *history = NULL;
xpath = crm_strdup_printf(XPATH_NODE_HISTORY, node->details->uname);
history = get_xpath_object(xpath, data_set->input, LOG_NEVER);
free(xpath);
process_node_history(node, history, data_set);
}
}
}
diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c
index d0bd47f9e8..0fc93969ad 100644
--- a/lib/pacemaker/pcmk_sched_clone.c
+++ b/lib/pacemaker/pcmk_sched_clone.c
@@ -1,1172 +1,1168 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
#define VARIANT_CLONE 1
#include <lib/pengine/variant.h>
static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all);
static pe_node_t *
can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit)
{
pe_node_t *local_node = NULL;
if (node == NULL && rsc->allowed_nodes) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) {
can_run_instance(rsc, local_node, limit);
}
return NULL;
}
if (!node) {
/* make clang analyzer happy */
goto bail;
- } else if (!pcmk__node_available(node)) {
+ } else if (!pcmk__node_available(node, false, false)) {
goto bail;
} else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
goto bail;
}
local_node = pcmk__top_allowed_node(rsc, node);
if (local_node == NULL) {
crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname);
goto bail;
} else if (local_node->weight < 0) {
common_update_score(rsc, node->details->id, local_node->weight);
pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.",
rsc->id, node->details->uname);
} else if (local_node->count < limit) {
pe_rsc_trace(rsc, "%s can run on %s (already running %d)",
rsc->id, node->details->uname, local_node->count);
return local_node;
} else {
pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)",
rsc->id, node->details->uname, local_node->count, limit);
}
bail:
if (node) {
common_update_score(rsc, node->details->id, -INFINITY);
}
return NULL;
}
static pe_node_t *
allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc,
int limit, pe_working_set_t *data_set)
{
pe_node_t *chosen = NULL;
GHashTable *backup = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)",
rsc->id, (prefer? prefer->details->uname: "none"),
(all_coloc? "all" : "some"));
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->fns->location(rsc, NULL, FALSE);
} else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
/* Only include positive colocation preferences of dependent resources
* if not every node will get a copy of the clone
*/
append_parent_colocation(rsc->parent, rsc, all_coloc);
if (prefer) {
pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (local_prefer == NULL || local_prefer->weight < 0) {
pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id,
prefer->details->uname);
return NULL;
}
}
can_run_instance(rsc, NULL, limit);
backup = pcmk__copy_node_table(rsc->allowed_nodes);
pe_rsc_trace(rsc, "Allocating instance %s", rsc->id);
chosen = rsc->cmds->allocate(rsc, prefer, data_set);
if (chosen && prefer && (chosen->details != prefer->details)) {
crm_info("Not pre-allocating %s to %s because %s is better",
rsc->id, prefer->details->uname, chosen->details->uname);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = backup;
pcmk__unassign_resource(rsc);
chosen = NULL;
backup = NULL;
}
if (chosen) {
pe_node_t *local_node = pcmk__top_allowed_node(rsc, chosen);
if (local_node) {
local_node->count++;
} else if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
/* what to do? we can't enforce per-node limits in this case */
pcmk__config_err("%s not found in %s (list of %d)",
chosen->details->id, rsc->parent->id,
g_hash_table_size(rsc->parent->allowed_nodes));
}
}
if(backup) {
g_hash_table_destroy(backup);
}
return chosen;
}
static void
append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all)
{
GList *gIter = NULL;
gIter = rsc->rsc_cons;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data;
if (all || cons->score < 0 || cons->score == INFINITY) {
child->rsc_cons = g_list_prepend(child->rsc_cons, cons);
}
}
gIter = rsc->rsc_cons_lhs;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(cons, child)) {
continue;
}
if (all || cons->score < 0) {
child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons);
}
}
}
void
distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set);
void
distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set)
{
int loop_max = 0;
int allocated = 0;
int available_nodes = 0;
bool all_coloc = false;
/* count now tracks the number of clones currently allocated */
for(GList *nIter = nodes; nIter != NULL; nIter = nIter->next) {
pe_node_t *node = nIter->data;
node->count = 0;
- if (pcmk__node_available(node)) {
+ if (pcmk__node_available(node, false, false)) {
available_nodes++;
}
}
all_coloc = (max < available_nodes) ? true : false;
if(available_nodes) {
loop_max = max / available_nodes;
}
if (loop_max < 1) {
loop_max = 1;
}
pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)",
max, rsc->id, available_nodes, per_host_max, loop_max);
/* Pre-allocate as many instances as we can to their current location */
for (GList *gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe_node_t *child_node = NULL;
pe_node_t *local_node = NULL;
if ((child->running_on == NULL)
|| !pcmk_is_set(child->flags, pe_rsc_provisional)
|| pcmk_is_set(child->flags, pe_rsc_failed)) {
continue;
}
child_node = pe__current_node(child);
local_node = pcmk__top_allowed_node(child, child_node);
pe_rsc_trace(rsc,
"Checking pre-allocation of %s to %s (%d remaining of %d)",
child->id, child_node->details->uname, max - allocated,
max);
- if (!pcmk__node_available(child_node) || (child_node->weight < 0)) {
+ if (!pcmk__node_available(child_node, true, false)) {
pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s",
child_node->details->uname, child->id);
continue;
}
if ((local_node != NULL) && (local_node->count >= loop_max)) {
pe_rsc_trace(rsc,
"Not pre-allocating because %s already allocated "
"optimal instances", child_node->details->uname);
continue;
}
if (allocate_instance(child, child_node, all_coloc, per_host_max,
data_set)) {
pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id,
child_node->details->uname);
allocated++;
}
}
pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max);
for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
if (child->running_on != NULL) {
pe_node_t *child_node = pe__current_node(child);
pe_node_t *local_node = pcmk__top_allowed_node(child, child_node);
if (local_node == NULL) {
crm_err("%s is running on %s which isn't allowed",
child->id, child_node->details->uname);
}
}
if (!pcmk_is_set(child->flags, pe_rsc_provisional)) {
} else if (allocated >= max) {
pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max);
resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set);
} else {
if (allocate_instance(child, NULL, all_coloc, per_host_max,
data_set)) {
allocated++;
}
}
}
pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d",
allocated, rsc->id, max);
}
pe_node_t *
pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *prefer,
pe_working_set_t *data_set)
{
GList *nodes = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return NULL;
} else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__add_promotion_scores(rsc);
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
/* This information is used by pcmk__cmp_instance() when deciding the order
* in which to assign clone instances to nodes.
*/
for (GList *gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_rsc_trace(rsc, "%s: Allocating %s first",
rsc->id, constraint->primary->id);
constraint->primary->cmds->allocate(constraint->primary, prefer,
data_set);
}
for (GList *gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, NULL)) {
continue;
}
rsc->allowed_nodes = constraint->dependent->cmds->merge_weights(
constraint->dependent, rsc->id, rsc->allowed_nodes,
constraint->node_attribute, (float)constraint->score / INFINITY,
(pe_weights_rollback | pe_weights_positive));
}
pe__show_node_weights(!pcmk_is_set(data_set->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, data_set);
nodes = g_hash_table_get_values(rsc->allowed_nodes);
nodes = pcmk__sort_nodes(nodes, NULL, data_set);
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance);
distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, data_set);
g_list_free(nodes);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__set_instance_roles(rsc, data_set);
}
pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating);
pe_rsc_trace(rsc, "Done allocating %s", rsc->id);
return NULL;
}
static void
clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting,
gboolean * active)
{
GList *gIter = NULL;
if (rsc->children) {
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
clone_update_pseudo_status(child, stopping, starting, active);
}
return;
}
CRM_ASSERT(active != NULL);
CRM_ASSERT(starting != NULL);
CRM_ASSERT(stopping != NULL);
if (rsc->running_on) {
*active = TRUE;
}
gIter = rsc->actions;
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (*starting && *stopping) {
return;
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid);
continue;
} else if (!pcmk_any_flags_set(action->flags,
pe_action_pseudo|pe_action_runnable)) {
pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid);
continue;
} else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) {
pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid);
*stopping = TRUE;
} else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) {
if (!pcmk_is_set(action->flags, pe_action_runnable)) {
pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d",
action->uuid,
pcmk_is_set(action->flags, pe_action_runnable),
pcmk_is_set(action->flags, pe_action_pseudo));
} else {
pe_rsc_trace(rsc, "Starting due to: %s", action->uuid);
pe_rsc_trace(rsc, "%s run=%d, pseudo=%d",
action->uuid,
pcmk_is_set(action->flags, pe_action_runnable),
pcmk_is_set(action->flags, pe_action_pseudo));
*starting = TRUE;
}
}
}
}
static pe_action_t *
find_rsc_action(pe_resource_t *rsc, const char *task)
{
pe_action_t *match = NULL;
GList *actions = pe__resource_actions(rsc, NULL, task, FALSE);
for (GList *item = actions; item != NULL; item = item->next) {
pe_action_t *op = (pe_action_t *) item->data;
if (!pcmk_is_set(op->flags, pe_action_optional)) {
if (match != NULL) {
// More than one match, don't return any
match = NULL;
break;
}
match = op;
}
}
g_list_free(actions);
return match;
}
static void
child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set)
{
pe_action_t *stop = NULL;
pe_action_t *start = NULL;
pe_action_t *last_stop = NULL;
pe_action_t *last_start = NULL;
GList *gIter = NULL;
- clone_variant_data_t *clone_data = NULL;
-
- get_clone_variant_data(clone_data, rsc);
- if (clone_data->ordered == FALSE) {
+ if (!pe__clone_is_ordered(rsc)) {
return;
}
+
/* we have to maintain a consistent sorted child list when building order constraints */
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
stop = find_rsc_action(child, RSC_STOP);
if (stop) {
if (last_stop) {
/* child/child relative stop */
order_actions(stop, last_stop, pe_order_optional);
}
last_stop = stop;
}
start = find_rsc_action(child, RSC_START);
if (start) {
if (last_start) {
/* child/child relative start */
order_actions(last_start, start, pe_order_optional);
}
last_start = start;
}
}
}
void
clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify,data_set);
child_ordering_constraints(rsc, data_set);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
create_promotable_actions(rsc, data_set);
}
}
void
clone_create_pseudo_actions(
pe_resource_t * rsc, GList *children, notify_data_t **start_notify, notify_data_t **stop_notify, pe_working_set_t * data_set)
{
gboolean child_active = FALSE;
gboolean child_starting = FALSE;
gboolean child_stopping = FALSE;
gboolean allow_dependent_migrations = TRUE;
pe_action_t *stop = NULL;
pe_action_t *stopped = NULL;
pe_action_t *start = NULL;
pe_action_t *started = NULL;
pe_rsc_trace(rsc, "Creating actions for %s", rsc->id);
for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
gboolean starting = FALSE;
gboolean stopping = FALSE;
child_rsc->cmds->create_actions(child_rsc, data_set);
clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active);
if (stopping && starting) {
allow_dependent_migrations = FALSE;
}
child_stopping |= stopping;
child_starting |= starting;
}
/* start */
start = pcmk__new_rsc_pseudo_action(rsc, RSC_START, !child_starting, true);
started = pcmk__new_rsc_pseudo_action(rsc, RSC_STARTED, !child_starting,
false);
started->priority = INFINITY;
if (child_active || child_starting) {
pe__set_action_flags(started, pe_action_runnable);
}
if (start_notify != NULL && *start_notify == NULL) {
*start_notify = pcmk__clone_notif_pseudo_ops(rsc, RSC_START, start,
started);
}
/* stop */
stop = pcmk__new_rsc_pseudo_action(rsc, RSC_STOP, !child_stopping, true);
stopped = pcmk__new_rsc_pseudo_action(rsc, RSC_STOPPED, !child_stopping,
true);
stopped->priority = INFINITY;
if (allow_dependent_migrations) {
pe__set_action_flags(stop, pe_action_migrate_runnable);
}
if (stop_notify != NULL && *stop_notify == NULL) {
*stop_notify = pcmk__clone_notif_pseudo_ops(rsc, RSC_STOP, stop,
stopped);
if (start_notify && *start_notify && *stop_notify) {
order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional);
}
}
}
void
clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
{
pe_resource_t *last_rsc = NULL;
GList *gIter;
- clone_variant_data_t *clone_data = NULL;
-
- get_clone_variant_data(clone_data, rsc);
+ bool ordered = pe__clone_is_ordered(rsc);
pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id);
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
pe_order_optional, data_set);
pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED,
pe_order_runnable_left, data_set);
pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED,
pe_order_runnable_left, data_set);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP,
pe_order_optional, data_set);
pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE,
pe_order_runnable_left, data_set);
}
- if (clone_data->ordered) {
+ if (ordered) {
/* we have to maintain a consistent sorted child list when building order constraints */
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
}
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->internal_constraints(child_rsc, data_set);
pcmk__order_starts(rsc, child_rsc,
pe_order_runnable_left|pe_order_implies_first_printed,
data_set);
pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED,
pe_order_implies_then_printed, data_set);
- if (clone_data->ordered && last_rsc) {
+ if (ordered && (last_rsc != NULL)) {
pcmk__order_starts(last_rsc, child_rsc, pe_order_optional,
data_set);
}
pcmk__order_stops(rsc, child_rsc, pe_order_implies_first_printed,
data_set);
pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED,
pe_order_implies_then_printed, data_set);
- if (clone_data->ordered && last_rsc) {
+ if (ordered && (last_rsc != NULL)) {
pcmk__order_stops(child_rsc, last_rsc, pe_order_optional, data_set);
}
last_rsc = child_rsc;
}
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
promotable_constraints(rsc, data_set);
}
}
gboolean
is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current)
{
pe_node_t *node = NULL;
enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current);
CRM_CHECK(child_rsc && local_node, return FALSE);
if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
/* We only want instances that haven't failed */
node = child_rsc->fns->location(child_rsc, NULL, current);
}
if (filter != RSC_ROLE_UNKNOWN && next_role != filter) {
crm_trace("Filtered %s", child_rsc->id);
return FALSE;
}
if (node && (node->details == local_node->details)) {
return TRUE;
} else if (node) {
crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname,
local_node->details->uname);
} else {
crm_trace("%s - not allocated %d", child_rsc->id, current);
}
return FALSE;
}
pe_resource_t *
find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc,
enum rsc_role_e filter, gboolean current,
pe_working_set_t *data_set)
{
pe_resource_t *pair = NULL;
GList *gIter = NULL;
GList *scratch = NULL;
pe_node_t *local_node = NULL;
local_node = local_child->fns->location(local_child, NULL, current);
if (local_node) {
return find_compatible_child_by_node(local_child, local_node, rsc, filter, current);
}
scratch = g_hash_table_get_values(local_child->allowed_nodes);
scratch = pcmk__sort_nodes(scratch, NULL, data_set);
gIter = scratch;
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
pair = find_compatible_child_by_node(local_child, node, rsc, filter, current);
if (pair) {
goto done;
}
}
pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id);
done:
g_list_free(scratch);
return pair;
}
void
clone_rsc_colocation_lh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
/* -- Never called --
*
* Instead we add the colocation constraints to the child and call from there
*/
CRM_ASSERT(FALSE);
}
void
clone_rsc_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
gboolean do_interleave = FALSE;
const char *interleave_s = NULL;
CRM_CHECK(constraint != NULL, return);
CRM_CHECK(dependent != NULL,
pe_err("dependent was NULL for %s", constraint->id); return);
CRM_CHECK(primary != NULL,
pe_err("primary was NULL for %s", constraint->id); return);
CRM_CHECK(dependent->variant == pe_native, return);
pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d",
constraint->id, dependent->id, primary->id, constraint->score);
if (pcmk_is_set(primary->flags, pe_rsc_promotable)) {
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if (constraint->primary_role == RSC_ROLE_UNKNOWN) {
pe_rsc_trace(primary, "Handling %s as a clone colocation",
constraint->id);
} else {
promotable_colocation_rh(dependent, primary, constraint, data_set);
return;
}
}
/* only the LHS side needs to be labeled as interleave */
interleave_s = g_hash_table_lookup(constraint->dependent->meta,
XML_RSC_ATTR_INTERLEAVE);
if (crm_is_true(interleave_s)
&& (constraint->dependent->variant > pe_group)) {
/* @TODO Do we actually care about multiple primary copies sharing a
* dependent copy anymore?
*/
if (copies_per_node(constraint->dependent) != copies_per_node(constraint->primary)) {
pcmk__config_err("Cannot interleave %s and %s because they do not "
"support the same number of instances per node",
constraint->dependent->id,
constraint->primary->id);
} else {
do_interleave = TRUE;
}
}
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if (do_interleave) {
pe_resource_t *primary_instance = NULL;
primary_instance = find_compatible_child(dependent, primary,
RSC_ROLE_UNKNOWN, FALSE,
data_set);
if (primary_instance != NULL) {
pe_rsc_debug(primary, "Pairing %s with %s",
dependent->id, primary_instance->id);
dependent->cmds->rsc_colocation_lh(dependent, primary_instance,
constraint, data_set);
} else if (constraint->score >= INFINITY) {
crm_notice("Cannot pair %s with instance of %s",
dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true);
} else {
pe_rsc_debug(primary, "Cannot pair %s with instance of %s",
dependent->id, primary->id);
}
return;
} else if (constraint->score >= INFINITY) {
GList *affected_nodes = NULL;
gIter = primary->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
pe_rsc_trace(primary, "Allowing %s: %s %d",
constraint->id, chosen->details->uname,
chosen->weight);
affected_nodes = g_list_prepend(affected_nodes, chosen);
}
}
node_list_exclude(dependent->allowed_nodes, affected_nodes, FALSE);
g_list_free(affected_nodes);
return;
}
gIter = primary->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->rsc_colocation_rh(dependent, child_rsc, constraint,
data_set);
}
}
enum action_tasks
clone_child_action(pe_action_t * action)
{
enum action_tasks result = no_action;
pe_resource_t *child = (pe_resource_t *) action->rsc->children->data;
if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) {
/* Find the action we're notifying about instead */
int stop = 0;
char *key = action->uuid;
int lpc = strlen(key);
for (; lpc > 0; lpc--) {
if (key[lpc] == '_' && stop == 0) {
stop = lpc;
} else if (key[lpc] == '_') {
char *task_mutable = NULL;
lpc++;
task_mutable = strdup(key + lpc);
task_mutable[stop - lpc] = 0;
crm_trace("Extracted action '%s' from '%s'", task_mutable, key);
result = get_complex_task(child, task_mutable, TRUE);
free(task_mutable);
break;
}
}
} else {
result = get_complex_task(child, action->task, TRUE);
}
return result;
}
#define pe__clear_action_summary_flags(flags, action, flag) do { \
flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Action summary", action->rsc->id, \
flags, flag, #flag); \
} while (0)
enum pe_action_flags
summary_action_flags(pe_action_t * action, GList *children, pe_node_t * node)
{
GList *gIter = NULL;
gboolean any_runnable = FALSE;
gboolean check_runnable = TRUE;
enum action_tasks task = clone_child_action(action);
enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo);
const char *task_s = task2text(task);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_action_t *child_action = NULL;
pe_resource_t *child = (pe_resource_t *) gIter->data;
child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node);
pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id,
node ? node->details->uname : "none", child_action?child_action->uuid:"NA");
if (child_action) {
enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node);
if (pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(child_flags, pe_action_optional)) {
pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid,
child_action->uuid);
pe__clear_action_summary_flags(flags, action, pe_action_optional);
pe__clear_action_flags(action, pe_action_optional);
}
if (pcmk_is_set(child_flags, pe_action_runnable)) {
any_runnable = TRUE;
}
}
}
if (check_runnable && any_runnable == FALSE) {
pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid);
pe__clear_action_summary_flags(flags, action, pe_action_runnable);
if (node == NULL) {
pe__clear_action_flags(action, pe_action_runnable);
}
}
return flags;
}
enum pe_action_flags
clone_action_flags(pe_action_t * action, pe_node_t * node)
{
return summary_action_flags(action, action->rsc->children, node);
}
void
clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
GList *gIter = rsc->children;
pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id);
pcmk__apply_location(constraint, rsc);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->rsc_location(child_rsc, constraint);
}
}
void
clone_expand(pe_resource_t * rsc, pe_working_set_t * data_set)
{
GList *gIter = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
g_list_foreach(rsc->actions, (GFunc) rsc->cmds->action_flags, NULL);
pcmk__create_notifications(rsc, clone_data->start_notify);
pcmk__create_notifications(rsc, clone_data->stop_notify);
pcmk__create_notifications(rsc, clone_data->promote_notify);
pcmk__create_notifications(rsc, clone_data->demote_notify);
/* Now that the notifcations have been created we can expand the children */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->expand(child_rsc, data_set);
}
native_expand(rsc, data_set);
/* The notifications are in the graph now, we can destroy the notify_data */
pcmk__free_notification_data(clone_data->demote_notify);
clone_data->demote_notify = NULL;
pcmk__free_notification_data(clone_data->stop_notify);
clone_data->stop_notify = NULL;
pcmk__free_notification_data(clone_data->start_notify);
clone_data->start_notify = NULL;
pcmk__free_notification_data(clone_data->promote_notify);
clone_data->promote_notify = NULL;
}
// Check whether a resource or any of its children is known on node
static bool
rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node)
{
if (rsc->children) {
for (GList *child_iter = rsc->children; child_iter != NULL;
child_iter = child_iter->next) {
pe_resource_t *child = (pe_resource_t *) child_iter->data;
if (rsc_known_on(child, node)) {
return TRUE;
}
}
} else if (rsc->known_on) {
GHashTableIter iter;
pe_node_t *known_node = NULL;
g_hash_table_iter_init(&iter, rsc->known_on);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) {
if (node->details == known_node->details) {
return TRUE;
}
}
}
return FALSE;
}
// Look for an instance of clone that is known on node
static pe_resource_t *
find_instance_on(const pe_resource_t *clone, const pe_node_t *node)
{
for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
if (rsc_known_on(child, node)) {
return child;
}
}
return NULL;
}
// For unique clones, probe each instance separately
static gboolean
probe_unique_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete,
gboolean force, pe_working_set_t *data_set)
{
gboolean any_created = FALSE;
for (GList *child_iter = rsc->children; child_iter != NULL;
child_iter = child_iter->next) {
pe_resource_t *child = (pe_resource_t *) child_iter->data;
any_created |= child->cmds->create_probe(child, node, complete, force,
data_set);
}
return any_created;
}
// For anonymous clones, only a single instance needs to be probed
static gboolean
probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *complete, gboolean force,
pe_working_set_t *data_set)
{
// First, check if we probed an instance on this node last time
pe_resource_t *child = find_instance_on(rsc, node);
// Otherwise, check if we plan to start an instance on this node
if (child == NULL) {
for (GList *child_iter = rsc->children; child_iter && !child;
child_iter = child_iter->next) {
pe_node_t *local_node = NULL;
pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data;
if (child_rsc) { /* make clang analyzer happy */
local_node = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (local_node && (local_node->details == node->details)) {
child = child_rsc;
}
}
}
}
// Otherwise, use the first clone instance
if (child == NULL) {
child = rsc->children->data;
}
CRM_ASSERT(child);
return child->cmds->create_probe(child, node, complete, force, data_set);
}
gboolean
clone_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete,
gboolean force, pe_working_set_t * data_set)
{
gboolean any_created = FALSE;
CRM_ASSERT(rsc);
rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number);
if (rsc->children == NULL) {
pe_warn("Clone %s has no children", rsc->id);
return FALSE;
}
if (rsc->exclusive_discover) {
pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) {
/* exclusive discover is enabled and this node is not marked
* as a node this resource should be discovered on
*
* remove the node from allowed_nodes so that the
* notification contains only nodes that we might ever run
* on
*/
g_hash_table_remove(rsc->allowed_nodes, node->details->id);
/* Bit of a shortcut - might as well take it */
return FALSE;
}
}
if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
any_created = probe_unique_clone(rsc, node, complete, force, data_set);
} else {
any_created = probe_anonymous_clone(rsc, node, complete, force,
data_set);
}
return any_created;
}
void
clone_append_meta(pe_resource_t * rsc, xmlNode * xml)
{
char *name = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
name = crm_meta_name(XML_RSC_ATTR_UNIQUE);
crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique));
free(name);
name = crm_meta_name(XML_RSC_ATTR_NOTIFY);
crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify));
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX);
crm_xml_add_int(xml, name, clone_data->clone_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX);
crm_xml_add_int(xml, name, clone_data->clone_node_max);
free(name);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX);
crm_xml_add_int(xml, name, clone_data->promoted_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX);
crm_xml_add_int(xml, name, clone_data->promoted_node_max);
free(name);
/* @COMPAT Maintain backward compatibility with resource agents that
* expect the old names (deprecated since 2.0.0).
*/
name = crm_meta_name(PCMK_XE_PROMOTED_MAX_LEGACY);
crm_xml_add_int(xml, name, clone_data->promoted_max);
free(name);
name = crm_meta_name(PCMK_XE_PROMOTED_NODE_MAX_LEGACY);
crm_xml_add_int(xml, name, clone_data->promoted_node_max);
free(name);
}
}
// Clone implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__clone_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization)
{
bool existing = false;
pe_resource_t *child = NULL;
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return;
}
// Look for any child already existing in the list
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
child = (pe_resource_t *) iter->data;
if (g_list_find(all_rscs, child)) {
existing = true; // Keep checking remaining children
} else {
// If this is a clone of a group, look for group's members
for (GList *member_iter = child->children; member_iter != NULL;
member_iter = member_iter->next) {
pe_resource_t *member = (pe_resource_t *) member_iter->data;
if (g_list_find(all_rscs, member) != NULL) {
// Add *child's* utilization, not group member's
child->cmds->add_utilization(child, orig_rsc, all_rscs,
utilization);
existing = true;
break;
}
}
}
}
if (!existing && (rsc->children != NULL)) {
// If nothing was found, still add first child's utilization
child = (pe_resource_t *) rsc->children->data;
child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization);
}
}
// Clone implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__clone_shutdown_lock(pe_resource_t *rsc)
{
return; // Clones currently don't support shutdown locks
}
diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index 3e7cf160fc..79effebc57 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1,2640 +1,2643 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/pengine/rules.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <pacemaker-internal.h>
#include <crm/services.h>
#include "libpacemaker_private.h"
// The controller removes the resource from the CIB, making this redundant
// #define DELETE_THEN_REFRESH 1
#define INFINITY_HACK (INFINITY * -100)
#define VARIANT_NATIVE 1
#include <lib/pengine/variant.h>
extern bool pcmk__is_daemon;
static void Recurring(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node,
pe_working_set_t *data_set);
static void RecurringOp(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node,
xmlNode *operation, pe_working_set_t *data_set);
static void Recurring_Stopped(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node,
pe_working_set_t *data_set);
static void RecurringOp_Stopped(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node,
xmlNode *operation, pe_working_set_t *data_set);
gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set);
gboolean StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set);
gboolean StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set);
gboolean DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set);
gboolean PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional,
pe_working_set_t * data_set);
gboolean RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set);
gboolean NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set);
/* This array says what the *next* role should be when transitioning from one
* role to another. For example going from Stopped to Promoted, the next role is
* RSC_ROLE_UNPROMOTED, because the resource must be started before being promoted.
* The current state then becomes Started, which is fed into this array again,
* giving a next role of RSC_ROLE_PROMOTED.
*/
static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* Current state Next state*/
/* Unknown Stopped Started Unpromoted Promoted */
/* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED },
/* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED },
/* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED },
/* Unpromoted */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED },
/* Promoted */ { RSC_ROLE_STOPPED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED },
};
typedef gboolean (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *next,
gboolean optional,
pe_working_set_t *data_set);
// This array picks the function needed to transition from one role to another
static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* Current state Next state */
/* Unknown Stopped Started Unpromoted Promoted */
/* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, },
/* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, },
/* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, },
/* Unpromoted */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, },
/* Promoted */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp, },
};
#define clear_node_weights_flags(nw_flags, nw_rsc, flags_to_clear) do { \
flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Node weight", (nw_rsc)->id, (flags), \
(flags_to_clear), #flags_to_clear); \
} while (0)
static bool
native_choose_node(pe_resource_t * rsc, pe_node_t * prefer, pe_working_set_t * data_set)
{
GList *nodes = NULL;
pe_node_t *chosen = NULL;
pe_node_t *best = NULL;
int multiple = 1;
int length = 0;
bool result = false;
pcmk__ban_insufficient_capacity(rsc, &prefer);
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->allocated_to != NULL;
}
// Sort allowed nodes by weight
if (rsc->allowed_nodes) {
length = g_hash_table_size(rsc->allowed_nodes);
}
if (length > 0) {
nodes = g_hash_table_get_values(rsc->allowed_nodes);
nodes = pcmk__sort_nodes(nodes, pe__current_node(rsc), data_set);
// First node in sorted list has the best score
best = g_list_nth_data(nodes, 0);
}
if (prefer && nodes) {
chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (chosen == NULL) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
prefer->details->uname, rsc->id);
/* Favor the preferred node as long as its weight is at least as good as
* the best allowed node's.
*
* An alternative would be to favor the preferred node even if the best
* node is better, when the best node's weight is less than INFINITY.
*/
- } else if ((chosen->weight < 0) || (chosen->weight < best->weight)) {
+ } else if (chosen->weight < best->weight) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
chosen->details->uname, rsc->id);
chosen = NULL;
- } else if (!pcmk__node_available(chosen)) {
+ } else if (!pcmk__node_available(chosen, true, false)) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
chosen->details->uname, rsc->id);
chosen = NULL;
} else {
pe_rsc_trace(rsc,
"Chose preferred node %s for %s (ignoring %d candidates)",
chosen->details->uname, rsc->id, length);
}
}
if ((chosen == NULL) && nodes) {
/* Either there is no preferred node, or the preferred node is not
* available, but there are other nodes allowed to run the resource.
*/
chosen = best;
pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates",
chosen ? chosen->details->uname : "<none>", rsc->id, length);
if (!pe_rsc_is_unique_clone(rsc->parent)
- && chosen && (chosen->weight > 0) && pcmk__node_available(chosen)) {
+ && (chosen != NULL) && (chosen->weight > 0) // Zero not acceptable
+ && pcmk__node_available(chosen, false, false)) {
/* If the resource is already running on a node, prefer that node if
* it is just as good as the chosen node.
*
* We don't do this for unique clone instances, because
* distribute_children() has already assigned instances to their
* running nodes when appropriate, and if we get here, we don't want
* remaining unallocated instances to prefer a node that's already
* running another instance.
*/
pe_node_t *running = pe__current_node(rsc);
- if ((running != NULL) && !pcmk__node_available(running)) {
+ if ((running != NULL)
+ && !pcmk__node_available(running, true, false)) {
pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources",
rsc->id, running->details->uname);
+
} else if (running) {
for (GList *iter = nodes->next; iter; iter = iter->next) {
pe_node_t *tmp = (pe_node_t *) iter->data;
if (tmp->weight != chosen->weight) {
// The nodes are sorted by weight, so no more are equal
break;
}
if (tmp->details == running->details) {
// Scores are equal, so prefer the current node
chosen = tmp;
}
multiple++;
}
}
}
}
if (multiple > 1) {
static char score[33];
int log_level = (chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO;
score2char_stack(chosen->weight, score, sizeof(score));
do_crm_log(log_level,
"Chose node %s for %s from %d nodes with score %s",
chosen->details->uname, rsc->id, multiple, score);
}
result = pcmk__assign_primitive(rsc, chosen, false);
g_list_free(nodes);
return result;
}
/*!
* \internal
* \brief Find score of highest-scored node that matches colocation attribute
*
* \param[in] rsc Resource whose allowed nodes should be searched
* \param[in] attr Colocation attribute name (must not be NULL)
* \param[in] value Colocation attribute value to require
*/
static int
best_node_score_matching_attr(const pe_resource_t *rsc, const char *attr,
const char *value)
{
GHashTableIter iter;
pe_node_t *node = NULL;
int best_score = -INFINITY;
const char *best_node = NULL;
// Find best allowed node with matching attribute
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
- if ((node->weight > best_score) && pcmk__node_available(node)
+ if ((node->weight > best_score) && pcmk__node_available(node, false, false)
&& pcmk__str_eq(value, pe_node_attribute_raw(node, attr), pcmk__str_casei)) {
best_score = node->weight;
best_node = node->details->uname;
}
}
if (!pcmk__str_eq(attr, CRM_ATTR_UNAME, pcmk__str_casei)) {
if (best_node == NULL) {
crm_info("No allowed node for %s matches node attribute %s=%s",
rsc->id, attr, value);
} else {
crm_info("Allowed node %s for %s had best score (%d) "
"of those matching node attribute %s=%s",
best_node, rsc->id, best_score, attr, value);
}
}
return best_score;
}
/*!
* \internal
* \brief Add resource's colocation matches to current node allocation scores
*
* For each node in a given table, if any of a given resource's allowed nodes
* have a matching value for the colocation attribute, add the highest of those
* nodes' scores to the node's score.
*
* \param[in,out] nodes Hash table of nodes with allocation scores so far
* \param[in] rsc Resource whose allowed nodes should be compared
* \param[in] attr Colocation attribute that must match (NULL for default)
* \param[in] factor Factor by which to multiply scores being added
* \param[in] only_positive Whether to add only positive scores
*/
static void
add_node_scores_matching_attr(GHashTable *nodes, const pe_resource_t *rsc,
const char *attr, float factor,
bool only_positive)
{
GHashTableIter iter;
pe_node_t *node = NULL;
if (attr == NULL) {
attr = CRM_ATTR_UNAME;
}
// Iterate through each node
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
float weight_f = 0;
int weight = 0;
int score = 0;
int new_score = 0;
score = best_node_score_matching_attr(rsc, attr,
pe_node_attribute_raw(node, attr));
if ((factor < 0) && (score < 0)) {
/* Negative preference for a node with a negative score
* should not become a positive preference.
*
* @TODO Consider filtering only if weight is -INFINITY
*/
crm_trace("%s: Filtering %d + %f * %d (double negative disallowed)",
node->details->uname, node->weight, factor, score);
continue;
}
if (node->weight == INFINITY_HACK) {
crm_trace("%s: Filtering %d + %f * %d (node was marked unusable)",
node->details->uname, node->weight, factor, score);
continue;
}
weight_f = factor * score;
// Round the number; see http://c-faq.com/fp/round.html
weight = (int) ((weight_f < 0)? (weight_f - 0.5) : (weight_f + 0.5));
/* Small factors can obliterate the small scores that are often actually
* used in configurations. If the score and factor are nonzero, ensure
* that the result is nonzero as well.
*/
if ((weight == 0) && (score != 0)) {
if (factor > 0.0) {
weight = 1;
} else if (factor < 0.0) {
weight = -1;
}
}
new_score = pcmk__add_scores(weight, node->weight);
if (only_positive && (new_score < 0) && (node->weight > 0)) {
crm_trace("%s: Filtering %d + %f * %d = %d "
"(negative disallowed, marking node unusable)",
node->details->uname, node->weight, factor, score,
new_score);
node->weight = INFINITY_HACK;
continue;
}
if (only_positive && (new_score < 0) && (node->weight == 0)) {
crm_trace("%s: Filtering %d + %f * %d = %d (negative disallowed)",
node->details->uname, node->weight, factor, score,
new_score);
continue;
}
crm_trace("%s: %d + %f * %d = %d", node->details->uname,
node->weight, factor, score, new_score);
node->weight = new_score;
}
}
static inline bool
is_nonempty_group(pe_resource_t *rsc)
{
return rsc && (rsc->variant == pe_group) && (rsc->children != NULL);
}
/*!
* \internal
* \brief Incorporate colocation constraint scores into node weights
*
* \param[in,out] rsc Resource being placed
* \param[in] primary_id ID of primary resource in constraint
* \param[in,out] nodes Nodes, with scores as of this point
* \param[in] attr Colocation attribute (ID by default)
* \param[in] factor Incorporate scores multiplied by this factor
* \param[in] flags Bitmask of enum pe_weights values
*
* \return Nodes, with scores modified by this constraint
* \note This function assumes ownership of the nodes argument. The caller
* should free the returned copy rather than the original.
*/
GHashTable *
pcmk__native_merge_weights(pe_resource_t *rsc, const char *primary_id,
GHashTable *nodes, const char *attr, float factor,
uint32_t flags)
{
GHashTable *work = NULL;
// Avoid infinite recursion
if (pcmk_is_set(rsc->flags, pe_rsc_merging)) {
pe_rsc_info(rsc, "%s: Breaking dependency loop at %s",
primary_id, rsc->id);
return nodes;
}
pe__set_resource_flags(rsc, pe_rsc_merging);
if (pcmk_is_set(flags, pe_weights_init)) {
if (is_nonempty_group(rsc)) {
GList *last = g_list_last(rsc->children);
pe_resource_t *last_rsc = last->data;
pe_rsc_trace(rsc, "%s: Merging scores from group %s "
"using last member %s (at %.6f)",
primary_id, rsc->id, last_rsc->id, factor);
work = pcmk__native_merge_weights(last_rsc, primary_id, NULL, attr,
factor, flags);
} else {
work = pcmk__copy_node_table(rsc->allowed_nodes);
}
clear_node_weights_flags(flags, rsc, pe_weights_init);
} else if (is_nonempty_group(rsc)) {
/* The first member of the group will recursively incorporate any
* constraints involving other members (including the group internal
* colocation).
*
* @TODO The indirect colocations from the dependent group's other
* members will be incorporated at full strength rather than by
* factor, so the group's combined stickiness will be treated as
* (factor + (#members - 1)) * stickiness. It is questionable what
* the right approach should be.
*/
pe_rsc_trace(rsc, "%s: Merging scores from first member of group %s "
"(at %.6f)", primary_id, rsc->id, factor);
work = pcmk__copy_node_table(nodes);
work = pcmk__native_merge_weights(rsc->children->data, primary_id, work,
attr, factor, flags);
} else {
pe_rsc_trace(rsc, "%s: Merging scores from %s (at %.6f)",
primary_id, rsc->id, factor);
work = pcmk__copy_node_table(nodes);
add_node_scores_matching_attr(work, rsc, attr, factor,
pcmk_is_set(flags, pe_weights_positive));
}
if (pcmk__any_node_available(work)) {
GList *gIter = NULL;
int multiplier = (factor < 0)? -1 : 1;
if (pcmk_is_set(flags, pe_weights_forward)) {
gIter = rsc->rsc_cons;
pe_rsc_trace(rsc,
"Checking additional %d optional '%s with' constraints",
g_list_length(gIter), rsc->id);
} else if (is_nonempty_group(rsc)) {
pe_resource_t *last_rsc = g_list_last(rsc->children)->data;
gIter = last_rsc->rsc_cons_lhs;
pe_rsc_trace(rsc, "Checking additional %d optional 'with group %s' "
"constraints using last member %s",
g_list_length(gIter), rsc->id, last_rsc->id);
} else {
gIter = rsc->rsc_cons_lhs;
pe_rsc_trace(rsc,
"Checking additional %d optional 'with %s' constraints",
g_list_length(gIter), rsc->id);
}
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *other = NULL;
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (pcmk_is_set(flags, pe_weights_forward)) {
other = constraint->primary;
} else if (!pcmk__colocation_has_influence(constraint, NULL)) {
continue;
} else {
other = constraint->dependent;
}
pe_rsc_trace(rsc, "Optionally merging score of '%s' constraint (%s with %s)",
constraint->id, constraint->dependent->id,
constraint->primary->id);
work = pcmk__native_merge_weights(other, primary_id, work,
constraint->node_attribute,
multiplier * constraint->score / (float) INFINITY,
flags|pe_weights_rollback);
pe__show_node_weights(true, NULL, primary_id, work, rsc->cluster);
}
} else if (pcmk_is_set(flags, pe_weights_rollback)) {
pe_rsc_info(rsc, "%s: Rolling back optional scores from %s",
primary_id, rsc->id);
g_hash_table_destroy(work);
pe__clear_resource_flags(rsc, pe_rsc_merging);
return nodes;
}
if (pcmk_is_set(flags, pe_weights_positive)) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, work);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (node->weight == INFINITY_HACK) {
node->weight = 1;
}
}
}
if (nodes) {
g_hash_table_destroy(nodes);
}
pe__clear_resource_flags(rsc, pe_rsc_merging);
return work;
}
pe_node_t *
pcmk__native_allocate(pe_resource_t *rsc, pe_node_t *prefer,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
if (rsc->parent && !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) {
/* never allocate children on their own */
pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id,
rsc->parent->id);
rsc->parent->cmds->allocate(rsc->parent, prefer, data_set);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->allocated_to;
}
if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
pe__show_node_weights(true, rsc, "Pre-alloc", rsc->allowed_nodes, data_set);
for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
GHashTable *archive = NULL;
pe_resource_t *primary = constraint->primary;
if ((constraint->dependent_role >= RSC_ROLE_PROMOTED)
|| (constraint->score < 0 && constraint->score > -INFINITY)) {
archive = pcmk__copy_node_table(rsc->allowed_nodes);
}
pe_rsc_trace(rsc,
"%s: Allocating %s first (constraint=%s score=%d role=%s)",
rsc->id, primary->id, constraint->id,
constraint->score, role2text(constraint->dependent_role));
primary->cmds->allocate(primary, NULL, data_set);
rsc->cmds->rsc_colocation_lh(rsc, primary, constraint, data_set);
if (archive && !pcmk__any_node_available(rsc->allowed_nodes)) {
pe_rsc_info(rsc, "%s: Rolling back scores from %s",
rsc->id, primary->id);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = archive;
archive = NULL;
}
if (archive) {
g_hash_table_destroy(archive);
}
}
pe__show_node_weights(true, rsc, "Post-coloc", rsc->allowed_nodes, data_set);
for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, NULL)) {
continue;
}
pe_rsc_trace(rsc, "Merging score of '%s' constraint (%s with %s)",
constraint->id, constraint->dependent->id,
constraint->primary->id);
rsc->allowed_nodes = constraint->dependent->cmds->merge_weights(
constraint->dependent, rsc->id, rsc->allowed_nodes,
constraint->node_attribute, constraint->score / (float) INFINITY,
pe_weights_rollback);
}
if (rsc->next_role == RSC_ROLE_STOPPED) {
pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id);
/* make sure it doesn't come up again */
resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set);
} else if(rsc->next_role > rsc->role
&& !pcmk_is_set(data_set->flags, pe_flag_have_quorum)
&& data_set->no_quorum_policy == no_quorum_freeze) {
crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze",
rsc->id, role2text(rsc->role), role2text(rsc->next_role));
pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze");
}
pe__show_node_weights(!pcmk_is_set(data_set->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, data_set);
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)
&& !pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) {
pe__clear_resource_flags(rsc, pe_rsc_managed);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
const char *reason = NULL;
pe_node_t *assign_to = NULL;
pe__set_next_role(rsc, rsc->role, "unmanaged");
assign_to = pe__current_node(rsc);
if (assign_to == NULL) {
reason = "inactive";
} else if (rsc->role == RSC_ROLE_PROMOTED) {
reason = "promoted";
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
reason = "failed";
} else {
reason = "active";
}
pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id,
(assign_to? assign_to->details->uname : "no node"), reason);
pcmk__assign_primitive(rsc, assign_to, true);
} else if (pcmk_is_set(data_set->flags, pe_flag_stop_everything)) {
pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id);
pcmk__assign_primitive(rsc, NULL, true);
} else if (pcmk_is_set(rsc->flags, pe_rsc_provisional)
&& native_choose_node(rsc, prefer, data_set)) {
pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id,
rsc->allocated_to->details->uname);
} else if (rsc->allocated_to == NULL) {
if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
} else if (rsc->running_on != NULL) {
pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
}
} else {
pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id,
rsc->allocated_to->details->uname);
}
pe__clear_resource_flags(rsc, pe_rsc_allocating);
if (rsc->is_remote_node) {
pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
CRM_ASSERT(remote_node != NULL);
if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) {
crm_trace("Setting Pacemaker Remote node %s to ONLINE",
remote_node->details->id);
remote_node->details->online = TRUE;
/* We shouldn't consider an unseen remote-node unclean if we are going
* to try and connect to it. Otherwise we get an unnecessary fence */
if (remote_node->details->unseen == TRUE) {
remote_node->details->unclean = FALSE;
}
} else {
crm_trace("Setting Pacemaker Remote node %s to SHUTDOWN (next role %s, %sallocated)",
remote_node->details->id, role2text(rsc->next_role),
(rsc->allocated_to? "" : "un"));
remote_node->details->shutdown = TRUE;
}
}
return rsc->allocated_to;
}
static gboolean
is_op_dup(pe_resource_t *rsc, const char *name, guint interval_ms)
{
gboolean dup = FALSE;
const char *id = NULL;
const char *value = NULL;
xmlNode *operation = NULL;
guint interval2_ms = 0;
CRM_ASSERT(rsc);
for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL;
operation = pcmk__xe_next(operation)) {
if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
value = crm_element_value(operation, "name");
if (!pcmk__str_eq(value, name, pcmk__str_casei)) {
continue;
}
value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
interval2_ms = crm_parse_interval_spec(value);
if (interval_ms != interval2_ms) {
continue;
}
if (id == NULL) {
id = ID(operation);
} else {
pcmk__config_err("Operation %s is duplicate of %s (do not use "
"same name and interval combination more "
"than once per resource)", ID(operation), id);
dup = TRUE;
}
}
}
return dup;
}
static bool
op_cannot_recur(const char *name)
{
return pcmk__strcase_any_of(name, RSC_STOP, RSC_START, RSC_DEMOTE, RSC_PROMOTE, NULL);
}
static void
RecurringOp(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node,
xmlNode * operation, pe_working_set_t * data_set)
{
char *key = NULL;
const char *name = NULL;
const char *role = NULL;
const char *interval_spec = NULL;
const char *node_uname = node? node->details->uname : "n/a";
guint interval_ms = 0;
pe_action_t *mon = NULL;
gboolean is_optional = TRUE;
GList *possible_matches = NULL;
CRM_ASSERT(rsc);
/* Only process for the operations without role="Stopped" */
role = crm_element_value(operation, "role");
if (role && text2role(role) == RSC_ROLE_STOPPED) {
return;
}
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
interval_ms = crm_parse_interval_spec(interval_spec);
if (interval_ms == 0) {
return;
}
name = crm_element_value(operation, "name");
if (is_op_dup(rsc, name, interval_ms)) {
crm_trace("Not creating duplicate recurring action %s for %dms %s",
ID(operation), interval_ms, name);
return;
}
if (op_cannot_recur(name)) {
pcmk__config_err("Ignoring %s because action '%s' cannot be recurring",
ID(operation), name);
return;
}
key = pcmk__op_key(rsc->id, name, interval_ms);
if (find_rsc_op_entry(rsc, key) == NULL) {
crm_trace("Not creating recurring action %s for disabled resource %s",
ID(operation), rsc->id);
free(key);
return;
}
pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s",
ID(operation), rsc->id, role2text(rsc->next_role), node_uname);
if (start != NULL) {
pe_rsc_trace(rsc, "Marking %s %s due to %s", key,
pcmk_is_set(start->flags, pe_action_optional)? "optional" : "mandatory",
start->uuid);
is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional);
} else {
pe_rsc_trace(rsc, "Marking %s optional", key);
is_optional = TRUE;
}
/* start a monitor for an already active resource */
possible_matches = find_actions_exact(rsc->actions, key, node);
if (possible_matches == NULL) {
is_optional = FALSE;
pe_rsc_trace(rsc, "Marking %s mandatory: not active", key);
} else {
GList *gIter = NULL;
for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) {
pe_action_t *op = (pe_action_t *) gIter->data;
if (pcmk_is_set(op->flags, pe_action_reschedule)) {
is_optional = FALSE;
break;
}
}
g_list_free(possible_matches);
}
if (((rsc->next_role == RSC_ROLE_PROMOTED) && (role == NULL))
|| (role != NULL && text2role(role) != rsc->next_role)) {
int log_level = LOG_TRACE;
const char *result = "Ignoring";
if (is_optional) {
char *after_key = NULL;
pe_action_t *cancel_op = NULL;
// It's running, so cancel it
log_level = LOG_INFO;
result = "Cancelling";
cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node);
switch (rsc->role) {
case RSC_ROLE_UNPROMOTED:
case RSC_ROLE_STARTED:
if (rsc->next_role == RSC_ROLE_PROMOTED) {
after_key = promote_key(rsc);
} else if (rsc->next_role == RSC_ROLE_STOPPED) {
after_key = stop_key(rsc);
}
break;
case RSC_ROLE_PROMOTED:
after_key = demote_key(rsc);
break;
default:
break;
}
if (after_key) {
pcmk__new_ordering(rsc, NULL, cancel_op, rsc, after_key, NULL,
pe_order_runnable_left, data_set);
}
}
do_crm_log(log_level, "%s action %s (%s vs. %s)",
result, key, role ? role : role2text(RSC_ROLE_UNPROMOTED),
role2text(rsc->next_role));
free(key);
return;
}
mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set);
key = mon->uuid;
if (is_optional) {
pe_rsc_trace(rsc, "%s\t %s (optional)", node_uname, mon->uuid);
}
if ((start == NULL) || !pcmk_is_set(start->flags, pe_action_runnable)) {
pe_rsc_debug(rsc, "%s\t %s (cancelled : start un-runnable)",
node_uname, mon->uuid);
pe__clear_action_flags(mon, pe_action_runnable);
} else if (node == NULL || node->details->online == FALSE || node->details->unclean) {
pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)",
node_uname, mon->uuid);
pe__clear_action_flags(mon, pe_action_runnable);
} else if (!pcmk_is_set(mon->flags, pe_action_optional)) {
pe_rsc_info(rsc, " Start recurring %s (%us) for %s on %s",
mon->task, interval_ms / 1000, rsc->id, node_uname);
}
if (rsc->next_role == RSC_ROLE_PROMOTED) {
char *running_promoted = pcmk__itoa(PCMK_OCF_RUNNING_PROMOTED);
add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_promoted);
free(running_promoted);
}
if ((node == NULL) || pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pcmk__new_ordering(rsc, start_key(rsc), NULL, NULL, strdup(key), mon,
pe_order_implies_then|pe_order_runnable_left,
data_set);
pcmk__new_ordering(rsc, reload_key(rsc), NULL, NULL, strdup(key), mon,
pe_order_implies_then|pe_order_runnable_left,
data_set);
if (rsc->next_role == RSC_ROLE_PROMOTED) {
pcmk__new_ordering(rsc, promote_key(rsc), NULL, rsc, NULL, mon,
pe_order_optional|pe_order_runnable_left,
data_set);
} else if (rsc->role == RSC_ROLE_PROMOTED) {
pcmk__new_ordering(rsc, demote_key(rsc), NULL, rsc, NULL, mon,
pe_order_optional|pe_order_runnable_left,
data_set);
}
}
}
static void
Recurring(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, pe_working_set_t * data_set)
{
if (!pcmk_is_set(rsc->flags, pe_rsc_maintenance) &&
(node == NULL || node->details->maintenance == FALSE)) {
xmlNode *operation = NULL;
for (operation = pcmk__xe_first_child(rsc->ops_xml);
operation != NULL;
operation = pcmk__xe_next(operation)) {
if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
RecurringOp(rsc, start, node, operation, data_set);
}
}
}
}
static void
RecurringOp_Stopped(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node,
xmlNode * operation, pe_working_set_t * data_set)
{
char *key = NULL;
const char *name = NULL;
const char *role = NULL;
const char *interval_spec = NULL;
const char *node_uname = node? node->details->uname : "n/a";
guint interval_ms = 0;
GList *possible_matches = NULL;
GList *gIter = NULL;
/* Only process for the operations with role="Stopped" */
role = crm_element_value(operation, "role");
if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) {
return;
}
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
interval_ms = crm_parse_interval_spec(interval_spec);
if (interval_ms == 0) {
return;
}
name = crm_element_value(operation, "name");
if (is_op_dup(rsc, name, interval_ms)) {
crm_trace("Not creating duplicate recurring action %s for %dms %s",
ID(operation), interval_ms, name);
return;
}
if (op_cannot_recur(name)) {
pcmk__config_err("Ignoring %s because action '%s' cannot be recurring",
ID(operation), name);
return;
}
key = pcmk__op_key(rsc->id, name, interval_ms);
if (find_rsc_op_entry(rsc, key) == NULL) {
crm_trace("Not creating recurring action %s for disabled resource %s",
ID(operation), rsc->id);
free(key);
return;
}
// @TODO add support
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
crm_notice("Ignoring %s (recurring monitors for Stopped role are "
"not supported for anonymous clones)",
ID(operation));
return;
}
pe_rsc_trace(rsc,
"Creating recurring action %s for %s in role %s on nodes where it should not be running",
ID(operation), rsc->id, role2text(rsc->next_role));
/* if the monitor exists on the node where the resource will be running, cancel it */
if (node != NULL) {
possible_matches = find_actions_exact(rsc->actions, key, node);
if (possible_matches) {
pe_action_t *cancel_op = NULL;
g_list_free(possible_matches);
cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node);
if ((rsc->next_role == RSC_ROLE_STARTED)
|| (rsc->next_role == RSC_ROLE_UNPROMOTED)) {
/* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */
/* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */
pcmk__new_ordering(rsc, NULL, cancel_op, rsc, start_key(rsc),
NULL, pe_order_runnable_left, data_set);
}
pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s",
key, role, role2text(rsc->next_role), node_uname);
}
}
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
pe_node_t *stop_node = (pe_node_t *) gIter->data;
const char *stop_node_uname = stop_node->details->uname;
gboolean is_optional = TRUE;
gboolean probe_is_optional = TRUE;
gboolean stop_is_optional = TRUE;
pe_action_t *stopped_mon = NULL;
char *rc_inactive = NULL;
GList *stop_ops = NULL;
GList *local_gIter = NULL;
if (node && pcmk__str_eq(stop_node_uname, node_uname, pcmk__str_casei)) {
continue;
}
pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s",
ID(operation), rsc->id,
pcmk__s(stop_node_uname, "unknown node"));
/* start a monitor for an already stopped resource */
possible_matches = find_actions_exact(rsc->actions, key, stop_node);
if (possible_matches == NULL) {
pe_rsc_trace(rsc, "Marking %s mandatory on %s: not active", key,
pcmk__s(stop_node_uname, "unknown node"));
is_optional = FALSE;
} else {
pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key,
pcmk__s(stop_node_uname, "unknown node"));
is_optional = TRUE;
g_list_free(possible_matches);
}
stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set);
rc_inactive = pcmk__itoa(PCMK_OCF_NOT_RUNNING);
add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive);
free(rc_inactive);
if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
GList *probes = pe__resource_actions(rsc, stop_node, RSC_STATUS,
FALSE);
GList *pIter = NULL;
for (pIter = probes; pIter != NULL; pIter = pIter->next) {
pe_action_t *probe = (pe_action_t *) pIter->data;
order_actions(probe, stopped_mon, pe_order_runnable_left);
crm_trace("%s then %s on %s", probe->uuid, stopped_mon->uuid, stop_node->details->uname);
}
g_list_free(probes);
}
stop_ops = pe__resource_actions(rsc, stop_node, RSC_STOP, TRUE);
for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) {
pe_action_t *stop = (pe_action_t *) local_gIter->data;
if (!pcmk_is_set(stop->flags, pe_action_optional)) {
stop_is_optional = FALSE;
}
if (!pcmk_is_set(stop->flags, pe_action_runnable)) {
crm_debug("%s\t %s (cancelled : stop un-runnable)",
pcmk__s(stop_node_uname, "<null>"),
stopped_mon->uuid);
pe__clear_action_flags(stopped_mon, pe_action_runnable);
}
if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pcmk__new_ordering(rsc, stop_key(rsc), stop, NULL, strdup(key),
stopped_mon,
pe_order_implies_then|pe_order_runnable_left,
data_set);
}
}
if (stop_ops) {
g_list_free(stop_ops);
}
if (is_optional == FALSE && probe_is_optional && stop_is_optional
&& !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged",
key, pcmk__s(stop_node_uname, "unknown node"));
pe__set_action_flags(stopped_mon, pe_action_optional);
}
if (pcmk_is_set(stopped_mon->flags, pe_action_optional)) {
pe_rsc_trace(rsc, "%s\t %s (optional)",
pcmk__s(stop_node_uname, "<null>"),
stopped_mon->uuid);
}
if (stop_node->details->online == FALSE || stop_node->details->unclean) {
pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)",
pcmk__s(stop_node_uname, "<null>"),
stopped_mon->uuid);
pe__clear_action_flags(stopped_mon, pe_action_runnable);
}
if (pcmk_is_set(stopped_mon->flags, pe_action_runnable)
&& !pcmk_is_set(stopped_mon->flags, pe_action_optional)) {
crm_notice(" Start recurring %s (%us) for %s on %s", stopped_mon->task,
interval_ms / 1000, rsc->id,
pcmk__s(stop_node_uname, "unknown node"));
}
}
free(key);
}
static void
Recurring_Stopped(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, pe_working_set_t * data_set)
{
if (!pcmk_is_set(rsc->flags, pe_rsc_maintenance) &&
(node == NULL || node->details->maintenance == FALSE)) {
xmlNode *operation = NULL;
for (operation = pcmk__xe_first_child(rsc->ops_xml);
operation != NULL;
operation = pcmk__xe_next(operation)) {
if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
RecurringOp_Stopped(rsc, start, node, operation, data_set);
}
}
}
}
static void
handle_migration_actions(pe_resource_t * rsc, pe_node_t *current, pe_node_t *chosen, pe_working_set_t * data_set)
{
pe_action_t *migrate_to = NULL;
pe_action_t *migrate_from = NULL;
pe_action_t *start = NULL;
pe_action_t *stop = NULL;
gboolean partial = rsc->partial_migration_target ? TRUE : FALSE;
pe_rsc_trace(rsc, "Processing migration actions %s moving from %s to %s . partial migration = %s",
rsc->id, current->details->id, chosen->details->id, partial ? "TRUE" : "FALSE");
start = start_action(rsc, chosen, TRUE);
stop = stop_action(rsc, current, TRUE);
if (partial == FALSE) {
migrate_to = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0),
RSC_MIGRATE, current, TRUE, TRUE, data_set);
}
migrate_from = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
RSC_MIGRATED, chosen, TRUE, TRUE, data_set);
if ((migrate_to && migrate_from) || (migrate_from && partial)) {
pe__set_action_flags(start, pe_action_migrate_runnable);
pe__set_action_flags(stop, pe_action_migrate_runnable);
// This is easier than trying to delete it from the graph
pe__set_action_flags(start, pe_action_pseudo);
/* order probes before migrations */
if (partial) {
pe__set_action_flags(migrate_from, pe_action_migrate_runnable);
migrate_from->needs = start->needs;
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
NULL, pe_order_optional, data_set);
} else {
pe__set_action_flags(migrate_from, pe_action_migrate_runnable);
pe__set_action_flags(migrate_to, pe_action_migrate_runnable);
migrate_to->needs = start->needs;
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0),
NULL, pe_order_optional, data_set);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
NULL,
pe_order_optional|pe_order_implies_first_migratable,
data_set);
}
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
pe_order_optional|pe_order_implies_first_migratable,
data_set);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
pe_order_optional|pe_order_implies_first_migratable|pe_order_pseudo_left,
data_set);
}
if (migrate_to) {
add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname);
add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname);
/* Pacemaker Remote connections don't require pending to be recorded in
* the CIB. We can reduce CIB writes by not setting PENDING for them.
*/
if (rsc->is_remote_node == FALSE) {
/* migrate_to takes place on the source node, but can
* have an effect on the target node depending on how
* the agent is written. Because of this, we have to maintain
* a record that the migrate_to occurred, in case the source node
* loses membership while the migrate_to action is still in-flight.
*/
add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true");
}
}
if (migrate_from) {
add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname);
add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname);
}
}
/*!
* \internal
* \brief Schedule actions to bring resource down and back to current role
*
* \param[in] rsc Resource to restart
* \param[in] current Node that resource should be brought down on
* \param[in] chosen Node that resource should be brought up on
* \param[in] need_stop Whether the resource must be stopped
* \param[in] need_promote Whether the resource must be promoted
*
* \return Role that resource would have after scheduled actions are taken
*/
static void
schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
pe_node_t *chosen, bool need_stop, bool need_promote)
{
enum rsc_role_e role = rsc->role;
enum rsc_role_e next_role;
pe__set_resource_flags(rsc, pe_rsc_restarting);
// Bring resource down to a stop on its current node
while (role != RSC_ROLE_STOPPED) {
next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
(need_stop? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
if (!rsc_action_matrix[role][next_role](rsc, current, !need_stop,
rsc->cluster)) {
break;
}
role = next_role;
}
// Bring resource up to its next role on its next node
while ((rsc->role <= rsc->next_role) && (role != rsc->role)
&& !pcmk_is_set(rsc->flags, pe_rsc_block)) {
bool required = need_stop;
next_role = rsc_state_matrix[role][rsc->role];
if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
required = true;
}
pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
(required? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
if (!rsc_action_matrix[role][next_role](rsc, chosen, !required,
rsc->cluster)) {
break;
}
role = next_role;
}
pe__clear_resource_flags(rsc, pe_rsc_restarting);
}
void
native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
{
pe_action_t *start = NULL;
pe_node_t *chosen = NULL;
pe_node_t *current = NULL;
gboolean need_stop = FALSE;
bool need_promote = FALSE;
gboolean is_moving = FALSE;
gboolean allow_migrate = FALSE;
GList *gIter = NULL;
unsigned int num_all_active = 0;
unsigned int num_clean_active = 0;
bool multiply_active = FALSE;
enum rsc_role_e role = RSC_ROLE_UNKNOWN;
enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
CRM_ASSERT(rsc != NULL);
allow_migrate = pcmk_is_set(rsc->flags, pe_rsc_allow_migrate)? TRUE : FALSE;
chosen = rsc->allocated_to;
next_role = rsc->next_role;
if (next_role == RSC_ROLE_UNKNOWN) {
pe__set_next_role(rsc,
(chosen == NULL)? RSC_ROLE_STOPPED : RSC_ROLE_STARTED,
"allocation");
}
pe_rsc_trace(rsc, "Creating all actions for %s transition from %s to %s (%s) on %s",
rsc->id, role2text(rsc->role), role2text(rsc->next_role),
((next_role == RSC_ROLE_UNKNOWN)? "implicit" : "explicit"),
((chosen == NULL)? "no node" : chosen->details->uname));
current = pe__find_active_on(rsc, &num_all_active, &num_clean_active);
for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) {
pe_node_t *dangling_source = (pe_node_t *) gIter->data;
pe_action_t *stop = NULL;
pe_rsc_trace(rsc, "Creating stop action %sfor %s on %s due to dangling migration",
pcmk_is_set(data_set->flags, pe_flag_remove_after_stop)? "and cleanup " : "",
rsc->id, dangling_source->details->uname);
stop = stop_action(rsc, dangling_source, FALSE);
pe__set_action_flags(stop, pe_action_dangle);
if (pcmk_is_set(data_set->flags, pe_flag_remove_after_stop)) {
DeleteRsc(rsc, dangling_source, FALSE, data_set);
}
}
if ((num_all_active == 2) && (num_clean_active == 2) && chosen
&& rsc->partial_migration_source && rsc->partial_migration_target
&& (current->details == rsc->partial_migration_source->details)
&& (chosen->details == rsc->partial_migration_target->details)) {
/* The chosen node is still the migration target from a partial
* migration. Attempt to continue the migration instead of recovering
* by stopping the resource everywhere and starting it on a single node.
*/
pe_rsc_trace(rsc, "Will attempt to continue with partial migration "
"to target %s from %s",
rsc->partial_migration_target->details->id,
rsc->partial_migration_source->details->id);
} else if (!pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
/* If a resource has "requires" set to nothing or quorum, don't consider
* it active on unclean nodes (similar to how all resources behave when
* stonith-enabled is false). We can start such resources elsewhere
* before fencing completes, and if we considered the resource active on
* the failed node, we would attempt recovery for being active on
* multiple nodes.
*/
multiply_active = (num_clean_active > 1);
} else {
multiply_active = (num_all_active > 1);
}
if (multiply_active) {
if (rsc->partial_migration_target && rsc->partial_migration_source) {
// Migration was in progress, but we've chosen a different target
crm_notice("Resource %s can no longer migrate from %s to %s "
"(will stop on both nodes)",
rsc->id, rsc->partial_migration_source->details->uname,
rsc->partial_migration_target->details->uname);
multiply_active = false;
} else {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Resource was (possibly) incorrectly multiply active
pe_proc_err("%s resource %s might be active on %u nodes (%s)",
pcmk__s(class, "Untyped"), rsc->id, num_all_active,
recovery2text(rsc->recovery_type));
crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information");
}
switch (rsc->recovery_type) {
case recovery_stop_start:
need_stop = TRUE;
break;
case recovery_stop_unexpected:
need_stop = TRUE; // StopRsc() will skip expected node
pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
break;
default:
break;
}
/* If by chance a partial migration is in process, but the migration
* target is not chosen still, clear all partial migration data.
*/
rsc->partial_migration_source = rsc->partial_migration_target = NULL;
allow_migrate = FALSE;
}
if (!multiply_active) {
pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
}
if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
pe_rsc_trace(rsc, "Creating start action for %s to represent already pending start",
rsc->id);
start = start_action(rsc, chosen, TRUE);
pe__set_action_flags(start, pe_action_print_always);
}
if (current && chosen && current->details != chosen->details) {
pe_rsc_trace(rsc, "Moving %s from %s to %s",
rsc->id, pcmk__s(current->details->uname, "unknown node"),
pcmk__s(chosen->details->uname, "unknown node"));
is_moving = TRUE;
need_stop = TRUE;
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
if (pcmk_is_set(rsc->flags, pe_rsc_stop)) {
need_stop = TRUE;
pe_rsc_trace(rsc, "Recovering %s", rsc->id);
} else {
pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
if (rsc->next_role == RSC_ROLE_PROMOTED) {
need_promote = TRUE;
}
}
} else if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
need_stop = TRUE;
} else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) {
pe_rsc_trace(rsc, "Creating start action for promoted resource %s",
rsc->id);
start = start_action(rsc, chosen, TRUE);
if (!pcmk_is_set(start->flags, pe_action_optional)) {
// Recovery of a promoted resource
pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
need_stop = TRUE;
}
}
/* Create any additional actions required when bringing resource down and
* back up to same level.
*/
schedule_restart_actions(rsc, current, chosen, need_stop, need_promote);
/* Required steps from this role to the next */
role = rsc->role;
while (role != rsc->next_role) {
next_role = rsc_state_matrix[role][rsc->next_role];
pe_rsc_trace(rsc, "Creating action to take %s from %s to %s (ending at %s)",
rsc->id, role2text(role), role2text(next_role),
role2text(rsc->next_role));
if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) {
break;
}
role = next_role;
}
if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
pe_rsc_trace(rsc, "Not creating recurring monitors for blocked resource %s",
rsc->id);
} else if ((rsc->next_role != RSC_ROLE_STOPPED)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc, "Creating recurring monitors for %s resource %s",
((rsc->next_role == RSC_ROLE_STOPPED)? "unmanaged" : "active"),
rsc->id);
start = start_action(rsc, chosen, TRUE);
Recurring(rsc, start, chosen, data_set);
Recurring_Stopped(rsc, start, chosen, data_set);
} else {
pe_rsc_trace(rsc, "Creating recurring monitors for inactive resource %s",
rsc->id);
Recurring_Stopped(rsc, NULL, NULL, data_set);
}
/* if we are stuck in a partial migration, where the target
* of the partial migration no longer matches the chosen target.
* A full stop/start is required */
if (rsc->partial_migration_target && (chosen == NULL || rsc->partial_migration_target->details != chosen->details)) {
pe_rsc_trace(rsc, "Not allowing partial migration of %s to continue",
rsc->id);
allow_migrate = FALSE;
} else if (!is_moving || !pcmk_is_set(rsc->flags, pe_rsc_managed)
|| pcmk_any_flags_set(rsc->flags,
pe_rsc_failed|pe_rsc_start_pending)
|| (current && current->details->unclean)
|| rsc->next_role < RSC_ROLE_STARTED) {
allow_migrate = FALSE;
}
if (allow_migrate) {
handle_migration_actions(rsc, current, chosen, data_set);
}
}
static void
rsc_avoids_remote_nodes(pe_resource_t *rsc)
{
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (node->details->remote_rsc) {
node->weight = -INFINITY;
}
}
}
/*!
* \internal
* \brief Return allowed nodes as (possibly sorted) list
*
* Convert a resource's hash table of allowed nodes to a list. If printing to
* stdout, sort the list, to keep action ID numbers consistent for regression
* test output (while avoiding the performance hit on a live cluster).
*
* \param[in] rsc Resource to check for allowed nodes
* \param[in] data_set Cluster working set
*
* \return List of resource's allowed nodes
* \note Callers should take care not to rely on the list being sorted.
*/
static GList *
allowed_nodes_as_list(pe_resource_t *rsc, pe_working_set_t *data_set)
{
GList *allowed_nodes = NULL;
if (rsc->allowed_nodes) {
allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
}
if (!pcmk__is_daemon) {
allowed_nodes = g_list_sort(allowed_nodes, sort_node_uname);
}
return allowed_nodes;
}
void
native_internal_constraints(pe_resource_t * rsc, pe_working_set_t * data_set)
{
/* This function is on the critical path and worth optimizing as much as possible */
pe_resource_t *top = NULL;
GList *allowed_nodes = NULL;
bool check_unfencing = FALSE;
bool check_utilization = false;
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc,
"Skipping native constraints for unmanaged resource: %s",
rsc->id);
return;
}
top = uber_parent(rsc);
// Whether resource requires unfencing
check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device)
&& pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)
&& pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing);
// Whether a non-default placement strategy is used
check_utilization = (g_hash_table_size(rsc->utilization) > 0)
&& !pcmk__str_eq(data_set->placement_strategy,
"default", pcmk__str_casei);
// Order stops before starts (i.e. restart)
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
pe_order_optional|pe_order_implies_then|pe_order_restart,
data_set);
// Promotable ordering: demote before stop, start before promote
if (pcmk_is_set(top->flags, pe_rsc_promotable)
|| (rsc->role > RSC_ROLE_UNPROMOTED)) {
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
pe_order_promoted_implies_first, data_set);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL,
pe_order_runnable_left, data_set);
}
// Don't clear resource history if probing on same node
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0),
NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0),
NULL, pe_order_same_node|pe_order_then_cancels_first,
data_set);
// Certain checks need allowed nodes
if (check_unfencing || check_utilization || rsc->container) {
allowed_nodes = allowed_nodes_as_list(rsc, data_set);
}
if (check_unfencing) {
/* Check if the node needs to be unfenced first */
for (GList *item = allowed_nodes; item; item = item->next) {
pe_node_t *node = item->data;
pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, data_set);
crm_debug("Ordering any stops of %s before %s, and any starts after",
rsc->id, unfence->uuid);
/*
* It would be more efficient to order clone resources once,
* rather than order each instance, but ordering the instance
* allows us to avoid unnecessary dependencies that might conflict
* with user constraints.
*
* @TODO: This constraint can still produce a transition loop if the
* resource has a stop scheduled on the node being unfenced, and
* there is a user ordering constraint to start some other resource
* (which will be ordered after the unfence) before stopping this
* resource. An example is "start some slow-starting cloned service
* before stopping an associated virtual IP that may be moving to
* it":
* stop this -> unfencing -> start that -> stop this
*/
pcmk__new_ordering(rsc, stop_key(rsc), NULL,
NULL, strdup(unfence->uuid), unfence,
pe_order_optional|pe_order_same_node, data_set);
pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
rsc, start_key(rsc), NULL,
pe_order_implies_then_on_node|pe_order_same_node,
data_set);
}
}
if (check_utilization) {
pcmk__create_utilization_constraints(rsc, allowed_nodes);
}
if (rsc->container) {
pe_resource_t *remote_rsc = NULL;
if (rsc->is_remote_node) {
// rsc is the implicit remote connection for a guest or bundle node
/* Do not allow a guest resource to live on a Pacemaker Remote node,
* to avoid nesting remotes. However, allow bundles to run on remote
* nodes.
*/
if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
rsc_avoids_remote_nodes(rsc->container);
}
/* If someone cleans up a guest or bundle node's container, we will
* likely schedule a (re-)probe of the container and recovery of the
* connection. Order the connection stop after the container probe,
* so that if we detect the container running, we will trigger a new
* transition and avoid the unnecessary recovery.
*/
pcmk__order_resource_actions(rsc->container, RSC_STATUS, rsc,
RSC_STOP, pe_order_optional, data_set);
/* A user can specify that a resource must start on a Pacemaker Remote
* node by explicitly configuring it with the container=NODENAME
* meta-attribute. This is of questionable merit, since location
* constraints can accomplish the same thing. But we support it, so here
* we check whether a resource (that is not itself a remote connection)
* has container set to a remote node or guest node resource.
*/
} else if (rsc->container->is_remote_node) {
remote_rsc = rsc->container;
} else {
remote_rsc = pe__resource_contains_guest_node(data_set,
rsc->container);
}
if (remote_rsc) {
/* Force the resource on the Pacemaker Remote node instead of
* colocating the resource with the container resource.
*/
for (GList *item = allowed_nodes; item; item = item->next) {
pe_node_t *node = item->data;
if (node->details->remote_rsc != remote_rsc) {
node->weight = -INFINITY;
}
}
} else {
/* This resource is either a filler for a container that does NOT
* represent a Pacemaker Remote node, or a Pacemaker Remote
* connection resource for a guest node or bundle.
*/
int score;
crm_trace("Order and colocate %s relative to its container %s",
rsc->id, rsc->container->id);
pcmk__new_ordering(rsc->container,
pcmk__op_key(rsc->container->id, RSC_START, 0),
NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0),
NULL,
pe_order_implies_then|pe_order_runnable_left,
data_set);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
rsc->container,
pcmk__op_key(rsc->container->id, RSC_STOP, 0),
NULL, pe_order_implies_first, data_set);
if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
score = 10000; /* Highly preferred but not essential */
} else {
score = INFINITY; /* Force them to run on the same host */
}
pcmk__new_colocation("resource-with-container", NULL, score, rsc,
rsc->container, NULL, NULL, true, data_set);
}
}
if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) {
/* don't allow remote nodes to run stonith devices
* or remote connection resources.*/
rsc_avoids_remote_nodes(rsc);
}
g_list_free(allowed_nodes);
}
void
native_rsc_colocation_lh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
if (dependent == NULL) {
pe_err("dependent was NULL for %s", constraint->id);
return;
} else if (constraint->primary == NULL) {
pe_err("primary was NULL for %s", constraint->id);
return;
}
pe_rsc_trace(dependent,
"Processing colocation constraint between %s and %s",
dependent->id, primary->id);
primary->cmds->rsc_colocation_rh(dependent, primary, constraint, data_set);
}
void
native_rsc_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
enum pcmk__coloc_affects filter_results;
CRM_ASSERT((dependent != NULL) && (primary != NULL));
filter_results = pcmk__colocation_affects(dependent, primary, constraint,
false);
pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
((constraint->score > 0)? "Colocating" : "Anti-colocating"),
dependent->id, primary->id, constraint->id, constraint->score,
filter_results);
switch (filter_results) {
case pcmk__coloc_affects_role:
pcmk__apply_coloc_to_priority(dependent, primary, constraint);
break;
case pcmk__coloc_affects_location:
pcmk__apply_coloc_to_weights(dependent, primary, constraint);
break;
case pcmk__coloc_affects_nothing:
default:
return;
}
}
enum pe_action_flags
native_action_flags(pe_action_t * action, pe_node_t * node)
{
return action->flags;
}
static inline bool
is_primitive_action(pe_action_t *action)
{
return action && action->rsc && (action->rsc->variant == pe_native);
}
/*!
* \internal
* \brief Clear a single action flag and set reason text
*
* \param[in] action Action whose flag should be cleared
* \param[in] flag Action flag that should be cleared
* \param[in] reason Action that is the reason why flag is being cleared
*/
#define clear_action_flag_because(action, flag, reason) do { \
if (pcmk_is_set((action)->flags, (flag))) { \
pe__clear_action_flags(action, flag); \
if ((action)->rsc != (reason)->rsc) { \
char *reason_text = pe__action2reason((reason), (flag)); \
pe_action_set_reason((action), reason_text, \
((flag) == pe_action_migrate_runnable)); \
free(reason_text); \
} \
} \
} while (0)
/*!
* \internal
* \brief Set action bits appropriately when pe_restart_order is used
*
* \param[in] first 'First' action in an ordering with pe_restart_order
* \param[in] then 'Then' action in an ordering with pe_restart_order
* \param[in] filter What ordering flags to care about
*
* \note pe_restart_order is set for "stop resource before starting it" and
* "stop later group member before stopping earlier group member"
*/
static void
handle_restart_ordering(pe_action_t *first, pe_action_t *then,
enum pe_action_flags filter)
{
const char *reason = NULL;
CRM_ASSERT(is_primitive_action(first));
CRM_ASSERT(is_primitive_action(then));
// We need to update the action in two cases:
// ... if 'then' is required
if (pcmk_is_set(filter, pe_action_optional)
&& !pcmk_is_set(then->flags, pe_action_optional)) {
reason = "restart";
}
/* ... if 'then' is unrunnable action on same resource (if a resource
* should restart but can't start, we still want to stop)
*/
if (pcmk_is_set(filter, pe_action_runnable)
&& !pcmk_is_set(then->flags, pe_action_runnable)
&& pcmk_is_set(then->rsc->flags, pe_rsc_managed)
&& (first->rsc == then->rsc)) {
reason = "stop";
}
if (reason == NULL) {
return;
}
pe_rsc_trace(first->rsc, "Handling %s -> %s for %s",
first->uuid, then->uuid, reason);
// Make 'first' required if it is runnable
if (pcmk_is_set(first->flags, pe_action_runnable)) {
clear_action_flag_because(first, pe_action_optional, then);
}
// Make 'first' required if 'then' is required
if (!pcmk_is_set(then->flags, pe_action_optional)) {
clear_action_flag_because(first, pe_action_optional, then);
}
// Make 'first' unmigratable if 'then' is unmigratable
if (!pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(first, pe_action_migrate_runnable, then);
}
// Make 'then' unrunnable if 'first' is required but unrunnable
if (!pcmk_is_set(first->flags, pe_action_optional)
&& !pcmk_is_set(first->flags, pe_action_runnable)) {
clear_action_flag_because(then, pe_action_runnable, first);
}
}
/* \param[in] flags Flags from action_flags_for_ordering()
*/
enum pe_graph_flags
native_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node,
enum pe_action_flags flags, enum pe_action_flags filter,
enum pe_ordering type, pe_working_set_t *data_set)
{
enum pe_graph_flags changed = pe_graph_none;
enum pe_action_flags then_flags = then->flags;
enum pe_action_flags first_flags = first->flags;
if (type & pe_order_asymmetrical) {
pe_resource_t *then_rsc = then->rsc;
enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0;
if (!then_rsc) {
/* ignore */
} else if ((then_rsc_role == RSC_ROLE_STOPPED) && pcmk__str_eq(then->task, RSC_STOP, pcmk__str_casei)) {
/* ignore... if 'then' is supposed to be stopped after 'first', but
* then is already stopped, there is nothing to be done when non-symmetrical. */
} else if ((then_rsc_role >= RSC_ROLE_STARTED)
&& pcmk__str_eq(then->task, RSC_START, pcmk__str_casei)
&& pcmk_is_set(then->flags, pe_action_optional)
&& then->node
&& pcmk__list_of_1(then_rsc->running_on)
&& then->node->details == ((pe_node_t *) then_rsc->running_on->data)->details) {
/* Ignore. If 'then' is supposed to be started after 'first', but
* 'then' is already started, there is nothing to be done when
* asymmetrical -- unless the start is mandatory, which indicates
* the resource is restarting, and the ordering is still needed.
*/
} else if (!(first->flags & pe_action_runnable)) {
/* prevent 'then' action from happening if 'first' is not runnable and
* 'then' has not yet occurred. */
clear_action_flag_because(then, pe_action_optional, first);
clear_action_flag_because(then, pe_action_runnable, first);
} else {
/* ignore... then is allowed to start/stop if it wants to. */
}
}
if (pcmk_is_set(type, pe_order_implies_first)
&& !pcmk_is_set(then_flags, pe_action_optional)) {
// Then is required, and implies first should be, too
if (pcmk_is_set(filter, pe_action_optional)
&& !pcmk_is_set(flags, pe_action_optional)
&& pcmk_is_set(first_flags, pe_action_optional)) {
clear_action_flag_because(first, pe_action_optional, then);
}
if (pcmk_is_set(flags, pe_action_migrate_runnable) &&
!pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(first, pe_action_migrate_runnable, then);
}
}
if (type & pe_order_promoted_implies_first) {
if ((filter & pe_action_optional) &&
((then->flags & pe_action_optional) == FALSE) &&
(then->rsc != NULL) && (then->rsc->role == RSC_ROLE_PROMOTED)) {
clear_action_flag_because(first, pe_action_optional, then);
if (pcmk_is_set(first->flags, pe_action_migrate_runnable) &&
!pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(first, pe_action_migrate_runnable,
then);
}
}
}
if ((type & pe_order_implies_first_migratable)
&& pcmk_is_set(filter, pe_action_optional)) {
if (((then->flags & pe_action_migrate_runnable) == FALSE) ||
((then->flags & pe_action_runnable) == FALSE)) {
clear_action_flag_because(first, pe_action_runnable, then);
}
if ((then->flags & pe_action_optional) == 0) {
clear_action_flag_because(first, pe_action_optional, then);
}
}
if ((type & pe_order_pseudo_left)
&& pcmk_is_set(filter, pe_action_optional)) {
if ((first->flags & pe_action_runnable) == FALSE) {
clear_action_flag_because(then, pe_action_migrate_runnable, first);
pe__clear_action_flags(then, pe_action_pseudo);
}
}
if (pcmk_is_set(type, pe_order_runnable_left)
&& pcmk_is_set(filter, pe_action_runnable)
&& pcmk_is_set(then->flags, pe_action_runnable)
&& !pcmk_is_set(flags, pe_action_runnable)) {
clear_action_flag_because(then, pe_action_runnable, first);
clear_action_flag_because(then, pe_action_migrate_runnable, first);
}
if (pcmk_is_set(type, pe_order_implies_then)
&& pcmk_is_set(filter, pe_action_optional)
&& pcmk_is_set(then->flags, pe_action_optional)
&& !pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(first->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(then, pe_action_optional, first);
}
if (pcmk_is_set(type, pe_order_restart)) {
handle_restart_ordering(first, then, filter);
}
if (then_flags != then->flags) {
pe__set_graph_flags(changed, first, pe_graph_updated_then);
pe_rsc_trace(then->rsc,
"%s on %s: flags are now %#.6x (was %#.6x) "
"because of 'first' %s (%#.6x)",
then->uuid,
then->node? then->node->details->uname : "no node",
then->flags, then_flags, first->uuid, first->flags);
if(then->rsc && then->rsc->parent) {
/* "X_stop then X_start" doesn't get handled for cloned groups unless we do this */
pcmk__update_action_for_orderings(then, data_set);
}
}
if (first_flags != first->flags) {
pe__set_graph_flags(changed, first, pe_graph_updated_first);
pe_rsc_trace(first->rsc,
"%s on %s: flags are now %#.6x (was %#.6x) "
"because of 'then' %s (%#.6x)",
first->uuid,
first->node? first->node->details->uname : "no node",
first->flags, first_flags, then->uuid, then->flags);
}
return changed;
}
void
native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
pcmk__apply_location(constraint, rsc);
}
void
native_expand(pe_resource_t * rsc, pe_working_set_t * data_set)
{
GList *gIter = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Processing actions from %s", rsc->id);
for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
crm_trace("processing action %d for rsc=%s", action->id, rsc->id);
pcmk__add_action_to_graph(action, data_set);
}
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->expand(child_rsc, data_set);
}
}
/*!
* \internal
* \brief Check whether a node is a multiply active resource's expected node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p rsc is multiply active with multiple-active set to
* stop_unexpected, and \p node is the node where it will remain active
* \note This assumes that the resource's next role cannot be changed to stopped
* after this is called, which should be reasonable if status has already
* been unpacked and resources have been assigned to nodes.
*/
static bool
is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
{
return pcmk_all_flags_set(rsc->flags,
pe_rsc_stop_unexpected|pe_rsc_restarting)
&& (rsc->next_role > RSC_ROLE_STOPPED)
&& (rsc->allocated_to != NULL) && (node != NULL)
&& (rsc->allocated_to->details == node->details);
}
gboolean
StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set)
{
GList *gIter = NULL;
CRM_ASSERT(rsc);
for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
pe_node_t *current = (pe_node_t *) gIter->data;
pe_action_t *stop;
if (is_expected_node(rsc, current)) {
/* We are scheduling restart actions for a multiply active resource
* with multiple-active=stop_unexpected, and this is where it should
* not be stopped.
*/
pe_rsc_trace(rsc,
"Skipping stop of multiply active resource %s "
"on expected node %s",
rsc->id, current->details->uname);
continue;
}
if (rsc->partial_migration_target) {
if (rsc->partial_migration_target->details == current->details
// Only if the allocated node still is the migration target.
&& rsc->allocated_to
&& rsc->allocated_to->details == rsc->partial_migration_target->details) {
pe_rsc_trace(rsc,
"Skipping stop of %s on %s "
"because migration to %s in progress",
rsc->id, current->details->uname,
next->details->uname);
continue;
} else {
pe_rsc_trace(rsc,
"Forcing stop of %s on %s "
"because migration target changed",
rsc->id, current->details->uname);
optional = FALSE;
}
}
pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
rsc->id, current->details->uname);
stop = stop_action(rsc, current, optional);
if(rsc->allocated_to == NULL) {
pe_action_set_reason(stop, "node availability", TRUE);
} else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting
|pe_rsc_stop_unexpected)) {
/* We are stopping a multiply active resource on a node that is
* not its expected node, and we are still scheduling restart
* actions, so the stop is for being multiply active.
*/
pe_action_set_reason(stop, "being multiply active", TRUE);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe__clear_action_flags(stop, pe_action_runnable);
}
if (pcmk_is_set(data_set->flags, pe_flag_remove_after_stop)) {
DeleteRsc(rsc, current, optional, data_set);
}
if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) {
pe_action_t *unfence = pe_fence_op(current, "on", TRUE, NULL, FALSE, data_set);
order_actions(stop, unfence, pe_order_implies_first);
if (!pcmk__node_unfenced(current)) {
pe_proc_err("Stopping %s until %s can be unfenced", rsc->id, current->details->uname);
}
}
}
return TRUE;
}
gboolean
StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set)
{
pe_action_t *start = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (weight=%d)",
(optional? "optional" : "required"), rsc->id,
((next == NULL)? "N/A" : next->details->uname),
((next == NULL)? 0 : next->weight));
start = start_action(rsc, next, TRUE);
pcmk__order_vs_unfence(rsc, next, start, pe_order_implies_then, data_set);
if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) {
pe__clear_action_flags(start, pe_action_optional);
}
if (is_expected_node(rsc, next)) {
/* This could be a problem if the start becomes necessary for other
* reasons later.
*/
pe_rsc_trace(rsc,
"Start of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, next->details->uname);
pe__set_action_flags(start, pe_action_pseudo);
}
return TRUE;
}
gboolean
PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set)
{
GList *gIter = NULL;
gboolean runnable = TRUE;
GList *action_list = NULL;
CRM_ASSERT(rsc);
CRM_CHECK(next != NULL, return FALSE);
pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname);
action_list = pe__resource_actions(rsc, next, RSC_START, TRUE);
for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
pe_action_t *start = (pe_action_t *) gIter->data;
if (!pcmk_is_set(start->flags, pe_action_runnable)) {
runnable = FALSE;
}
}
g_list_free(action_list);
if (runnable) {
pe_action_t *promote = promote_action(rsc, next, optional);
if (is_expected_node(rsc, next)) {
/* This could be a problem if the promote becomes necessary for
* other reasons later.
*/
pe_rsc_trace(rsc,
"Promotion of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, next->details->uname);
pe__set_action_flags(promote, pe_action_pseudo);
}
return TRUE;
}
pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id);
action_list = pe__resource_actions(rsc, next, RSC_PROMOTE, TRUE);
for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
pe_action_t *promote = (pe_action_t *) gIter->data;
pe__clear_action_flags(promote, pe_action_runnable);
}
g_list_free(action_list);
return TRUE;
}
gboolean
DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set)
{
GList *gIter = NULL;
CRM_ASSERT(rsc);
if (is_expected_node(rsc, next)) {
pe_rsc_trace(rsc,
"Skipping demote of multiply active resource %s "
"on expected node %s",
rsc->id, next->details->uname);
return TRUE;
}
pe_rsc_trace(rsc, "%s", rsc->id);
/* CRM_CHECK(rsc->next_role == RSC_ROLE_UNPROMOTED, return FALSE); */
for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
pe_node_t *current = (pe_node_t *) gIter->data;
pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A");
demote_action(rsc, current, optional);
}
return TRUE;
}
gboolean
RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set)
{
CRM_ASSERT(rsc);
crm_err("%s on %s", rsc->id, next ? next->details->uname : "N/A");
CRM_CHECK(FALSE, return FALSE);
return FALSE;
}
gboolean
NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set)
{
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s", rsc->id);
return FALSE;
}
gboolean
DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set)
{
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname);
return FALSE;
} else if (node == NULL) {
pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id);
return FALSE;
} else if (node->details->unclean || node->details->online == FALSE) {
pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id,
node->details->uname);
return FALSE;
}
crm_notice("Removing %s from %s", rsc->id, node->details->uname);
delete_action(rsc, node, optional);
pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_DELETE,
optional? pe_order_implies_then : pe_order_optional,
data_set);
pcmk__order_resource_actions(rsc, RSC_DELETE, rsc, RSC_START,
optional? pe_order_implies_then : pe_order_optional,
data_set);
return TRUE;
}
gboolean
native_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete,
gboolean force, pe_working_set_t * data_set)
{
enum pe_ordering flags = pe_order_optional;
char *key = NULL;
pe_action_t *probe = NULL;
pe_node_t *running = NULL;
pe_node_t *allowed = NULL;
pe_resource_t *top = uber_parent(rsc);
static const char *rc_promoted = NULL;
static const char *rc_inactive = NULL;
if (rc_inactive == NULL) {
rc_inactive = pcmk__itoa(PCMK_OCF_NOT_RUNNING);
rc_promoted = pcmk__itoa(PCMK_OCF_RUNNING_PROMOTED);
}
CRM_CHECK(node != NULL, return FALSE);
if (!force && !pcmk_is_set(data_set->flags, pe_flag_startup_probes)) {
pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id);
return FALSE;
}
if (pe__is_guest_or_remote_node(node)) {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
pe_rsc_trace(rsc,
"Skipping probe for %s on %s because Pacemaker Remote nodes cannot run stonith agents",
rsc->id, node->details->id);
return FALSE;
} else if (pe__is_guest_node(node)
&& pe__resource_contains_guest_node(data_set, rsc)) {
pe_rsc_trace(rsc,
"Skipping probe for %s on %s because guest nodes cannot run resources containing guest nodes",
rsc->id, node->details->id);
return FALSE;
} else if (rsc->is_remote_node) {
pe_rsc_trace(rsc,
"Skipping probe for %s on %s because Pacemaker Remote nodes cannot host remote connections",
rsc->id, node->details->id);
return FALSE;
}
}
if (rsc->children) {
GList *gIter = NULL;
gboolean any_created = FALSE;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set)
|| any_created;
}
return any_created;
} else if ((rsc->container) && (!rsc->is_remote_node)) {
pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id);
return FALSE;
}
if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id);
return FALSE;
}
// Check whether resource is already known on node
if (!force && g_hash_table_lookup(rsc->known_on, node->details->id)) {
pe_rsc_trace(rsc, "Skipping known: %s on %s", rsc->id, node->details->uname);
return FALSE;
}
allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (rsc->exclusive_discover || top->exclusive_discover) {
if (allowed == NULL) {
/* exclusive discover is enabled and this node is not in the allowed list. */
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, A", rsc->id, node->details->id);
return FALSE;
} else if (allowed->rsc_discover_mode != pe_discover_exclusive) {
/* exclusive discover is enabled and this node is not marked
* as a node this resource should be discovered on */
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, B", rsc->id, node->details->id);
return FALSE;
}
}
if(allowed == NULL && node->rsc_discover_mode == pe_discover_never) {
/* If this node was allowed to host this resource it would
* have been explicitly added to the 'allowed_nodes' list.
* However it wasn't and the node has discovery disabled, so
* no need to probe for this resource.
*/
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, C", rsc->id, node->details->id);
return FALSE;
}
if (allowed && allowed->rsc_discover_mode == pe_discover_never) {
/* this resource is marked as not needing to be discovered on this node */
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, discovery mode", rsc->id, node->details->id);
return FALSE;
}
if (pe__is_guest_node(node)) {
pe_resource_t *remote = node->details->remote_rsc->container;
if(remote->role == RSC_ROLE_STOPPED) {
/* If the container is stopped, then we know anything that
* might have been inside it is also stopped and there is
* no need to probe.
*
* If we don't know the container's state on the target
* either:
*
* - the container is running, the transition will abort
* and we'll end up in a different case next time, or
*
* - the container is stopped
*
* Either way there is no need to probe.
*
*/
if(remote->allocated_to
&& g_hash_table_lookup(remote->known_on, remote->allocated_to->details->id) == NULL) {
/* For safety, we order the 'rsc' start after 'remote'
* has been probed.
*
* Using 'top' helps for groups, but we may need to
* follow the start's ordering chain backwards.
*/
pcmk__new_ordering(remote,
pcmk__op_key(remote->id, RSC_STATUS, 0),
NULL, top,
pcmk__op_key(top->id, RSC_START, 0), NULL,
pe_order_optional, data_set);
}
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopped",
rsc->id, node->details->id, remote->id);
return FALSE;
/* Here we really we want to check if remote->stop is required,
* but that information doesn't exist yet
*/
} else if(node->details->remote_requires_reset
|| node->details->unclean
|| pcmk_is_set(remote->flags, pe_rsc_failed)
|| remote->next_role == RSC_ROLE_STOPPED
|| (remote->allocated_to
&& pe_find_node(remote->running_on, remote->allocated_to->details->uname) == NULL)
) {
/* The container is stopping or restarting, don't start
* 'rsc' until 'remote' stops as this also implies that
* 'rsc' is stopped - avoiding the need to probe
*/
pcmk__new_ordering(remote, pcmk__op_key(remote->id, RSC_STOP, 0),
NULL, top, pcmk__op_key(top->id, RSC_START, 0),
NULL, pe_order_optional, data_set);
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopping, restarting or moving",
rsc->id, node->details->id, remote->id);
return FALSE;
/* } else {
* The container is running so there is no problem probing it
*/
}
}
key = pcmk__op_key(rsc->id, RSC_STATUS, 0);
probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set);
pe__clear_action_flags(probe, pe_action_optional);
pcmk__order_vs_unfence(rsc, node, probe, pe_order_optional, data_set);
/*
* We need to know if it's running_on (not just known_on) this node
* to correctly determine the target rc.
*/
running = pe_find_node_id(rsc->running_on, node->details->id);
if (running == NULL) {
add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive);
} else if (rsc->role == RSC_ROLE_PROMOTED) {
add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_promoted);
}
crm_debug("Probing %s on %s (%s) %d %p", rsc->id, node->details->uname, role2text(rsc->role),
pcmk_is_set(probe->flags, pe_action_runnable), rsc->running_on);
if ((pcmk_is_set(rsc->flags, pe_rsc_fence_device)
&& pcmk_is_set(data_set->flags, pe_flag_enable_unfencing))
|| !pe_rsc_is_clone(top)) {
top = rsc;
} else {
crm_trace("Probing %s on %s (%s) as %s", rsc->id, node->details->uname, role2text(rsc->role), top->id);
}
if (!pcmk_is_set(probe->flags, pe_action_runnable)
&& (rsc->running_on == NULL)) {
/* Prevent the start from occurring if rsc isn't active, but
* don't cause it to stop if it was active already
*/
pe__set_order_flags(flags, pe_order_runnable_left);
}
pcmk__new_ordering(rsc, NULL, probe, top,
pcmk__op_key(top->id, RSC_START, 0), NULL, flags,
data_set);
// Order the probe before any agent reload
pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
pe_order_optional, data_set);
return TRUE;
}
void
native_append_meta(pe_resource_t * rsc, xmlNode * xml)
{
char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
pe_resource_t *parent;
if (value) {
char *name = NULL;
name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
crm_xml_add(xml, name, value);
free(name);
}
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
if (value) {
char *name = NULL;
name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
crm_xml_add(xml, name, value);
free(name);
}
for (parent = rsc; parent != NULL; parent = parent->parent) {
if (parent->container) {
crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, parent->container->id);
}
}
}
// Primitive implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__primitive_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization)
{
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return;
}
pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization",
orig_rsc->id, rsc->id);
pcmk__release_node_capacity(utilization, rsc);
}
/*!
* \internal
* \brief Get epoch time of node's shutdown attribute (or now if none)
*
* \param[in] node Node to check
* \param[in] data_set Cluster working set
*
* \return Epoch time corresponding to shutdown attribute if set or now if not
*/
static time_t
shutdown_time(pe_node_t *node, pe_working_set_t *data_set)
{
const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
time_t result = 0;
if (shutdown != NULL) {
long long result_ll;
if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
result = (time_t) result_ll;
}
}
return (result == 0)? get_effective_time(data_set) : result;
}
// Primitive implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__primitive_shutdown_lock(pe_resource_t *rsc)
{
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Fence devices and remote connections can't be locked
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
|| pe__resource_is_remote_conn(rsc, rsc->cluster)) {
return;
}
if (rsc->lock_node != NULL) {
// The lock was obtained from resource history
if (rsc->running_on != NULL) {
/* The resource was started elsewhere even though it is now
* considered locked. This shouldn't be possible, but as a
* failsafe, we don't want to disturb the resource now.
*/
pe_rsc_info(rsc,
"Cancelling shutdown lock because %s is already active",
rsc->id);
pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster);
rsc->lock_node = NULL;
rsc->lock_time = 0;
}
// Only a resource active on exactly one node can be locked
} else if (pcmk__list_of_1(rsc->running_on)) {
pe_node_t *node = rsc->running_on->data;
if (node->details->shutdown) {
if (node->details->unclean) {
pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
rsc->id, node->details->uname);
} else {
rsc->lock_node = node;
rsc->lock_time = shutdown_time(node, rsc->cluster);
}
}
}
if (rsc->lock_node == NULL) {
// No lock needed
return;
}
if (rsc->cluster->shutdown_lock > 0) {
time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
rsc->id, rsc->lock_node->details->uname,
(long long) lock_expiration);
pe__update_recheck_time(++lock_expiration, rsc->cluster);
} else {
pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
rsc->id, rsc->lock_node->details->uname);
}
// If resource is locked to one node, ban it from all other nodes
for (GList *item = rsc->cluster->nodes; item != NULL; item = item->next) {
pe_node_t *node = item->data;
if (strcmp(node->details->uname, rsc->lock_node->details->uname)) {
resource_location(rsc, node, -CRM_SCORE_INFINITY,
XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster);
}
}
}
diff --git a/lib/pacemaker/pcmk_sched_nodes.c b/lib/pacemaker/pcmk_sched_nodes.c
index ceafaa419e..ebd59dab04 100644
--- a/lib/pacemaker/pcmk_sched_nodes.c
+++ b/lib/pacemaker/pcmk_sched_nodes.c
@@ -1,340 +1,360 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <crm/lrmd.h> // lrmd_event_data_t
#include <crm/common/xml_internal.h>
#include <pacemaker-internal.h>
#include <pacemaker.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Check whether a node is available to run resources
*
- * \param[in] node Node to check
+ * \param[in] node Node to check
+ * \param[in] consider_score If true, consider a negative score unavailable
+ * \param[in] consider_guest If true, consider a guest node unavailable whose
+ * resource will not be active
*
* \return true if node is online and not shutting down, unclean, or in standby
* or maintenance mode, otherwise false
*/
bool
-pcmk__node_available(const pe_node_t *node)
+pcmk__node_available(const pe_node_t *node, bool consider_score,
+ bool consider_guest)
{
- // @TODO Should we add (node->weight >= 0)?
- return (node != NULL) && (node->details != NULL) && node->details->online
- && !node->details->shutdown && !node->details->unclean
- && !node->details->standby && !node->details->maintenance;
+ if ((node == NULL) || (node->details == NULL) || !node->details->online
+ || node->details->shutdown || node->details->unclean
+ || node->details->standby || node->details->maintenance) {
+ return false;
+ }
+
+ if (consider_score && (node->weight < 0)) {
+ return false;
+ }
+
+ // @TODO Go through all callers to see which should set consider_guest
+ if (consider_guest && pe__is_guest_node(node)) {
+ pe_resource_t *guest = node->details->remote_rsc->container;
+
+ if (guest->fns->location(guest, NULL, FALSE) == NULL) {
+ return false;
+ }
+ }
+
+ return true;
}
/*!
* \internal
* \brief Copy a hash table of node objects
*
* \param[in] nodes Hash table to copy
*
* \return New copy of nodes (or NULL if nodes is NULL)
*/
GHashTable *
pcmk__copy_node_table(GHashTable *nodes)
{
GHashTable *new_table = NULL;
GHashTableIter iter;
pe_node_t *node = NULL;
if (nodes == NULL) {
return NULL;
}
new_table = pcmk__strkey_table(NULL, free);
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
pe_node_t *new_node = pe__copy_node(node);
g_hash_table_insert(new_table, (gpointer) new_node->details->id,
new_node);
}
return new_table;
}
/*!
* \internal
* \brief Copy a list of node objects
*
* \param[in] list List to copy
* \param[in] reset Set copies' scores to 0
*
* \return New list of shallow copies of nodes in original list
*/
GList *
pcmk__copy_node_list(const GList *list, bool reset)
{
GList *result = NULL;
for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) {
pe_node_t *new_node = NULL;
pe_node_t *this_node = (pe_node_t *) gIter->data;
new_node = pe__copy_node(this_node);
if (reset) {
new_node->weight = 0;
}
result = g_list_prepend(result, new_node);
}
return result;
}
struct node_weight_s {
pe_node_t *active;
pe_working_set_t *data_set;
};
/*!
* \internal
* \brief Compare two nodes for allocation desirability
*
* Given two nodes, check which one is more preferred by allocation criteria
* such as node weight and utilization.
*
* \param[in] a First node to compare
* \param[in] b Second node to compare
* \param[in] data Sort data (as struct node_weight_s *)
*
* \return -1 if \p a is preferred, +1 if \p b is preferred, or 0 if they are
* equally preferred
*/
static gint
compare_nodes(gconstpointer a, gconstpointer b, gpointer data)
{
const pe_node_t *node1 = (const pe_node_t *) a;
const pe_node_t *node2 = (const pe_node_t *) b;
struct node_weight_s *nw = data;
int node1_weight = 0;
int node2_weight = 0;
int result = 0;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
// Compare node weights
- node1_weight = pcmk__node_available(node1)? node1->weight : -INFINITY;
- node2_weight = pcmk__node_available(node2)? node2->weight : -INFINITY;
+ node1_weight = pcmk__node_available(node1, false, false)? node1->weight : -INFINITY;
+ node2_weight = pcmk__node_available(node2, false, false)? node2->weight : -INFINITY;
if (node1_weight > node2_weight) {
crm_trace("%s (%d) > %s (%d) : weight",
node1->details->uname, node1_weight, node2->details->uname,
node2_weight);
return -1;
}
if (node1_weight < node2_weight) {
crm_trace("%s (%d) < %s (%d) : weight",
node1->details->uname, node1_weight, node2->details->uname,
node2_weight);
return 1;
}
crm_trace("%s (%d) == %s (%d) : weight",
node1->details->uname, node1_weight, node2->details->uname,
node2_weight);
// If appropriate, compare node utilization
if (pcmk__str_eq(nw->data_set->placement_strategy, "minimal",
pcmk__str_casei)) {
goto equal;
}
if (pcmk__str_eq(nw->data_set->placement_strategy, "balanced",
pcmk__str_casei)) {
result = pcmk__compare_node_capacities(node1, node2);
if (result < 0) {
crm_trace("%s > %s : capacity (%d)",
node1->details->uname, node2->details->uname, result);
return -1;
} else if (result > 0) {
crm_trace("%s < %s : capacity (%d)",
node1->details->uname, node2->details->uname, result);
return 1;
}
}
// Compare number of allocated resources
if (node1->details->num_resources < node2->details->num_resources) {
crm_trace("%s (%d) > %s (%d) : resources",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return -1;
} else if (node1->details->num_resources > node2->details->num_resources) {
crm_trace("%s (%d) < %s (%d) : resources",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return 1;
}
// Check whether one node is already running desired resource
if (nw->active != NULL) {
if (nw->active->details == node1->details) {
crm_trace("%s (%d) > %s (%d) : active",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return -1;
} else if (nw->active->details == node2->details) {
crm_trace("%s (%d) < %s (%d) : active",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return 1;
}
}
// If all else is equal, prefer node with lowest-sorting name
equal:
crm_trace("%s = %s", node1->details->uname, node2->details->uname);
return strcmp(node1->details->uname, node2->details->uname);
}
/*!
* \internal
* \brief Sort a list of nodes by allocation desirability
*
* \param[in] nodes Node list to sort
* \param[in] active_node If not NULL, node currently running resource
* \param[in] data_set Cluster working set
*
* \return New head of sorted list
*/
GList *
pcmk__sort_nodes(GList *nodes, pe_node_t *active_node,
pe_working_set_t *data_set)
{
struct node_weight_s nw = { active_node, data_set };
return g_list_sort_with_data(nodes, compare_nodes, &nw);
}
/*!
* \internal
* \brief Check whether any node is available to run resources
*
* \param[in] nodes Nodes to check
*
* \return true if any node in \p nodes is available to run resources,
* otherwise false
*/
bool
pcmk__any_node_available(GHashTable *nodes)
{
GHashTableIter iter;
pe_node_t *node = NULL;
if (nodes == NULL) {
return false;
}
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
- if ((node->weight >= 0) && pcmk__node_available(node)) {
+ if (pcmk__node_available(node, true, false)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Apply node health values for all nodes in cluster
*
* \param[in] data_set Cluster working set
*/
void
pcmk__apply_node_health(pe_working_set_t *data_set)
{
int base_health = 0;
enum pcmk__health_strategy strategy;
const char *strategy_str = pe_pref(data_set->config_hash,
PCMK__OPT_NODE_HEALTH_STRATEGY);
strategy = pcmk__parse_health_strategy(strategy_str);
if (strategy == pcmk__health_strategy_none) {
return;
}
crm_info("Applying node health strategy '%s'", strategy_str);
// The progressive strategy can use a base health score
if (strategy == pcmk__health_strategy_progressive) {
base_health = pe__health_score(PCMK__OPT_NODE_HEALTH_BASE, data_set);
}
for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
int health = pe__sum_node_health_scores(node, base_health);
// An overall health score of 0 has no effect
if (health == 0) {
continue;
}
crm_info("Node %s overall system health is %d",
node->details->uname, health);
// Use node health as a location score for each resource on the node
for (GList *r = data_set->resources; r != NULL; r = r->next) {
pe_resource_t *rsc = (pe_resource_t *) r->data;
bool constrain = true;
if (health < 0) {
/* Negative health scores do not apply to resources with
* allow-unhealthy-nodes=true.
*/
constrain = !crm_is_true(g_hash_table_lookup(rsc->meta,
PCMK__META_ALLOW_UNHEALTHY_NODES));
}
if (constrain) {
pcmk__new_location(strategy_str, rsc, health, NULL, node,
data_set);
} else {
pe_rsc_trace(rsc, "%s is immune from health ban on %s",
rsc->id, node->details->uname);
}
}
}
}
/*!
* \internal
* \brief Check for a node in a resource's parent's allowed nodes
*
* \param[in] rsc Resource whose parent should be checked
* \param[in] node Node to check for
*
* \return Equivalent of \p node from \p rsc's parent's allowed nodes if any,
* otherwise NULL
*/
pe_node_t *
pcmk__top_allowed_node(const pe_resource_t *rsc, const pe_node_t *node)
{
GHashTable *allowed_nodes = NULL;
if ((rsc == NULL) || (node == NULL)) {
return NULL;
} else if (rsc->parent == NULL) {
allowed_nodes = rsc->allowed_nodes;
} else {
allowed_nodes = rsc->parent->allowed_nodes;
}
return pe_hash_table_lookup(allowed_nodes, node->details->id);
}
diff --git a/lib/pacemaker/pcmk_sched_promotable.c b/lib/pacemaker/pcmk_sched_promotable.c
index f97555c817..c889281cc5 100644
--- a/lib/pacemaker/pcmk_sched_promotable.c
+++ b/lib/pacemaker/pcmk_sched_promotable.c
@@ -1,1049 +1,1024 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
#define VARIANT_CLONE 1
#include <lib/pengine/variant.h>
-extern bool pcmk__is_daemon;
-
+/*!
+ * \internal
+ * \brief Add implicit promotion ordering for a promotable instance
+ *
+ * \param[in] clone Clone resource
+ * \param[in] child Instance of \p clone being ordered
+ * \param[in] last Previous instance ordered (NULL if \p child is first)
+ */
static void
-child_promoting_constraints(clone_variant_data_t * clone_data, enum pe_ordering type,
- pe_resource_t * rsc, pe_resource_t * child, pe_resource_t * last,
- pe_working_set_t * data_set)
+order_instance_promotion(pe_resource_t *clone, pe_resource_t *child,
+ pe_resource_t *last)
{
- if (child == NULL) {
- if (clone_data->ordered && last != NULL) {
- pe_rsc_trace(rsc, "Ordered version (last node)");
- /* last child promote before promoted started */
- pcmk__order_resource_actions(last, RSC_PROMOTE, rsc, RSC_PROMOTED,
- type, data_set);
- }
- return;
- }
-
- /* child promote before global promoted */
- pcmk__order_resource_actions(child, RSC_PROMOTE, rsc, RSC_PROMOTED, type,
- data_set);
-
- /* global promote before child promote */
- pcmk__order_resource_actions(rsc, RSC_PROMOTE, child, RSC_PROMOTE, type,
- data_set);
-
- if (clone_data->ordered) {
- pe_rsc_trace(rsc, "Ordered version");
- if (last == NULL) {
- /* global promote before first child promote */
- last = rsc;
-
- }
- /* else: child/child relative promote */
- pcmk__order_starts(last, child, type, data_set);
+ // "Promote clone" -> promote instance -> "clone promoted"
+ pcmk__order_resource_actions(clone, RSC_PROMOTE, child, RSC_PROMOTE,
+ pe_order_optional, clone->cluster);
+ pcmk__order_resource_actions(child, RSC_PROMOTE, clone, RSC_PROMOTED,
+ pe_order_optional, clone->cluster);
+
+ // If clone is ordered, order this instance relative to last
+ if ((last != NULL) && pe__clone_is_ordered(clone)) {
pcmk__order_resource_actions(last, RSC_PROMOTE, child, RSC_PROMOTE,
- type, data_set);
-
- } else {
- pe_rsc_trace(rsc, "Un-ordered version");
+ pe_order_optional, clone->cluster);
}
}
+/*!
+ * \internal
+ * \brief Add implicit demotion ordering for a promotable instance
+ *
+ * \param[in] clone Clone resource
+ * \param[in] child Instance of \p clone being ordered
+ * \param[in] last Previous instance ordered (NULL if \p child is first)
+ */
static void
-child_demoting_constraints(clone_variant_data_t * clone_data, enum pe_ordering type,
- pe_resource_t * rsc, pe_resource_t * child, pe_resource_t * last,
- pe_working_set_t * data_set)
+order_instance_demotion(pe_resource_t *clone, pe_resource_t *child,
+ pe_resource_t *last)
{
- if (child == NULL) {
- if (clone_data->ordered && last != NULL) {
- pe_rsc_trace(rsc, "Ordered version (last node)");
- /* global demote before first child demote */
- pcmk__order_resource_actions(rsc, RSC_DEMOTE, last, RSC_DEMOTE,
- pe_order_optional, data_set);
- }
- return;
- }
-
- /* child demote before global demoted */
- pcmk__order_resource_actions(child, RSC_DEMOTE, rsc, RSC_DEMOTED,
- pe_order_implies_then_printed, data_set);
-
- /* global demote before child demote */
- pcmk__order_resource_actions(rsc, RSC_DEMOTE, child, RSC_DEMOTE,
- pe_order_implies_first_printed, data_set);
-
- if (clone_data->ordered && last != NULL) {
- pe_rsc_trace(rsc, "Ordered version");
-
- /* child/child relative demote */
- pcmk__order_resource_actions(child, RSC_DEMOTE, last, RSC_DEMOTE, type,
- data_set);
-
- } else if (clone_data->ordered) {
- pe_rsc_trace(rsc, "Ordered version (1st node)");
- /* first child stop before global stopped */
- pcmk__order_resource_actions(child, RSC_DEMOTE, rsc, RSC_DEMOTED, type,
- data_set);
-
- } else {
- pe_rsc_trace(rsc, "Un-ordered version");
+ // "Demote clone" -> demote instance -> "clone demoted"
+ pcmk__order_resource_actions(clone, RSC_DEMOTE, child, RSC_DEMOTE,
+ pe_order_implies_first_printed,
+ clone->cluster);
+ pcmk__order_resource_actions(child, RSC_DEMOTE, clone, RSC_DEMOTED,
+ pe_order_implies_then_printed, clone->cluster);
+
+ // If clone is ordered, order this instance relative to last
+ if ((last != NULL) && pe__clone_is_ordered(clone)) {
+ pcmk__order_resource_actions(child, RSC_DEMOTE, last, RSC_DEMOTE,
+ pe_order_optional, clone->cluster);
}
}
+/*!
+ * \internal
+ * \brief Check whether an instance will be promoted or demoted
+ *
+ * \param[in] rsc Instance to check
+ * \param[in] demoting If \p rsc will be demoted, this will be set to true
+ * \param[in] promoting If \p rsc will be promoted, this will be set to true
+ */
static void
-check_promotable_actions(pe_resource_t *rsc, gboolean *demoting,
- gboolean *promoting)
+check_for_role_change(pe_resource_t *rsc, bool *demoting, bool *promoting)
{
- GList *gIter = NULL;
+ GList *iter = NULL;
- if (rsc->children) {
- gIter = rsc->children;
- for (; gIter != NULL; gIter = gIter->next) {
- pe_resource_t *child = (pe_resource_t *) gIter->data;
-
- check_promotable_actions(child, demoting, promoting);
+ // If this is a cloned group, check group members recursively
+ if (rsc->children != NULL) {
+ for (iter = rsc->children; iter != NULL; iter = iter->next) {
+ check_for_role_change((pe_resource_t *) iter->data,
+ demoting, promoting);
}
return;
}
- CRM_ASSERT(demoting != NULL);
- CRM_ASSERT(promoting != NULL);
-
- gIter = rsc->actions;
- for (; gIter != NULL; gIter = gIter->next) {
- pe_action_t *action = (pe_action_t *) gIter->data;
+ for (iter = rsc->actions; iter != NULL; iter = iter->next) {
+ pe_action_t *action = (pe_action_t *) iter->data;
if (*promoting && *demoting) {
return;
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
continue;
- } else if (pcmk__str_eq(RSC_DEMOTE, action->task, pcmk__str_casei)) {
- *demoting = TRUE;
+ } else if (pcmk__str_eq(RSC_DEMOTE, action->task, pcmk__str_none)) {
+ *demoting = true;
- } else if (pcmk__str_eq(RSC_PROMOTE, action->task, pcmk__str_casei)) {
- *promoting = TRUE;
+ } else if (pcmk__str_eq(RSC_PROMOTE, action->task, pcmk__str_none)) {
+ *promoting = true;
}
}
}
+/*!
+ * \internal
+ * \brief Add promoted-role location constraint scores to an instance's priority
+ *
+ * Adjust a promotable clone instance's promotion priority by the scores of any
+ * location constraints in a list that are both limited to the promoted role and
+ * for the node where the instance will be placed.
+ *
+ * \param[in] child Promotable clone instance
+ * \param[in] location_constraints List of location constraints to apply
+ * \param[in] chosen Node where \p child will be placed
+ */
static void
-apply_promoted_location(pe_resource_t *child, GList *location_constraints,
- pe_node_t *chosen)
+apply_promoted_locations(pe_resource_t *child, GList *location_constraints,
+ pe_node_t *chosen)
{
- CRM_CHECK(child && chosen, return);
- for (GList *gIter = location_constraints; gIter; gIter = gIter->next) {
- pe_node_t *cons_node = NULL;
- pe__location_t *cons = gIter->data;
-
- if (cons->role_filter == RSC_ROLE_PROMOTED) {
- pe_rsc_trace(child, "Applying %s to %s", cons->id, child->id);
- cons_node = pe_find_node_id(cons->node_list_rh, chosen->details->id);
+ for (GList *iter = location_constraints; iter; iter = iter->next) {
+ pe__location_t *location = iter->data;
+ pe_node_t *weighted_node = NULL;
+
+ if (location->role_filter == RSC_ROLE_PROMOTED) {
+ weighted_node = pe_find_node_id(location->node_list_rh,
+ chosen->details->id);
}
- if (cons_node != NULL) {
+ if (weighted_node != NULL) {
int new_priority = pcmk__add_scores(child->priority,
- cons_node->weight);
+ weighted_node->weight);
- pe_rsc_trace(child, "\t%s[%s]: %d -> %d (%d)",
- child->id, cons_node->details->uname, child->priority,
- new_priority, cons_node->weight);
+ pe_rsc_trace(child,
+ "Applying location %s to %s promotion priority on %s: "
+ "%d + %d = %d",
+ location->id, child->id, weighted_node->details->uname,
+ child->priority, weighted_node->weight, new_priority);
child->priority = new_priority;
}
}
}
-static pe_node_t *
-guest_location(pe_node_t *guest_node)
-{
- pe_resource_t *guest = guest_node->details->remote_rsc->container;
-
- return guest->fns->location(guest, NULL, FALSE);
-}
-
+/*!
+ * \internal
+ * \brief Get the node that an instance will be promoted on
+ *
+ * \param[in] rsc Promotable clone instance to check
+ *
+ * \return Node that \p rsc will be promoted on, or NULL if none
+ */
static pe_node_t *
node_to_be_promoted_on(pe_resource_t *rsc)
{
pe_node_t *node = NULL;
pe_node_t *local_node = NULL;
pe_resource_t *parent = uber_parent(rsc);
clone_variant_data_t *clone_data = NULL;
-#if 0
- enum rsc_role_e role = RSC_ROLE_UNKNOWN;
-
- role = rsc->fns->state(rsc, FALSE);
- crm_info("%s role: %s", rsc->id, role2text(role));
-#endif
-
- if (rsc->children) {
- GList *gIter = rsc->children;
-
- for (; gIter != NULL; gIter = gIter->next) {
- pe_resource_t *child = (pe_resource_t *) gIter->data;
+ // If this is a cloned group, bail if any group member can't be promoted
+ if (rsc->children != NULL) {
+ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
+ pe_resource_t *child = (pe_resource_t *) iter->data;
if (node_to_be_promoted_on(child) == NULL) {
- pe_rsc_trace(rsc, "Child %s of %s can't be promoted", child->id, rsc->id);
+ pe_rsc_trace(rsc,
+ "%s can't be promoted because member %s can't",
+ rsc->id, child->id);
return NULL;
}
}
}
node = rsc->fns->location(rsc, NULL, FALSE);
if (node == NULL) {
- pe_rsc_trace(rsc, "%s cannot be promoted: not allocated", rsc->id);
+ pe_rsc_trace(rsc, "%s can't be promoted because it won't be active",
+ rsc->id);
return NULL;
} else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
if (rsc->fns->state(rsc, TRUE) == RSC_ROLE_PROMOTED) {
- crm_notice("Forcing unmanaged instance %s to remain promoted on %s",
+ crm_notice("Unmanaged instance %s will be left promoted on %s",
rsc->id, node->details->uname);
-
} else {
+ pe_rsc_trace(rsc, "%s can't be promoted because it is unmanaged",
+ rsc->id);
return NULL;
}
} else if (rsc->priority < 0) {
- pe_rsc_trace(rsc, "%s cannot be promoted: preference: %d",
+ pe_rsc_trace(rsc,
+ "%s can't be promoted because its promotion priority %d "
+ "is negative",
rsc->id, rsc->priority);
return NULL;
- } else if (!pcmk__node_available(node)) {
- crm_trace("Node can't run any resources: %s", node->details->uname);
- return NULL;
-
- /* @TODO It's possible this check should be done in pcmk__node_available()
- * instead. We should investigate all its callers to figure out whether that
- * would be a good idea.
- */
- } else if (pe__is_guest_node(node) && (guest_location(node) == NULL)) {
- pe_rsc_trace(rsc, "%s cannot be promoted: guest %s not allocated",
- rsc->id, node->details->remote_rsc->container->id);
+ } else if (!pcmk__node_available(node, false, true)) {
+ pe_rsc_trace(rsc, "%s can't be promoted because %s can't run resources",
+ rsc->id, node->details->uname);
return NULL;
}
get_clone_variant_data(clone_data, parent);
local_node = pe_hash_table_lookup(parent->allowed_nodes, node->details->id);
if (local_node == NULL) {
- crm_err("%s cannot run on %s: node not allowed", rsc->id, node->details->uname);
+ /* It should not be possible for the scheduler to have allocated the
+ * instance to a node where its parent is not allowed, but it's good to
+ * have a fail-safe.
+ */
+ if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ crm_warn("%s can't be promoted because %s is not allowed on %s "
+ "(scheduler bug?)",
+ rsc->id, parent->id, node->details->uname);
+ } // else the instance is unmanaged and already promoted
return NULL;
- } else if ((local_node->count < clone_data->promoted_node_max)
- || !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
- return local_node;
-
- } else {
- pe_rsc_trace(rsc, "%s cannot be promoted on %s: node full",
+ } else if ((local_node->count >= clone_data->promoted_node_max)
+ && pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ pe_rsc_trace(rsc,
+ "%s can't be promoted because %s has "
+ "maximum promoted instances already",
rsc->id, node->details->uname);
+ return NULL;
}
- return NULL;
+ return local_node;
}
+/*!
+ * \internal
+ * \brief Compare two promotable clone instances by promotion priority
+ *
+ * \param[in] a First instance to compare
+ * \param[in] b Second instance to compare
+ *
+ * \return A negative number if \p a has higher promotion priority,
+ * a positive number if \p b has higher promotion priority,
+ * or 0 if promotion priorities are equal
+ */
static gint
-sort_promotable_instance(gconstpointer a, gconstpointer b, gpointer data_set)
+cmp_promotable_instance(gconstpointer a, gconstpointer b)
{
- int rc;
+ const pe_resource_t *rsc1 = (const pe_resource_t *) a;
+ const pe_resource_t *rsc2 = (const pe_resource_t *) b;
+
enum rsc_role_e role1 = RSC_ROLE_UNKNOWN;
enum rsc_role_e role2 = RSC_ROLE_UNKNOWN;
- const pe_resource_t *resource1 = (const pe_resource_t *)a;
- const pe_resource_t *resource2 = (const pe_resource_t *)b;
-
- CRM_ASSERT(resource1 != NULL);
- CRM_ASSERT(resource2 != NULL);
+ CRM_ASSERT((rsc1 != NULL) && (rsc2 != NULL));
- role1 = resource1->fns->state(resource1, TRUE);
- role2 = resource2->fns->state(resource2, TRUE);
-
- rc = sort_rsc_index(a, b);
- if (rc != 0) {
- crm_trace("%s %c %s (index)", resource1->id, rc < 0 ? '<' : '>', resource2->id);
- return rc;
+ // Check sort index set by pcmk__set_instance_roles()
+ if (rsc1->sort_index > rsc2->sort_index) {
+ pe_rsc_trace(rsc1,
+ "%s has higher promotion priority than %s "
+ "(sort index %d > %d)",
+ rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index);
+ return -1;
+ } else if (rsc1->sort_index < rsc2->sort_index) {
+ pe_rsc_trace(rsc1,
+ "%s has lower promotion priority than %s "
+ "(sort index %d < %d)",
+ rsc1->id, rsc2->id, rsc1->sort_index, rsc2->sort_index);
+ return 1;
}
+ // If those are the same, prefer instance whose current role is higher
+ role1 = rsc1->fns->state(rsc1, TRUE);
+ role2 = rsc2->fns->state(rsc2, TRUE);
if (role1 > role2) {
- crm_trace("%s %c %s (role)", resource1->id, '<', resource2->id);
+ pe_rsc_trace(rsc1,
+ "%s has higher promotion priority than %s "
+ "(higher current role)",
+ rsc1->id, rsc2->id);
return -1;
-
} else if (role1 < role2) {
- crm_trace("%s %c %s (role)", resource1->id, '>', resource2->id);
+ pe_rsc_trace(rsc1,
+ "%s has lower promotion priority than %s "
+ "(lower current role)",
+ rsc1->id, rsc2->id);
return 1;
}
+ // Finally, do normal clone instance sorting
return pcmk__cmp_instance(a, b);
}
static void
promotion_order(pe_resource_t *rsc, pe_working_set_t *data_set)
{
GList *gIter = NULL;
pe_node_t *node = NULL;
pe_node_t *chosen = NULL;
- clone_variant_data_t *clone_data = NULL;
char score[33];
size_t len = sizeof(score);
- get_clone_variant_data(clone_data, rsc);
-
- if (clone_data->added_promoted_constraints) {
+ if (pe__set_clone_flag(rsc, pe__clone_promotion_constrained)
+ == pcmk_rc_already) {
return;
}
- clone_data->added_promoted_constraints = true;
pe_rsc_trace(rsc, "Merging weights for %s", rsc->id);
pe__set_resource_flags(rsc, pe_rsc_merging);
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe_rsc_trace(rsc, "Sort index: %s = %d", child->id, child->sort_index);
}
pe__show_node_weights(true, rsc, "Before", rsc->allowed_nodes, data_set);
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
chosen = child->fns->location(child, NULL, FALSE);
if (chosen == NULL || child->sort_index < 0) {
pe_rsc_trace(rsc, "Skipping %s", child->id);
continue;
}
node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes, chosen->details->id);
CRM_ASSERT(node != NULL);
// Add promotion preferences and rsc_location scores when role=Promoted
score2char_stack(child->sort_index, score, len);
pe_rsc_trace(rsc, "Adding %s to %s from %s", score,
node->details->uname, child->id);
node->weight = pcmk__add_scores(child->sort_index, node->weight);
}
pe__show_node_weights(true, rsc, "Middle", rsc->allowed_nodes, data_set);
gIter = rsc->rsc_cons;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
/* (Re-)add location preferences of resources that a promoted instance
* should/must be colocated with.
*/
if (constraint->dependent_role == RSC_ROLE_PROMOTED) {
enum pe_weights flags = constraint->score == INFINITY ? 0 : pe_weights_rollback;
pe_rsc_trace(rsc, "RHS: %s with %s: %d",
constraint->dependent->id, constraint->primary->id,
constraint->score);
rsc->allowed_nodes = constraint->primary->cmds->merge_weights(
constraint->primary, rsc->id, rsc->allowed_nodes,
constraint->node_attribute,
constraint->score / (float) INFINITY, flags);
}
}
gIter = rsc->rsc_cons_lhs;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, NULL)) {
continue;
}
/* (Re-)add location preferences of resources that wish to be colocated
* with a promoted instance.
*/
if (constraint->primary_role == RSC_ROLE_PROMOTED) {
pe_rsc_trace(rsc, "LHS: %s with %s: %d",
constraint->dependent->id, constraint->primary->id,
constraint->score);
rsc->allowed_nodes = constraint->dependent->cmds->merge_weights(
constraint->dependent, rsc->id, rsc->allowed_nodes,
constraint->node_attribute,
constraint->score / (float) INFINITY,
pe_weights_rollback|pe_weights_positive);
}
}
// Ban resource from all nodes if it needs a ticket but doesn't have it
pcmk__require_promotion_tickets(rsc);
pe__show_node_weights(true, rsc, "After", rsc->allowed_nodes, data_set);
/* write them back and sort */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
chosen = child->fns->location(child, NULL, FALSE);
if (!pcmk_is_set(child->flags, pe_rsc_managed)
&& (child->next_role == RSC_ROLE_PROMOTED)) {
child->sort_index = INFINITY;
} else if (chosen == NULL || child->sort_index < 0) {
pe_rsc_trace(rsc, "%s: %d", child->id, child->sort_index);
} else {
node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes, chosen->details->id);
CRM_ASSERT(node != NULL);
child->sort_index = node->weight;
}
pe_rsc_trace(rsc, "Set sort index: %s = %d", child->id, child->sort_index);
}
- rsc->children = g_list_sort_with_data(rsc->children,
- sort_promotable_instance, data_set);
+ rsc->children = g_list_sort(rsc->children, cmp_promotable_instance);
pe__clear_resource_flags(rsc, pe_rsc_merging);
}
static gboolean
filter_anonymous_instance(pe_resource_t *rsc, const pe_node_t *node)
{
GList *rIter = NULL;
char *key = clone_strip(rsc->id);
pe_resource_t *parent = uber_parent(rsc);
for (rIter = parent->children; rIter; rIter = rIter->next) {
/* If there is an active instance on the node, only it receives the
* promotion score. Use ->find_rsc() in case this is a cloned group.
*/
pe_resource_t *child = rIter->data;
pe_resource_t *active = parent->fns->find_rsc(child, key, node, pe_find_clone|pe_find_current);
if(rsc == active) {
pe_rsc_trace(rsc, "Found %s for %s active on %s: done", active->id, key, node->details->uname);
free(key);
return TRUE;
} else if(active) {
pe_rsc_trace(rsc, "Found %s for %s on %s: not %s", active->id, key, node->details->uname, rsc->id);
free(key);
return FALSE;
} else {
pe_rsc_trace(rsc, "%s on %s: not active", key, node->details->uname);
}
}
for (rIter = parent->children; rIter; rIter = rIter->next) {
pe_resource_t *child = rIter->data;
/*
* We know it's not running, but any score will still count if
* the instance has been probed on $node
*
* Again use ->find_rsc() because we might be a cloned group
* and knowing that other members of the group are known here
* implies nothing
*/
rsc = parent->fns->find_rsc(child, key, NULL, pe_find_clone);
CRM_LOG_ASSERT(rsc);
if(rsc) {
pe_rsc_trace(rsc, "Checking %s for %s on %s", rsc->id, key, node->details->uname);
if (g_hash_table_lookup(rsc->known_on, node->details->id)) {
free(key);
return TRUE;
}
}
}
free(key);
return FALSE;
}
static const char *
lookup_promotion_score(pe_resource_t *rsc, const pe_node_t *node, const char *name)
{
const char *attr_value = NULL;
if (node && name) {
char *attr_name = pcmk_promotion_score_name(name);
attr_value = pe_node_attribute_calculated(node, attr_name, rsc);
free(attr_name);
}
return attr_value;
}
static int
promotion_score(pe_resource_t *rsc, const pe_node_t *node, int not_set_value)
{
char *name = rsc->id;
const char *attr_value = NULL;
int score = not_set_value;
pe_node_t *match = NULL;
CRM_CHECK(node != NULL, return not_set_value);
if (rsc->children) {
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
int c_score = promotion_score(child, node, not_set_value);
if (score == not_set_value) {
score = c_score;
} else {
score += c_score;
}
}
return score;
}
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)
&& filter_anonymous_instance(rsc, node)) {
pe_rsc_trace(rsc, "Anonymous clone %s is allowed on %s", rsc->id, node->details->uname);
} else if (rsc->running_on || g_hash_table_size(rsc->known_on)) {
/* If we've probed and/or started the resource anywhere, consider
* promotion scores only from nodes where we know the status. However,
* if the status of all nodes is unknown (e.g. cluster startup),
* skip this code, to make sure we take into account any permanent
* promotion scores set previously.
*/
pe_node_t *known = pe_hash_table_lookup(rsc->known_on, node->details->id);
match = pe_find_node_id(rsc->running_on, node->details->id);
if ((match == NULL) && (known == NULL)) {
pe_rsc_trace(rsc, "skipping %s (aka. %s) promotion score on %s because inactive",
rsc->id, rsc->clone_name, node->details->uname);
return score;
}
}
match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (match == NULL) {
return score;
} else if (match->weight < 0) {
pe_rsc_trace(rsc, "%s on %s has score: %d - ignoring",
rsc->id, match->details->uname, match->weight);
return score;
}
if (rsc->clone_name) {
/* Use the name the lrm knows this resource as,
* since that's what crm_attribute --promotion would have used
*/
name = rsc->clone_name;
}
attr_value = lookup_promotion_score(rsc, node, name);
pe_rsc_trace(rsc, "Promotion score for %s on %s = %s",
name, node->details->uname, pcmk__s(attr_value, "(unset)"));
if ((attr_value == NULL) && !pcmk_is_set(rsc->flags, pe_rsc_unique)) {
/* If we don't have any LRM history yet, we won't have clone_name -- in
* that case, for anonymous clones, try the resource name without any
* instance number.
*/
name = clone_strip(rsc->id);
if (strcmp(rsc->id, name)) {
attr_value = lookup_promotion_score(rsc, node, name);
pe_rsc_trace(rsc, "Stripped promotion score for %s on %s = %s",
name, node->details->uname,
pcmk__s(attr_value, "(unset)"));
}
free(name);
}
if (attr_value != NULL) {
score = char2score(attr_value);
}
return score;
}
void
pcmk__add_promotion_scores(pe_resource_t *rsc)
{
int score, new_score;
GList *gIter = rsc->children;
- clone_variant_data_t *clone_data = NULL;
- get_clone_variant_data(clone_data, rsc);
-
- if (clone_data->added_promotion_scores) {
- /* Make sure we only do this once */
+ if (pe__set_clone_flag(rsc, pe__clone_promotion_added) == pcmk_rc_already) {
return;
}
- clone_data->added_promotion_scores = true;
-
for (; gIter != NULL; gIter = gIter->next) {
GHashTableIter iter;
pe_node_t *node = NULL;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
g_hash_table_iter_init(&iter, child_rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
- if (!pcmk__node_available(node)) {
+ if (!pcmk__node_available(node, false, false)) {
/* This node will never be promoted, so don't apply the
* promotion score, as that may lead to clone shuffling.
*/
continue;
}
score = promotion_score(child_rsc, node, 0);
if (score > 0) {
new_score = pcmk__add_scores(node->weight, score);
if (new_score != node->weight) {
pe_rsc_trace(rsc, "\t%s: Updating preference for %s (%d->%d)",
child_rsc->id, node->details->uname, node->weight, new_score);
node->weight = new_score;
}
}
new_score = QB_MAX(child_rsc->priority, score);
if (new_score != child_rsc->priority) {
pe_rsc_trace(rsc, "\t%s: Updating priority (%d->%d)",
child_rsc->id, child_rsc->priority, new_score);
child_rsc->priority = new_score;
}
}
}
}
static void
set_role_unpromoted(pe_resource_t *rsc, bool current)
{
GList *gIter = rsc->children;
if (current) {
if (rsc->role == RSC_ROLE_STARTED) {
rsc->role = RSC_ROLE_UNPROMOTED;
}
} else {
GList *allocated = NULL;
rsc->fns->location(rsc, &allocated, FALSE);
pe__set_next_role(rsc, (allocated? RSC_ROLE_UNPROMOTED : RSC_ROLE_STOPPED),
"unpromoted instance");
g_list_free(allocated);
}
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
set_role_unpromoted(child_rsc, current);
}
}
static void
set_role_promoted(pe_resource_t *rsc, gpointer user_data)
{
if (rsc->next_role == RSC_ROLE_UNKNOWN) {
pe__set_next_role(rsc, RSC_ROLE_PROMOTED, "promoted instance");
}
g_list_foreach(rsc->children, (GFunc) set_role_promoted, NULL);
}
pe_node_t *
pcmk__set_instance_roles(pe_resource_t *rsc, pe_working_set_t *data_set)
{
int promoted = 0;
GList *gIter = NULL;
GList *gIter2 = NULL;
GHashTableIter iter;
pe_node_t *node = NULL;
pe_node_t *chosen = NULL;
enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
char score[33];
size_t len = sizeof(score);
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
// Repurpose count to track the number of promoted instances allocated
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
node->count = 0;
}
/*
* assign priority
*/
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
GList *list = NULL;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_rsc_trace(rsc, "Assigning priority for %s: %s", child_rsc->id,
role2text(child_rsc->next_role));
if (child_rsc->fns->state(child_rsc, TRUE) == RSC_ROLE_STARTED) {
set_role_unpromoted(child_rsc, true);
}
chosen = child_rsc->fns->location(child_rsc, &list, FALSE);
if (pcmk__list_of_multiple(list)) {
pcmk__config_err("Cannot promote non-colocated child %s",
child_rsc->id);
}
g_list_free(list);
if (chosen == NULL) {
continue;
}
next_role = child_rsc->fns->state(child_rsc, FALSE);
switch (next_role) {
case RSC_ROLE_STARTED:
case RSC_ROLE_UNKNOWN:
/*
* Default to -1 if no value is set
*
* This allows instances eligible for promotion to be specified
* based solely on rsc_location constraints,
* but prevents anyone from being promoted if
* neither a constraint nor a promotion score is present
*/
child_rsc->priority = promotion_score(child_rsc, chosen, -1);
break;
case RSC_ROLE_UNPROMOTED:
case RSC_ROLE_STOPPED:
child_rsc->priority = -INFINITY;
break;
case RSC_ROLE_PROMOTED:
/* We will arrive here if we're re-creating actions after a stonith
*/
break;
default:
CRM_CHECK(FALSE /* unhandled */ ,
crm_err("Unknown resource role: %d for %s", next_role, child_rsc->id));
}
- apply_promoted_location(child_rsc, child_rsc->rsc_location, chosen);
- apply_promoted_location(child_rsc, rsc->rsc_location, chosen);
+ apply_promoted_locations(child_rsc, child_rsc->rsc_location, chosen);
+ apply_promoted_locations(child_rsc, rsc->rsc_location, chosen);
for (gIter2 = child_rsc->rsc_cons; gIter2 != NULL; gIter2 = gIter2->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter2->data;
child_rsc->cmds->rsc_colocation_lh(child_rsc, cons->primary, cons,
data_set);
}
child_rsc->sort_index = child_rsc->priority;
pe_rsc_trace(rsc, "Assigning priority for %s: %d", child_rsc->id, child_rsc->priority);
if (next_role == RSC_ROLE_PROMOTED) {
child_rsc->sort_index = INFINITY;
}
}
pe__show_node_weights(true, rsc, "Pre merge", rsc->allowed_nodes, data_set);
promotion_order(rsc, data_set);
// Choose the first N eligible instances to be promoted
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
score2char_stack(child_rsc->sort_index, score, len);
chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (pcmk_is_set(data_set->flags, pe_flag_show_scores) && !pcmk__is_daemon) {
if (data_set->priv != NULL) {
pcmk__output_t *out = data_set->priv;
out->message(out, "promotion-score", child_rsc, chosen, score);
}
} else {
pe_rsc_trace(rsc, "%s promotion score on %s: %s", child_rsc->id,
(chosen? chosen->details->uname : "none"), score);
}
chosen = NULL; /* nuke 'chosen' so that we don't promote more than the
* required number of instances
*/
if (child_rsc->sort_index < 0) {
pe_rsc_trace(rsc, "Not supposed to promote child: %s", child_rsc->id);
} else if ((promoted < clone_data->promoted_max)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
chosen = node_to_be_promoted_on(child_rsc);
}
pe_rsc_debug(rsc, "%s promotion score: %d", child_rsc->id, child_rsc->priority);
if (chosen == NULL) {
set_role_unpromoted(child_rsc, false);
continue;
} else if ((child_rsc->role < RSC_ROLE_PROMOTED)
&& !pcmk_is_set(data_set->flags, pe_flag_have_quorum)
&& data_set->no_quorum_policy == no_quorum_freeze) {
crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze",
child_rsc->id, role2text(child_rsc->role), role2text(child_rsc->next_role));
set_role_unpromoted(child_rsc, false);
continue;
}
chosen->count++;
pe_rsc_info(rsc, "Promoting %s (%s %s)",
child_rsc->id, role2text(child_rsc->role), chosen->details->uname);
set_role_promoted(child_rsc, NULL);
promoted++;
}
pe_rsc_info(rsc, "%s: Promoted %d instances of a possible %d",
rsc->id, promoted, clone_data->promoted_max);
return NULL;
}
void
create_promotable_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
{
pe_action_t *action = NULL;
GList *gIter = rsc->children;
pe_action_t *action_complete = NULL;
- gboolean any_promoting = FALSE;
- gboolean any_demoting = FALSE;
- pe_resource_t *last_promote_rsc = NULL;
- pe_resource_t *last_demote_rsc = NULL;
+ bool any_promoting = false;
+ bool any_demoting = false;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_rsc_debug(rsc, "Creating actions for %s", rsc->id);
for (; gIter != NULL; gIter = gIter->next) {
- gboolean child_promoting = FALSE;
- gboolean child_demoting = FALSE;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_rsc_trace(rsc, "Creating actions for %s", child_rsc->id);
child_rsc->cmds->create_actions(child_rsc, data_set);
- check_promotable_actions(child_rsc, &child_demoting, &child_promoting);
-
- any_demoting = any_demoting || child_demoting;
- any_promoting = any_promoting || child_promoting;
- pe_rsc_trace(rsc, "Created actions for %s: %d %d", child_rsc->id, child_promoting,
- child_demoting);
+ check_for_role_change(child_rsc, &any_demoting, &any_promoting);
}
/* promote */
action = pcmk__new_rsc_pseudo_action(rsc, RSC_PROMOTE, !any_promoting,
true);
action_complete = pcmk__new_rsc_pseudo_action(rsc, RSC_PROMOTED,
!any_promoting, true);
action_complete->priority = INFINITY;
- child_promoting_constraints(clone_data, pe_order_optional,
- rsc, NULL, last_promote_rsc, data_set);
-
if (clone_data->promote_notify == NULL) {
clone_data->promote_notify = pcmk__clone_notif_pseudo_ops(rsc,
RSC_PROMOTE,
action,
action_complete);
}
/* demote */
action = pcmk__new_rsc_pseudo_action(rsc, RSC_DEMOTE, !any_demoting, true);
action_complete = pcmk__new_rsc_pseudo_action(rsc, RSC_DEMOTED,
!any_demoting, true);
action_complete->priority = INFINITY;
- child_demoting_constraints(clone_data, pe_order_optional, rsc, NULL, last_demote_rsc, data_set);
-
if (clone_data->demote_notify == NULL) {
clone_data->demote_notify = pcmk__clone_notif_pseudo_ops(rsc,
RSC_DEMOTE,
action,
action_complete);
if (clone_data->promote_notify) {
/* If we ever wanted groups to have notifications we'd need to move this to native_internal_constraints() one day
* Requires exposing *_notify
*/
order_actions(clone_data->stop_notify->post_done, clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->start_notify->post_done, clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done, clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done, clone_data->start_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done, clone_data->stop_notify->pre,
pe_order_optional);
}
}
/* restore the correct priority */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->priority = rsc->priority;
}
}
void
promote_demote_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
{
/* global stopped before start */
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
pe_order_optional, data_set);
/* global stopped before promote */
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_PROMOTE,
pe_order_optional, data_set);
/* global demoted before start */
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_START,
pe_order_optional, data_set);
/* global started before promote */
pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE,
pe_order_optional, data_set);
/* global demoted before stop */
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP,
pe_order_optional, data_set);
/* global demote before demoted */
pcmk__order_resource_actions(rsc, RSC_DEMOTE, rsc, RSC_DEMOTED,
pe_order_optional, data_set);
/* global demoted before promote */
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_PROMOTE,
pe_order_optional, data_set);
}
void
promotable_constraints(pe_resource_t * rsc, pe_working_set_t * data_set)
{
GList *gIter = rsc->children;
pe_resource_t *last_rsc = NULL;
- clone_variant_data_t *clone_data = NULL;
-
- get_clone_variant_data(clone_data, rsc);
promote_demote_constraints(rsc, data_set);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
/* child demote before promote */
pcmk__order_resource_actions(child_rsc, RSC_DEMOTE, child_rsc,
RSC_PROMOTE, pe_order_optional, data_set);
- child_promoting_constraints(clone_data, pe_order_optional,
- rsc, child_rsc, last_rsc, data_set);
-
- child_demoting_constraints(clone_data, pe_order_optional,
- rsc, child_rsc, last_rsc, data_set);
+ order_instance_promotion(rsc, child_rsc, last_rsc);
+ order_instance_demotion(rsc, child_rsc, last_rsc);
last_rsc = child_rsc;
}
}
static void
node_hash_update_one(GHashTable * hash, pe_node_t * other, const char *attr, int score)
{
GHashTableIter iter;
pe_node_t *node = NULL;
const char *value = NULL;
if (other == NULL) {
return;
} else if (attr == NULL) {
attr = CRM_ATTR_UNAME;
}
value = pe_node_attribute_raw(other, attr);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
const char *tmp = pe_node_attribute_raw(node, attr);
if (pcmk__str_eq(value, tmp, pcmk__str_casei)) {
crm_trace("%s: %d + %d", node->details->uname, node->weight, other->weight);
node->weight = pcmk__add_scores(node->weight, score);
}
}
}
void
promotable_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
if (pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
GList *affected_nodes = NULL;
for (gIter = primary->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, FALSE);
pe_rsc_trace(primary, "Processing: %s", child_rsc->id);
if ((chosen != NULL) && (next_role == constraint->primary_role)) {
pe_rsc_trace(primary, "Applying: %s %s %s %d", child_rsc->id,
role2text(next_role), chosen->details->uname, constraint->score);
if (constraint->score < INFINITY) {
node_hash_update_one(dependent->allowed_nodes, chosen,
constraint->node_attribute, constraint->score);
}
affected_nodes = g_list_prepend(affected_nodes, chosen);
}
}
/* Only do this if it's not a promoted-with-promoted colocation. Doing
* this unconditionally would prevent unpromoted instances from being
* started.
*/
if ((constraint->dependent_role != RSC_ROLE_PROMOTED)
|| (constraint->primary_role != RSC_ROLE_PROMOTED)) {
if (constraint->score >= INFINITY) {
node_list_exclude(dependent->allowed_nodes, affected_nodes,
TRUE);
}
}
g_list_free(affected_nodes);
} else if (constraint->dependent_role == RSC_ROLE_PROMOTED) {
pe_resource_t *primary_instance;
primary_instance = find_compatible_child(dependent, primary,
constraint->primary_role,
FALSE, data_set);
if ((primary_instance == NULL) && (constraint->score >= INFINITY)) {
pe_rsc_trace(dependent, "%s can't be promoted %s",
dependent->id, constraint->id);
dependent->priority = -INFINITY;
} else if (primary_instance != NULL) {
int new_priority = pcmk__add_scores(dependent->priority,
constraint->score);
pe_rsc_debug(dependent, "Applying %s to %s",
constraint->id, dependent->id);
pe_rsc_debug(dependent, "\t%s: %d->%d",
dependent->id, dependent->priority, new_priority);
dependent->priority = new_priority;
}
}
return;
}
diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c
index 3cf19806ad..67530dd4a4 100644
--- a/lib/pacemaker/pcmk_sched_resource.c
+++ b/lib/pacemaker/pcmk_sched_resource.c
@@ -1,1091 +1,1092 @@
/*
* Copyright 2014-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdlib.h>
#include <string.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Resource allocation methods that vary by resource variant
static resource_alloc_functions_t allocation_methods[] = {
{
pcmk__native_merge_weights,
pcmk__native_allocate,
native_create_actions,
native_create_probe,
native_internal_constraints,
native_rsc_colocation_lh,
native_rsc_colocation_rh,
pcmk__colocated_resources,
native_rsc_location,
native_action_flags,
native_update_actions,
pcmk__output_resource_actions,
native_expand,
native_append_meta,
pcmk__primitive_add_utilization,
pcmk__primitive_shutdown_lock,
},
{
pcmk__group_merge_weights,
pcmk__group_allocate,
group_create_actions,
native_create_probe,
group_internal_constraints,
group_rsc_colocation_lh,
group_rsc_colocation_rh,
pcmk__group_colocated_resources,
group_rsc_location,
group_action_flags,
group_update_actions,
pcmk__output_resource_actions,
group_expand,
group_append_meta,
pcmk__group_add_utilization,
pcmk__group_shutdown_lock,
},
{
pcmk__native_merge_weights,
pcmk__clone_allocate,
clone_create_actions,
clone_create_probe,
clone_internal_constraints,
clone_rsc_colocation_lh,
clone_rsc_colocation_rh,
pcmk__colocated_resources,
clone_rsc_location,
clone_action_flags,
pcmk__multi_update_actions,
pcmk__output_resource_actions,
clone_expand,
clone_append_meta,
pcmk__clone_add_utilization,
pcmk__clone_shutdown_lock,
},
{
pcmk__native_merge_weights,
pcmk__bundle_allocate,
pcmk__bundle_create_actions,
pcmk__bundle_create_probe,
pcmk__bundle_internal_constraints,
pcmk__bundle_rsc_colocation_lh,
pcmk__bundle_rsc_colocation_rh,
pcmk__colocated_resources,
pcmk__bundle_rsc_location,
pcmk__bundle_action_flags,
pcmk__multi_update_actions,
pcmk__output_bundle_actions,
pcmk__bundle_expand,
pcmk__bundle_append_meta,
pcmk__bundle_add_utilization,
pcmk__bundle_shutdown_lock,
}
};
/*!
* \internal
* \brief Check whether a resource's agent standard, provider, or type changed
*
* \param[in] rsc Resource to check
* \param[in] node Node needing unfencing/restart if agent changed
* \param[in] rsc_entry XML with previously known agent information
* \param[in] active_on_node Whether \p rsc is active on \p node
*
* \return true if agent for \p rsc changed, otherwise false
*/
bool
pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *rsc_entry, bool active_on_node)
{
bool changed = false;
const char *attr_list[] = {
XML_ATTR_TYPE,
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER
};
for (int i = 0; i < PCMK__NELEM(attr_list); i++) {
const char *value = crm_element_value(rsc->xml, attr_list[i]);
const char *old_value = crm_element_value(rsc_entry, attr_list[i]);
if (!pcmk__str_eq(value, old_value, pcmk__str_none)) {
changed = true;
trigger_unfencing(rsc, node, "Device definition changed", NULL,
rsc->cluster);
if (active_on_node) {
crm_notice("Forcing restart of %s on %s "
"because %s changed from '%s' to '%s'",
rsc->id, node->details->uname, attr_list[i],
pcmk__s(old_value, ""), pcmk__s(value, ""));
}
}
}
if (changed && active_on_node) {
// Make sure the resource is restarted
custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE,
rsc->cluster);
pe__set_resource_flags(rsc, pe_rsc_start_pending);
}
return changed;
}
/*!
* \internal
* \brief Add resource (and any matching children) to list if it matches ID
*
* \param[in] result List to add resource to
* \param[in] rsc Resource to check
* \param[in] id ID to match
*
* \return (Possibly new) head of list
*/
static GList *
add_rsc_if_matching(GList *result, pe_resource_t *rsc, const char *id)
{
if ((strcmp(rsc->id, id) == 0)
|| ((rsc->clone_name != NULL) && (strcmp(rsc->clone_name, id) == 0))) {
result = g_list_prepend(result, rsc);
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
result = add_rsc_if_matching(result, child, id);
}
return result;
}
/*!
* \internal
* \brief Find all resources matching a given ID by either ID or clone name
*
* \param[in] id Resource ID to check
* \param[in] data_set Cluster working set
*
* \return List of all resources that match \p id
* \note The caller is responsible for freeing the return value with
* g_list_free().
*/
GList *
pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set)
{
GList *result = NULL;
CRM_CHECK((id != NULL) && (data_set != NULL), return NULL);
for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
result = add_rsc_if_matching(result, (pe_resource_t *) iter->data, id);
}
return result;
}
/*!
* \internal
* \brief Set the variant-appropriate allocation methods for a resource
*
* \param[in] rsc Resource to set allocation methods for
* \param[in] ignored Only here so function can be used with g_list_foreach()
*/
static void
set_allocation_methods_for_rsc(pe_resource_t *rsc, void *ignored)
{
rsc->cmds = &allocation_methods[rsc->variant];
g_list_foreach(rsc->children, (GFunc) set_allocation_methods_for_rsc, NULL);
}
/*!
* \internal
* \brief Set the variant-appropriate allocation methods for all resources
*
* \param[in] data_set Cluster working set
*/
void
pcmk__set_allocation_methods(pe_working_set_t *data_set)
{
g_list_foreach(data_set->resources, (GFunc) set_allocation_methods_for_rsc,
NULL);
}
// Shared implementation of resource_alloc_functions_t:colocated_resources()
GList *
pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs)
{
GList *gIter = NULL;
if (orig_rsc == NULL) {
orig_rsc = rsc;
}
if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) {
return colocated_rscs;
}
pe_rsc_trace(orig_rsc, "%s is in colocation chain with %s",
rsc->id, orig_rsc->id);
colocated_rscs = g_list_append(colocated_rscs, rsc);
// Follow colocations where this resource is the dependent resource
for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_resource_t *primary = constraint->primary;
if (primary == orig_rsc) {
continue; // Break colocation loop
}
if ((constraint->score == INFINITY) &&
(pcmk__colocation_affects(rsc, primary, constraint,
true) == pcmk__coloc_affects_location)) {
colocated_rscs = primary->cmds->colocated_resources(primary,
orig_rsc,
colocated_rscs);
}
}
// Follow colocations where this resource is the primary resource
for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_resource_t *dependent = constraint->dependent;
if (dependent == orig_rsc) {
continue; // Break colocation loop
}
if (pe_rsc_is_clone(rsc) && !pe_rsc_is_clone(dependent)) {
continue; // We can't be sure whether dependent will be colocated
}
if ((constraint->score == INFINITY) &&
(pcmk__colocation_affects(dependent, rsc, constraint,
true) == pcmk__coloc_affects_location)) {
colocated_rscs = dependent->cmds->colocated_resources(dependent,
orig_rsc,
colocated_rscs);
}
}
return colocated_rscs;
}
void
pcmk__output_resource_actions(pe_resource_t *rsc)
{
pcmk__output_t *out = rsc->cluster->priv;
pe_node_t *next = NULL;
pe_node_t *current = NULL;
if (rsc->children != NULL) {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
child->cmds->output_actions(child);
}
return;
}
next = rsc->allocated_to;
if (rsc->running_on) {
current = pe__current_node(rsc);
if (rsc->role == RSC_ROLE_STOPPED) {
/*
* This can occur when resources are being recovered
* We fiddle with the current role in native_create_actions()
*/
rsc->role = RSC_ROLE_STARTED;
}
}
if ((current == NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
/* Don't log stopped orphans */
return;
}
out->message(out, "rsc-action", rsc, current, next);
}
/*!
* \internal
* \brief Assign a specified primitive resource to a node
*
* Assign a specified primitive resource to a specified node, if the node can
* run the resource (or unconditionally, if \p force is true). Mark the resource
* as no longer provisional. If the primitive can't be assigned (or \p chosen is
* NULL), unassign any previous assignment for it, set its next role to stopped,
* and update any existing actions scheduled for it. This is not done
* recursively for children, so it should be called only for primitives.
*
* \param[in] rsc Resource to assign
* \param[in] chosen Node to assign \p rsc to
* \param[in] force If true, assign to \p chosen even if unavailable
*
* \return true if \p rsc could be assigned, otherwise false
*
* \note Assigning a resource to the NULL node using this function is different
* from calling pcmk__unassign_resource(), in that it will also update any
* actions created for the resource.
*/
bool
pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force)
{
pcmk__output_t *out = rsc->cluster->priv;
CRM_ASSERT(rsc->variant == pe_native);
if (!force && (chosen != NULL)) {
if ((chosen->weight < 0)
// Allow the graph to assume that guest node connections will come up
- || (!pcmk__node_available(chosen) && !pe__is_guest_node(chosen))) {
+ || (!pcmk__node_available(chosen, true, false)
+ && !pe__is_guest_node(chosen))) {
crm_debug("All nodes for resource %s are unavailable, unclean or "
"shutting down (%s can%s run resources, with weight %d)",
rsc->id, chosen->details->uname,
- (pcmk__node_available(chosen)? "" : "not"),
+ (pcmk__node_available(chosen, true, false)? "" : "not"),
chosen->weight);
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability");
chosen = NULL;
}
}
pcmk__unassign_resource(rsc);
pe__clear_resource_flags(rsc, pe_rsc_provisional);
if (chosen == NULL) {
crm_debug("Could not allocate a node for %s", rsc->id);
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate");
for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
pe_action_t *op = (pe_action_t *) iter->data;
crm_debug("Updating %s for allocation failure", op->uuid);
if (pcmk__str_eq(op->task, RSC_STOP, pcmk__str_casei)) {
pe__clear_action_flags(op, pe_action_optional);
} else if (pcmk__str_eq(op->task, RSC_START, pcmk__str_casei)) {
pe__clear_action_flags(op, pe_action_runnable);
//pe__set_resource_flags(rsc, pe_rsc_block);
} else {
// Cancel recurring actions, unless for stopped state
const char *interval_ms_s = NULL;
const char *target_rc_s = NULL;
char *rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING);
interval_ms_s = g_hash_table_lookup(op->meta,
XML_LRM_ATTR_INTERVAL_MS);
target_rc_s = g_hash_table_lookup(op->meta,
XML_ATTR_TE_TARGET_RC);
if ((interval_ms_s != NULL)
&& !pcmk__str_eq(interval_ms_s, "0", pcmk__str_none)
&& !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) {
pe__clear_action_flags(op, pe_action_runnable);
}
free(rc_stopped);
}
}
return false;
}
crm_debug("Assigning %s to %s", rsc->id, chosen->details->uname);
rsc->allocated_to = pe__copy_node(chosen);
chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc,
rsc);
chosen->details->num_resources++;
chosen->count++;
pcmk__consume_node_capacity(chosen->details->utilization, rsc);
if (pcmk_is_set(rsc->cluster->flags, pe_flag_show_utilization)) {
out->message(out, "resource-util", rsc, chosen, __func__);
}
return true;
}
/*!
* \internal
* \brief Assign a specified resource (of any variant) to a node
*
* Assign a specified resource and its children (if any) to a specified node, if
* the node can run the resource (or unconditionally, if \p force is true). Mark
* the resources as no longer provisional. If the resources can't be assigned
* (or \p chosen is NULL), unassign any previous assignments, set next role to
* stopped, and update any existing actions scheduled for them.
*
* \param[in] rsc Resource to assign
* \param[in] chosen Node to assign \p rsc to
* \param[in] force If true, assign to \p chosen even if unavailable
*
* \return true if \p rsc could be assigned, otherwise false
*
* \note Assigning a resource to the NULL node using this function is different
* from calling pcmk__unassign_resource(), in that it will also update any
* actions created for the resource.
*/
bool
pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force)
{
bool changed = false;
if (rsc->children == NULL) {
if (rsc->allocated_to != NULL) {
changed = true;
}
pcmk__assign_primitive(rsc, node, force);
} else {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
changed |= pcmk__assign_resource(child_rsc, node, force);
}
}
return changed;
}
/*!
* \internal
* \brief Remove any assignment of a specified resource to a node
*
* If a specified resource has been assigned to a node, remove that assignment
* and mark the resource as provisional again. This is not done recursively for
* children, so it should be called only for primitives.
*
* \param[in] rsc Resource to unassign
*/
void
pcmk__unassign_resource(pe_resource_t *rsc)
{
pe_node_t *old = rsc->allocated_to;
if (old == NULL) {
return;
}
crm_info("Unassigning %s from %s", rsc->id, old->details->uname);
pe__set_resource_flags(rsc, pe_rsc_provisional);
rsc->allocated_to = NULL;
/* We're going to free the pe_node_t, but its details member is shared and
* will remain, so update that appropriately first.
*/
old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc,
rsc);
old->details->num_resources--;
pcmk__release_node_capacity(old->details->utilization, rsc);
free(old);
}
/*!
* \internal
* \brief Check whether a resource has reached its migration threshold on a node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
* \param[out] failed If the threshold has been reached, this will be set to
* the resource that failed (possibly a parent of \p rsc)
*
* \return true if the migration threshold has been reached, false otherwise
*/
bool
pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
pe_resource_t **failed)
{
int fail_count, remaining_tries;
pe_resource_t *rsc_to_ban = rsc;
// Migration threshold of 0 means never force away
if (rsc->migration_threshold == 0) {
return false;
}
// If we're ignoring failures, also ignore the migration threshold
if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
return false;
}
// If there are no failures, there's no need to force away
fail_count = pe_get_failcount(node, rsc, NULL,
pe_fc_effective|pe_fc_fillers, NULL,
rsc->cluster);
if (fail_count <= 0) {
return false;
}
// If failed resource is anonymous clone instance, we'll force clone away
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
rsc_to_ban = uber_parent(rsc);
}
// How many more times recovery will be tried on this node
remaining_tries = rsc->migration_threshold - fail_count;
if (remaining_tries <= 0) {
crm_warn("%s cannot run on %s due to reaching migration threshold "
"(clean up resource to allow again)"
CRM_XS " failures=%d migration-threshold=%d",
rsc_to_ban->id, node->details->uname, fail_count,
rsc->migration_threshold);
if (failed != NULL) {
*failed = rsc_to_ban;
}
return true;
}
crm_info("%s can fail %d more time%s on "
"%s before reaching migration threshold (%d)",
rsc_to_ban->id, remaining_tries, pcmk__plural_s(remaining_tries),
node->details->uname, rsc->migration_threshold);
return false;
}
static void *
convert_const_pointer(const void *ptr)
{
/* Worst function ever */
return (void *)ptr;
}
/*!
* \internal
* \brief Get a node's weight
*
* \param[in] node Unweighted node to check (for node ID)
* \param[in] nodes List of weighted nodes to look for \p node in
*
* \return Node's weight, or -INFINITY if not found
*/
static int
get_node_weight(pe_node_t *node, GHashTable *nodes)
{
pe_node_t *weighted_node = NULL;
if ((node != NULL) && (nodes != NULL)) {
weighted_node = g_hash_table_lookup(nodes, node->details->id);
}
return (weighted_node == NULL)? -INFINITY : weighted_node->weight;
}
/*!
* \internal
* \brief Compare two resources according to which should be allocated first
*
* \param[in] a First resource to compare
* \param[in] b Second resource to compare
* \param[in] data Sorted list of all nodes in cluster
*
* \return -1 if \p a should be allocated before \b, 0 if they are equal,
* or +1 if \p a should be allocated after \b
*/
static gint
cmp_resources(gconstpointer a, gconstpointer b, gpointer data)
{
const pe_resource_t *resource1 = a;
const pe_resource_t *resource2 = b;
GList *nodes = (GList *) data;
int rc = 0;
int r1_weight = -INFINITY;
int r2_weight = -INFINITY;
pe_node_t *r1_node = NULL;
pe_node_t *r2_node = NULL;
GHashTable *r1_nodes = NULL;
GHashTable *r2_nodes = NULL;
const char *reason = NULL;
// Resources with highest priority should be allocated first
reason = "priority";
r1_weight = resource1->priority;
r2_weight = resource2->priority;
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
// We need nodes to make any other useful comparisons
reason = "no node list";
if (nodes == NULL) {
goto done;
}
// Calculate and log node weights
r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1),
resource1->id, NULL, NULL, 1,
pe_weights_forward | pe_weights_init);
r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2),
resource2->id, NULL, NULL, 1,
pe_weights_forward | pe_weights_init);
pe__show_node_weights(true, NULL, resource1->id, r1_nodes,
resource1->cluster);
pe__show_node_weights(true, NULL, resource2->id, r2_nodes,
resource2->cluster);
// The resource with highest score on its current node goes first
reason = "current location";
if (resource1->running_on != NULL) {
r1_node = pe__current_node(resource1);
}
if (resource2->running_on != NULL) {
r2_node = pe__current_node(resource2);
}
r1_weight = get_node_weight(r1_node, r1_nodes);
r2_weight = get_node_weight(r2_node, r2_nodes);
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
// Otherwise a higher weight on any node will do
reason = "score";
for (GList *iter = nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
r1_weight = get_node_weight(node, r1_nodes);
r2_weight = get_node_weight(node, r2_nodes);
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
}
done:
crm_trace("%s (%d)%s%s %c %s (%d)%s%s: %s",
resource1->id, r1_weight,
((r1_node == NULL)? "" : " on "),
((r1_node == NULL)? "" : r1_node->details->id),
((rc < 0)? '>' : ((rc > 0)? '<' : '=')),
resource2->id, r2_weight,
((r2_node == NULL)? "" : " on "),
((r2_node == NULL)? "" : r2_node->details->id),
reason);
if (r1_nodes != NULL) {
g_hash_table_destroy(r1_nodes);
}
if (r2_nodes != NULL) {
g_hash_table_destroy(r2_nodes);
}
return rc;
}
/*!
* \internal
* \brief Sort resources in the order they should be allocated to nodes
*
* \param[in] data_set Cluster working set
*/
void
pcmk__sort_resources(pe_working_set_t *data_set)
{
GList *nodes = g_list_copy(data_set->nodes);
nodes = pcmk__sort_nodes(nodes, NULL, data_set);
data_set->resources = g_list_sort_with_data(data_set->resources,
cmp_resources, nodes);
g_list_free(nodes);
}
/*!
* \internal
* \brief Create a hash table with a single node in it
*
* \param[in] node Node to copy into new table
*
* \return Newly created hash table containing a copy of \p node
* \note The caller is responsible for freeing the result with
* g_hash_table_destroy().
*/
static GHashTable *
new_node_table(pe_node_t *node)
{
GHashTable *table = pcmk__strkey_table(NULL, free);
node = pe__copy_node(node);
g_hash_table_insert(table, (gpointer) node->details->id, node);
return table;
}
/*!
* \internal
* \brief Apply a resource's parent's colocation scores to a node table
*
* \param[in] rsc Resource whose colocations should be applied
* \param[in,out] nodes Node table to apply colocations to
*/
static void
apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes)
{
GList *iter = NULL;
pcmk__colocation_t *colocation = NULL;
for (iter = rsc->parent->rsc_cons; iter != NULL; iter = iter->next) {
colocation = (pcmk__colocation_t *) iter->data;
*nodes = pcmk__native_merge_weights(colocation->primary, rsc->id,
*nodes, colocation->node_attribute,
colocation->score / (float) INFINITY,
0);
}
for (iter = rsc->parent->rsc_cons_lhs; iter != NULL; iter = iter->next) {
colocation = (pcmk__colocation_t *) iter->data;
if (!pcmk__colocation_has_influence(colocation, rsc)) {
continue;
}
*nodes = pcmk__native_merge_weights(colocation->dependent, rsc->id,
*nodes, colocation->node_attribute,
colocation->score / (float) INFINITY,
pe_weights_positive);
}
}
/*!
* \internal
* \brief Compare clone or bundle instances based on colocation scores
*
* Determine the relative order in which two clone or bundle instances should be
* assigned to nodes, considering the scores of colocation constraints directly
* or indirectly involving them.
*
* \param[in] instance1 First instance to compare
* \param[in] instance2 Second instance to compare
*
* \return A negative number if \p instance1 should be assigned first,
* a positive number if \p instance2 should be assigned first,
* or 0 if assignment order doesn't matter
*/
static int
cmp_instance_by_colocation(const pe_resource_t *instance1,
const pe_resource_t *instance2)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
pe_node_t *current_node1 = pe__current_node(instance1);
pe_node_t *current_node2 = pe__current_node(instance2);
GHashTable *colocated_scores1 = NULL;
GHashTable *colocated_scores2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL)
&& (instance2 != NULL) && (instance2->parent != NULL)
&& (current_node1 != NULL) && (current_node2 != NULL));
// Create node tables initialized with each node
colocated_scores1 = new_node_table(current_node1);
colocated_scores2 = new_node_table(current_node2);
// Apply parental colocations
apply_parent_colocations(instance1, &colocated_scores1);
apply_parent_colocations(instance2, &colocated_scores2);
// Find original nodes again, with scores updated for colocations
node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
// Compare nodes by updated scores
if (node1->weight < node2->weight) {
crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
instance1->id, node1->weight, node1->details->uname,
instance2->id, node2->weight, node2->details->uname);
rc = 1;
} else if (node1->weight > node2->weight) {
crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
instance1->id, node1->weight, node1->details->uname,
instance2->id, node2->weight, node2->details->uname);
rc = -1;
}
g_hash_table_destroy(colocated_scores1);
g_hash_table_destroy(colocated_scores2);
return rc;
}
/*!
* \internal
* \brief Check whether a resource or any of its children are failed
*
* \param[in] rsc Resource to check
*
* \return true if \p rsc or any of its children are failed, otherwise false
*/
static bool
did_fail(const pe_resource_t * rsc)
{
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
return true;
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
if (did_fail((pe_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether a node is allowed to run a resource
*
* \param[in] rsc Resource to check
* \param[in,out] node Node to check (will be set NULL if not allowed)
*
* \return true if *node is either NULL or allowed for \p rsc, otherwise false
*/
static bool
node_is_allowed(const pe_resource_t *rsc, pe_node_t **node)
{
if (*node != NULL) {
pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes,
(*node)->details->id);
if ((allowed == NULL) || (allowed->weight < 0)) {
pe_rsc_trace(rsc, "%s: current location (%s) is unavailable",
rsc->id, (*node)->details->uname);
*node = NULL;
return false;
}
}
return true;
}
/*!
* \internal
* \brief Compare two clone or bundle instances' instance numbers
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a's instance number is lower,
* a positive number if \p b's instance number is lower,
* or 0 if their instance numbers are the same
*/
gint
pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
{
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
char *div1 = NULL;
char *div2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
// Clone numbers are after a colon, bundle numbers after a dash
div1 = strrchr(instance1->id, ':');
if (div1 == NULL) {
div1 = strrchr(instance1->id, '-');
}
div2 = strrchr(instance2->id, ':');
if (div2 == NULL) {
div2 = strrchr(instance2->id, '-');
}
CRM_ASSERT((div1 != NULL) && (div2 != NULL));
return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
}
/*!
* \internal
* \brief Compare clone or bundle instances according to assignment order
*
* Compare two clone or bundle instances according to the order they should be
* assigned to nodes, preferring (in order):
*
* - Active instance that is less multiply active
* - Instance that is not active on a disallowed node
* - Instance with higher configured priority
* - Active instance whose current node can run resources
* - Active instance whose parent is allowed on current node
* - Active instance whose current node has fewer other instances
* - Active instance
* - Failed instance
* - Instance whose colocations result in higher score on current node
* - Instance with lower ID in lexicographic order
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a should be assigned first,
* a positive number if \p b should be assigned first,
* or 0 if assignment order doesn't matter
*/
gint
pcmk__cmp_instance(gconstpointer a, gconstpointer b)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
unsigned int nnodes1 = 0;
unsigned int nnodes2 = 0;
bool can1 = true;
bool can2 = true;
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
node1 = pe__find_active_on(instance1, &nnodes1, NULL);
node2 = pe__find_active_on(instance2, &nnodes2, NULL);
/* If both instances are running and at least one is multiply
* active, prefer instance that's running on fewer nodes.
*/
if ((nnodes1 > 0) && (nnodes2 > 0)) {
if (nnodes1 < nnodes2) {
crm_trace("Assign %s (active on %d) before %s (active on %d): "
"less multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return -1;
} else if (nnodes1 > nnodes2) {
crm_trace("Assign %s (active on %d) after %s (active on %d): "
"more multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return 1;
}
}
/* An instance that is either inactive or active on an allowed node is
* preferred over an instance that is active on a no-longer-allowed node.
*/
can1 = node_is_allowed(instance1, &node1);
can2 = node_is_allowed(instance2, &node2);
if (can1 && !can2) {
crm_trace("Assign %s before %s: not active on a disallowed node",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: active on a disallowed node",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher configured priority
if (instance1->priority > instance2->priority) {
crm_trace("Assign %s before %s: priority (%d > %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return -1;
} else if (instance1->priority < instance2->priority) {
crm_trace("Assign %s after %s: priority (%d < %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return 1;
}
// Prefer active instance
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: inactive",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node can run resources
- can1 = pcmk__node_available(node1);
- can2 = pcmk__node_available(node2);
+ can1 = pcmk__node_available(node1, false, false);
+ can2 = pcmk__node_available(node2, false, false);
if (can1 && !can2) {
crm_trace("Assign %s before %s: current node can run resources",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: current node can't run resources",
instance1->id, instance2->id);
return 1;
}
// Prefer instance whose parent is allowed to run on instance's current node
node1 = pcmk__top_allowed_node(instance1, node1);
node2 = pcmk__top_allowed_node(instance2, node2);
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: "
"parent not allowed on either instance's current node",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: parent not allowed on current node",
instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: parent allowed on current node",
instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node is running fewer other instances
if (node1->count < node2->count) {
crm_trace("Assign %s before %s: fewer active instances on current node",
instance1->id, instance2->id);
return -1;
} else if (node1->count > node2->count) {
crm_trace("Assign %s after %s: more active instances on current node",
instance1->id, instance2->id);
return 1;
}
// Prefer failed instance
can1 = did_fail(instance1);
can2 = did_fail(instance2);
if (!can1 && can2) {
crm_trace("Assign %s before %s: failed", instance1->id, instance2->id);
return -1;
} else if (can1 && !can2) {
crm_trace("Assign %s after %s: not failed",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher cumulative colocation score on current node
rc = cmp_instance_by_colocation(instance1, instance2);
if (rc != 0) {
return rc;
}
// Prefer instance with lower instance number
rc = pcmk__cmp_instance_number(instance1, instance2);
if (rc < 0) {
crm_trace("Assign %s before %s: instance number",
instance1->id, instance2->id);
} else if (rc > 0) {
crm_trace("Assign %s after %s: instance number",
instance1->id, instance2->id);
} else {
crm_trace("No assignment preference for %s vs. %s",
instance1->id, instance2->id);
}
return rc;
}
diff --git a/lib/pacemaker/pcmk_sched_utilization.c b/lib/pacemaker/pcmk_sched_utilization.c
index d9cb02ba2e..3a41af4095 100644
--- a/lib/pacemaker/pcmk_sched_utilization.c
+++ b/lib/pacemaker/pcmk_sched_utilization.c
@@ -1,465 +1,465 @@
/*
* Copyright 2014-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Name for a pseudo-op to use in ordering constraints for utilization
#define LOAD_STOPPED "load_stopped"
/*!
* \internal
* \brief Get integer utilization from a string
*
* \param[in] s String representation of a node utilization value
*
* \return Integer equivalent of \p s
* \todo It would make sense to restrict utilization values to nonnegative
* integers, but the documentation just says "integers" and we didn't
* restrict them initially, so for backward compatibility, allow any
* integer.
*/
static int
utilization_value(const char *s)
{
int value = 0;
if ((s != NULL) && (pcmk__scan_min_int(s, &value, INT_MIN) == EINVAL)) {
pe_warn("Using 0 for utilization instead of invalid value '%s'", value);
value = 0;
}
return value;
}
/*
* Functions for comparing node capacities
*/
struct compare_data {
const pe_node_t *node1;
const pe_node_t *node2;
bool node2_only;
int result;
};
/*!
* \internal
* \brief Compare a single utilization attribute for two nodes
*
* Compare one utilization attribute for two nodes, incrementing the result if
* the first node has greater capacity, and decrementing it if the second node
* has greater capacity.
*
* \param[in] key Utilization attribute name to compare
* \param[in] value Utilization attribute value to compare
* \param[in] user_data Comparison data (as struct compare_data*)
*/
static void
compare_utilization_value(gpointer key, gpointer value, gpointer user_data)
{
int node1_capacity = 0;
int node2_capacity = 0;
struct compare_data *data = user_data;
const char *node2_value = NULL;
if (data->node2_only) {
if (g_hash_table_lookup(data->node1->details->utilization, key)) {
return; // We've already compared this attribute
}
} else {
node1_capacity = utilization_value((const char *) value);
}
node2_value = g_hash_table_lookup(data->node2->details->utilization, key);
node2_capacity = utilization_value(node2_value);
if (node1_capacity > node2_capacity) {
data->result--;
} else if (node1_capacity < node2_capacity) {
data->result++;
}
}
/*!
* \internal
* \brief Compare utilization capacities of two nodes
*
* \param[in] node1 First node to compare
* \param[in] node2 Second node to compare
*
* \return Negative integer if node1 has more free capacity,
* 0 if the capacities are equal, or a positive integer
* if node2 has more free capacity
*/
int
pcmk__compare_node_capacities(const pe_node_t *node1, const pe_node_t *node2)
{
struct compare_data data = {
.node1 = node1,
.node2 = node2,
.node2_only = false,
.result = 0,
};
// Compare utilization values that node1 and maybe node2 have
g_hash_table_foreach(node1->details->utilization, compare_utilization_value,
&data);
// Compare utilization values that only node2 has
data.node2_only = true;
g_hash_table_foreach(node2->details->utilization, compare_utilization_value,
&data);
return data.result;
}
/*
* Functions for updating node capacities
*/
struct calculate_data {
GHashTable *current_utilization;
bool plus;
};
/*!
* \internal
* \brief Update a single utilization attribute with a new value
*
* \param[in] key Name of utilization attribute to update
* \param[in] value Value to add or substract
* \param[in] user_data Calculation data (as struct calculate_data *)
*/
static void
update_utilization_value(gpointer key, gpointer value, gpointer user_data)
{
int result = 0;
const char *current = NULL;
struct calculate_data *data = user_data;
current = g_hash_table_lookup(data->current_utilization, key);
if (data->plus) {
result = utilization_value(current) + utilization_value(value);
} else if (current) {
result = utilization_value(current) - utilization_value(value);
}
g_hash_table_replace(data->current_utilization,
strdup(key), pcmk__itoa(result));
}
/*!
* \internal
* \brief Subtract a resource's utilization from node capacity
*
* \param[in] current_utilization Current node utilization attributes
* \param[in] rsc Resource with utilization to subtract
*/
void
pcmk__consume_node_capacity(GHashTable *current_utilization, pe_resource_t *rsc)
{
struct calculate_data data = {
.current_utilization = current_utilization,
.plus = false,
};
g_hash_table_foreach(rsc->utilization, update_utilization_value, &data);
}
/*!
* \internal
* \brief Add a resource's utilization to node capacity
*
* \param[in] current_utilization Current node utilization attributes
* \param[in] rsc Resource with utilization to add
*/
void
pcmk__release_node_capacity(GHashTable *current_utilization, pe_resource_t *rsc)
{
struct calculate_data data = {
.current_utilization = current_utilization,
.plus = true,
};
g_hash_table_foreach(rsc->utilization, update_utilization_value, &data);
}
/*
* Functions for checking for sufficient node capacity
*/
struct capacity_data {
pe_node_t *node;
const char *rsc_id;
bool is_enough;
};
/*!
* \internal
* \brief Check whether a single utilization attribute has sufficient capacity
*
* \param[in] key Name of utilization attribute to check
* \param[in] value Amount of utilization required
* \param[in] user_data Capacity data (as struct capacity_data *)
*/
static void
check_capacity(gpointer key, gpointer value, gpointer user_data)
{
int required = 0;
int remaining = 0;
const char *node_value_s = NULL;
struct capacity_data *data = user_data;
node_value_s = g_hash_table_lookup(data->node->details->utilization, key);
required = utilization_value(value);
remaining = utilization_value(node_value_s);
if (required > remaining) {
crm_debug("Remaining capacity for %s on %s (%d) is insufficient "
"for resource %s usage (%d)",
(const char *) key, data->node->details->uname, remaining,
data->rsc_id, required);
data->is_enough = false;
}
}
/*!
* \internal
* \brief Check whether a node has sufficient capacity for a resource
*
* \param[in] node Node to check
* \param[in] rsc_id ID of resource to check (for debug logs only)
* \param[in] utilization Required utilization amounts
*
* \return true if node has sufficient capacity for resource, otherwise false
*/
static bool
have_enough_capacity(pe_node_t *node, const char *rsc_id,
GHashTable *utilization)
{
struct capacity_data data = {
.node = node,
.rsc_id = rsc_id,
.is_enough = true,
};
g_hash_table_foreach(utilization, check_capacity, &data);
return data.is_enough;
}
/*!
* \internal
* \brief Sum the utilization requirements of a list of resources
*
* \param[in] orig_rsc Resource being allocated (for logging purposes)
* \param[in] rscs Resources whose utilization should be summed
*
* \return Newly allocated hash table with sum of all utilization values
* \note It is the caller's responsibility to free the return value using
* g_hash_table_destroy().
*/
static GHashTable *
sum_resource_utilization(pe_resource_t *orig_rsc, GList *rscs)
{
GHashTable *utilization = pcmk__strkey_table(free, free);
for (GList *iter = rscs; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
rsc->cmds->add_utilization(rsc, orig_rsc, rscs, utilization);
}
return utilization;
}
/*!
* \internal
* \brief Ban resource from nodes with insufficient utilization capacity
*
* \param[in] rsc Resource to check
* \param[in,out] prefer Resource's preferred node (might be updated)
*/
void
pcmk__ban_insufficient_capacity(pe_resource_t *rsc, pe_node_t **prefer)
{
bool any_capable = false;
char *rscs_id = NULL;
pe_node_t *node = NULL;
pe_node_t *most_capable_node = NULL;
GList *colocated_rscs = NULL;
GHashTable *unallocated_utilization = NULL;
GHashTableIter iter;
CRM_CHECK((rsc != NULL) && (prefer != NULL), return);
// The default placement strategy ignores utilization
if (pcmk__str_eq(rsc->cluster->placement_strategy, "default",
pcmk__str_casei)) {
return;
}
// Check whether any resources are colocated with this one
colocated_rscs = rsc->cmds->colocated_resources(rsc, NULL, NULL);
if (colocated_rscs == NULL) {
return;
}
rscs_id = crm_strdup_printf("%s and its colocated resources", rsc->id);
// If rsc isn't in the list, add it so we include its utilization
if (g_list_find(colocated_rscs, rsc) == NULL) {
colocated_rscs = g_list_append(colocated_rscs, rsc);
}
// Sum utilization of colocated resources that haven't been allocated yet
unallocated_utilization = sum_resource_utilization(rsc, colocated_rscs);
// Check whether any node has enough capacity for all the resources
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
- if (!pcmk__node_available(node) || (node->weight < 0)) {
+ if (!pcmk__node_available(node, true, false)) {
continue;
}
if (have_enough_capacity(node, rscs_id, unallocated_utilization)) {
any_capable = true;
}
// Keep track of node with most free capacity
if ((most_capable_node == NULL)
|| (pcmk__compare_node_capacities(node, most_capable_node) < 0)) {
most_capable_node = node;
}
}
if (any_capable) {
// If so, ban resource from any node with insufficient capacity
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
- if ((node->weight >= 0) && pcmk__node_available(node)
+ if (pcmk__node_available(node, true, false)
&& !have_enough_capacity(node, rscs_id,
unallocated_utilization)) {
pe_rsc_debug(rsc, "%s does not have enough capacity for %s",
node->details->uname, rscs_id);
resource_location(rsc, node, -INFINITY, "__limit_utilization__",
rsc->cluster);
}
}
} else {
// Otherwise, ban from nodes with insufficient capacity for rsc alone
if (*prefer == NULL) {
*prefer = most_capable_node;
}
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
- if ((node->weight >= 0) && pcmk__node_available(node)
+ if (pcmk__node_available(node, true, false)
&& !have_enough_capacity(node, rsc->id, rsc->utilization)) {
pe_rsc_debug(rsc, "%s does not have enough capacity for %s",
node->details->uname, rsc->id);
resource_location(rsc, node, -INFINITY, "__limit_utilization__",
rsc->cluster);
}
}
}
g_hash_table_destroy(unallocated_utilization);
g_list_free(colocated_rscs);
free(rscs_id);
pe__show_node_weights(true, rsc, "Post-utilization",
rsc->allowed_nodes, rsc->cluster);
}
/*!
* \internal
* \brief Create a new load_stopped pseudo-op for a node
*
* \param[in] node Node to create op for
* \param[in] data_set Cluster working set
*
* \return Newly created load_stopped op
*/
static pe_action_t *
new_load_stopped_op(const pe_node_t *node, pe_working_set_t *data_set)
{
char *load_stopped_task = crm_strdup_printf(LOAD_STOPPED "_%s",
node->details->uname);
pe_action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set);
if (load_stopped->node == NULL) {
load_stopped->node = pe__copy_node(node);
pe__clear_action_flags(load_stopped, pe_action_optional);
}
free(load_stopped_task);
return load_stopped;
}
/*!
* \internal
* \brief Create utilization-related internal constraints for a resource
*
* \param[in] rsc Resource to create constraints for
* \param[in] allowed_nodes List of allowed next nodes for \p rsc
*/
void
pcmk__create_utilization_constraints(pe_resource_t *rsc, GList *allowed_nodes)
{
GList *iter = NULL;
pe_node_t *node = NULL;
pe_action_t *load_stopped = NULL;
pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s",
rsc->id, rsc->cluster->placement_strategy);
// "stop rsc then load_stopped" constraints for current nodes
for (iter = rsc->running_on; iter != NULL; iter = iter->next) {
node = (pe_node_t *) iter->data;
load_stopped = new_load_stopped_op(node, rsc->cluster);
pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL, NULL, load_stopped,
pe_order_load, rsc->cluster);
}
// "load_stopped then start/migrate_to rsc" constraints for allowed nodes
for (GList *iter = allowed_nodes; iter; iter = iter->next) {
node = (pe_node_t *) iter->data;
load_stopped = new_load_stopped_op(node, rsc->cluster);
pcmk__new_ordering(NULL, NULL, load_stopped, rsc, start_key(rsc), NULL,
pe_order_load, rsc->cluster);
pcmk__new_ordering(NULL, NULL, load_stopped,
rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL,
pe_order_load, rsc->cluster);
}
}
/*!
* \internal
* \brief Output node capacities if enabled
*
* \param[in] desc Prefix for output
* \param[in] data_set Cluster working set
*/
void
pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set)
{
if (!pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
return;
}
for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
pcmk__output_t *out = data_set->priv;
out->message(out, "node-capacity", node, desc);
}
}
diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c
index 920a04c32c..bac0cc49bf 100644
--- a/lib/pengine/clone.c
+++ b/lib/pengine/clone.c
@@ -1,1142 +1,1189 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdint.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/status.h>
#include <crm/pengine/internal.h>
#include <pe_status_private.h>
#include <crm/msg_xml.h>
#include <crm/common/output.h>
#include <crm/common/xml_internal.h>
#define VARIANT_CLONE 1
#include "./variant.h"
#ifdef PCMK__COMPAT_2_0
#define PROMOTED_INSTANCES RSC_ROLE_PROMOTED_LEGACY_S "s"
#define UNPROMOTED_INSTANCES RSC_ROLE_UNPROMOTED_LEGACY_S "s"
#else
#define PROMOTED_INSTANCES RSC_ROLE_PROMOTED_S
#define UNPROMOTED_INSTANCES RSC_ROLE_UNPROMOTED_S
#endif
static GList *
sorted_hash_table_values(GHashTable *table)
{
GList *retval = NULL;
GHashTableIter iter;
gpointer key, value;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
if (!g_list_find_custom(retval, value, (GCompareFunc) strcmp)) {
retval = g_list_prepend(retval, (char *) value);
}
}
retval = g_list_sort(retval, (GCompareFunc) strcmp);
return retval;
}
static GList *
nodes_with_status(GHashTable *table, const char *status)
{
GList *retval = NULL;
GHashTableIter iter;
gpointer key, value;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
if (!strcmp((char *) value, status)) {
retval = g_list_prepend(retval, key);
}
}
retval = g_list_sort(retval, (GCompareFunc) pcmk__numeric_strcasecmp);
return retval;
}
static char *
node_list_to_str(GList *list)
{
char *retval = NULL;
size_t len = 0;
for (GList *iter = list; iter != NULL; iter = iter->next) {
pcmk__add_word(&retval, &len, (char *) iter->data);
}
return retval;
}
static void
clone_header(pcmk__output_t *out, int *rc, pe_resource_t *rsc, clone_variant_data_t *clone_data)
{
char *attrs = NULL;
size_t len = 0;
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__add_separated_word(&attrs, &len, "promotable", ", ");
}
if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
pcmk__add_separated_word(&attrs, &len, "unique", ", ");
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pcmk__add_separated_word(&attrs, &len, "unmanaged", ", ");
}
if (pe__resource_is_disabled(rsc)) {
pcmk__add_separated_word(&attrs, &len, "disabled", ", ");
}
if (attrs) {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, *rc, "Clone Set: %s [%s] (%s)",
rsc->id, ID(clone_data->xml_obj_child),
attrs);
free(attrs);
} else {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, *rc, "Clone Set: %s [%s]",
rsc->id, ID(clone_data->xml_obj_child))
}
}
void
pe__force_anon(const char *standard, pe_resource_t *rsc, const char *rid,
pe_working_set_t *data_set)
{
if (pe_rsc_is_clone(rsc)) {
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_warn("Ignoring " XML_RSC_ATTR_UNIQUE " for %s because %s resources "
"such as %s can be used only as anonymous clones",
rsc->id, standard, rid);
clone_data->clone_node_max = 1;
clone_data->clone_max = QB_MIN(clone_data->clone_max,
g_list_length(data_set->nodes));
}
}
pe_resource_t *
find_clone_instance(pe_resource_t * rsc, const char *sub_id, pe_working_set_t * data_set)
{
char *child_id = NULL;
pe_resource_t *child = NULL;
const char *child_base = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
child_base = ID(clone_data->xml_obj_child);
child_id = crm_strdup_printf("%s:%s", child_base, sub_id);
child = pe_find_resource(rsc->children, child_id);
free(child_id);
return child;
}
pe_resource_t *
pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set)
{
gboolean as_orphan = FALSE;
char *inc_num = NULL;
char *inc_max = NULL;
pe_resource_t *child_rsc = NULL;
xmlNode *child_copy = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
CRM_CHECK(clone_data->xml_obj_child != NULL, return FALSE);
if (clone_data->total_clones >= clone_data->clone_max) {
// If we've already used all available instances, this is an orphan
as_orphan = TRUE;
}
// Allocate instance numbers in numerical order (starting at 0)
inc_num = pcmk__itoa(clone_data->total_clones);
inc_max = pcmk__itoa(clone_data->clone_max);
child_copy = copy_xml(clone_data->xml_obj_child);
crm_xml_add(child_copy, XML_RSC_ATTR_INCARNATION, inc_num);
if (common_unpack(child_copy, &child_rsc, rsc, data_set) == FALSE) {
pe_err("Failed unpacking resource %s", crm_element_value(child_copy, XML_ATTR_ID));
child_rsc = NULL;
goto bail;
}
/* child_rsc->globally_unique = rsc->globally_unique; */
CRM_ASSERT(child_rsc);
clone_data->total_clones += 1;
pe_rsc_trace(child_rsc, "Setting clone attributes for: %s", child_rsc->id);
rsc->children = g_list_append(rsc->children, child_rsc);
if (as_orphan) {
pe__set_resource_flags_recursive(child_rsc, pe_rsc_orphan);
}
add_hash_param(child_rsc->meta, XML_RSC_ATTR_INCARNATION_MAX, inc_max);
pe_rsc_trace(rsc, "Added %s instance %s", rsc->id, child_rsc->id);
bail:
free(inc_num);
free(inc_max);
return child_rsc;
}
gboolean
clone_unpack(pe_resource_t * rsc, pe_working_set_t * data_set)
{
int lpc = 0;
xmlNode *a_child = NULL;
xmlNode *xml_obj = rsc->xml;
clone_variant_data_t *clone_data = NULL;
- const char *ordered = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_ORDERED);
const char *max_clones = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_MAX);
const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX);
pe_rsc_trace(rsc, "Processing resource %s...", rsc->id);
clone_data = calloc(1, sizeof(clone_variant_data_t));
rsc->variant_opaque = clone_data;
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
const char *promoted_max = NULL;
const char *promoted_node_max = NULL;
promoted_max = g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_PROMOTED_MAX);
if (promoted_max == NULL) {
// @COMPAT deprecated since 2.0.0
promoted_max = g_hash_table_lookup(rsc->meta,
PCMK_XE_PROMOTED_MAX_LEGACY);
}
promoted_node_max = g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_PROMOTED_NODEMAX);
if (promoted_node_max == NULL) {
// @COMPAT deprecated since 2.0.0
promoted_node_max = g_hash_table_lookup(rsc->meta,
PCMK_XE_PROMOTED_NODE_MAX_LEGACY);
}
// Use 1 as default but 0 for minimum and invalid
if (promoted_max == NULL) {
clone_data->promoted_max = 1;
} else {
pcmk__scan_min_int(promoted_max, &(clone_data->promoted_max), 0);
}
// Use 1 as default but 0 for minimum and invalid
if (promoted_node_max == NULL) {
clone_data->promoted_node_max = 1;
} else {
pcmk__scan_min_int(promoted_node_max,
&(clone_data->promoted_node_max), 0);
}
}
// Implied by calloc()
/* clone_data->xml_obj_child = NULL; */
// Use 1 as default but 0 for minimum and invalid
if (max_clones_node == NULL) {
clone_data->clone_node_max = 1;
} else {
pcmk__scan_min_int(max_clones_node, &(clone_data->clone_node_max), 0);
}
/* Use number of nodes (but always at least 1, which is handy for crm_verify
* for a CIB without nodes) as default, but 0 for minimum and invalid
*/
if (max_clones == NULL) {
clone_data->clone_max = QB_MAX(1, g_list_length(data_set->nodes));
} else {
pcmk__scan_min_int(max_clones, &(clone_data->clone_max), 0);
}
- clone_data->ordered = crm_is_true(ordered);
+ if (crm_is_true(g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_ORDERED))) {
+ clone_data->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,
+ "Clone", rsc->id,
+ clone_data->flags,
+ pe__clone_ordered,
+ "pe__clone_ordered");
+ }
if ((rsc->flags & pe_rsc_unique) == 0 && clone_data->clone_node_max > 1) {
pcmk__config_err("Ignoring " XML_RSC_ATTR_PROMOTED_MAX " for %s "
"because anonymous clones support only one instance "
"per node", rsc->id);
clone_data->clone_node_max = 1;
}
pe_rsc_trace(rsc, "Options for %s", rsc->id);
pe_rsc_trace(rsc, "\tClone max: %d", clone_data->clone_max);
pe_rsc_trace(rsc, "\tClone node max: %d", clone_data->clone_node_max);
pe_rsc_trace(rsc, "\tClone is unique: %s",
pe__rsc_bool_str(rsc, pe_rsc_unique));
pe_rsc_trace(rsc, "\tClone is promotable: %s",
pe__rsc_bool_str(rsc, pe_rsc_promotable));
// Clones may contain a single group or primitive
for (a_child = pcmk__xe_first_child(xml_obj); a_child != NULL;
a_child = pcmk__xe_next(a_child)) {
if (pcmk__str_any_of((const char *)a_child->name, XML_CIB_TAG_RESOURCE, XML_CIB_TAG_GROUP, NULL)) {
clone_data->xml_obj_child = a_child;
break;
}
}
if (clone_data->xml_obj_child == NULL) {
pcmk__config_err("%s has nothing to clone", rsc->id);
return FALSE;
}
/*
* Make clones ever so slightly sticky by default
*
* This helps ensure clone instances are not shuffled around the cluster
* for no benefit in situations when pre-allocation is not appropriate
*/
if (g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_STICKINESS) == NULL) {
add_hash_param(rsc->meta, XML_RSC_ATTR_STICKINESS, "1");
}
/* This ensures that the globally-unique value always exists for children to
* inherit when being unpacked, as well as in resource agents' environment.
*/
add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE,
pe__rsc_bool_str(rsc, pe_rsc_unique));
if (clone_data->clone_max <= 0) {
/* Create one child instance so that unpack_find_resource() will hook up
* any orphans up to the parent correctly.
*/
if (pe__create_clone_child(rsc, data_set) == NULL) {
return FALSE;
}
} else {
// Create a child instance for each available instance number
for (lpc = 0; lpc < clone_data->clone_max; lpc++) {
if (pe__create_clone_child(rsc, data_set) == NULL) {
return FALSE;
}
}
}
pe_rsc_trace(rsc, "Added %d children to resource %s...", clone_data->clone_max, rsc->id);
return TRUE;
}
gboolean
clone_active(pe_resource_t * rsc, gboolean all)
{
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
gboolean child_active = child_rsc->fns->active(child_rsc, all);
if (all == FALSE && child_active) {
return TRUE;
} else if (all && child_active == FALSE) {
return FALSE;
}
}
if (all) {
return TRUE;
} else {
return FALSE;
}
}
static void
short_print(char *list, const char *prefix, const char *type, const char *suffix, long options, void *print_data)
{
if(suffix == NULL) {
suffix = "";
}
if (list) {
if (options & pe_print_html) {
status_print("<li>");
}
status_print("%s%s: [ %s ]%s", prefix, type, list, suffix);
if (options & pe_print_html) {
status_print("</li>\n");
} else if (options & pe_print_suppres_nl) {
/* nothing */
} else if ((options & pe_print_printf) || (options & pe_print_ncurses)) {
status_print("\n");
}
}
}
static const char *
configured_role_str(pe_resource_t * rsc)
{
const char *target_role = g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_TARGET_ROLE);
if ((target_role == NULL) && rsc->children && rsc->children->data) {
target_role = g_hash_table_lookup(((pe_resource_t*)rsc->children->data)->meta,
XML_RSC_ATTR_TARGET_ROLE);
}
return target_role;
}
static enum rsc_role_e
configured_role(pe_resource_t * rsc)
{
const char *target_role = configured_role_str(rsc);
if (target_role) {
return text2role(target_role);
}
return RSC_ROLE_UNKNOWN;
}
static void
clone_print_xml(pe_resource_t * rsc, const char *pre_text, long options, void *print_data)
{
char *child_text = crm_strdup_printf("%s ", pre_text);
const char *target_role = configured_role_str(rsc);
GList *gIter = rsc->children;
status_print("%s<clone ", pre_text);
status_print("id=\"%s\" ", rsc->id);
status_print("multi_state=\"%s\" ",
pe__rsc_bool_str(rsc, pe_rsc_promotable));
status_print("unique=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_unique));
status_print("managed=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_managed));
status_print("failed=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_failed));
status_print("failure_ignored=\"%s\" ",
pe__rsc_bool_str(rsc, pe_rsc_failure_ignored));
if (target_role) {
status_print("target_role=\"%s\" ", target_role);
}
status_print(">\n");
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->fns->print(child_rsc, child_text, options, print_data);
}
status_print("%s</clone>\n", pre_text);
free(child_text);
}
bool is_set_recursive(pe_resource_t * rsc, long long flag, bool any)
{
GList *gIter;
bool all = !any;
if (pcmk_is_set(rsc->flags, flag)) {
if(any) {
return TRUE;
}
} else if(all) {
return FALSE;
}
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
if(is_set_recursive(gIter->data, flag, any)) {
if(any) {
return TRUE;
}
} else if(all) {
return FALSE;
}
}
if(all) {
return TRUE;
}
return FALSE;
}
void
clone_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data)
{
char *list_text = NULL;
char *child_text = NULL;
char *stopped_list = NULL;
size_t list_text_len = 0;
size_t stopped_list_len = 0;
GList *promoted_list = NULL;
GList *started_list = NULL;
GList *gIter = rsc->children;
clone_variant_data_t *clone_data = NULL;
int active_instances = 0;
if (pre_text == NULL) {
pre_text = " ";
}
if (options & pe_print_xml) {
clone_print_xml(rsc, pre_text, options, print_data);
return;
}
get_clone_variant_data(clone_data, rsc);
child_text = crm_strdup_printf("%s ", pre_text);
status_print("%sClone Set: %s [%s]%s%s%s",
pre_text ? pre_text : "", rsc->id, ID(clone_data->xml_obj_child),
pcmk_is_set(rsc->flags, pe_rsc_promotable)? " (promotable)" : "",
pcmk_is_set(rsc->flags, pe_rsc_unique)? " (unique)" : "",
pcmk_is_set(rsc->flags, pe_rsc_managed)? "" : " (unmanaged)");
if (options & pe_print_html) {
status_print("\n<ul>\n");
} else if ((options & pe_print_log) == 0) {
status_print("\n");
}
for (; gIter != NULL; gIter = gIter->next) {
gboolean print_full = FALSE;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
gboolean partially_active = child_rsc->fns->active(child_rsc, FALSE);
if (options & pe_print_clone_details) {
print_full = TRUE;
}
if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
// Print individual instance when unique (except stopped orphans)
if (partially_active || !pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
print_full = TRUE;
}
// Everything else in this block is for anonymous clones
} else if (pcmk_is_set(options, pe_print_pending)
&& (child_rsc->pending_task != NULL)
&& strcmp(child_rsc->pending_task, "probe")) {
// Print individual instance when non-probe action is pending
print_full = TRUE;
} else if (partially_active == FALSE) {
// List stopped instances when requested (except orphans)
if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan)
&& !pcmk_is_set(options, pe_print_clone_active)) {
pcmk__add_word(&stopped_list, &stopped_list_len, child_rsc->id);
}
} else if (is_set_recursive(child_rsc, pe_rsc_orphan, TRUE)
|| is_set_recursive(child_rsc, pe_rsc_managed, FALSE) == FALSE
|| is_set_recursive(child_rsc, pe_rsc_failed, TRUE)) {
// Print individual instance when active orphaned/unmanaged/failed
print_full = TRUE;
} else if (child_rsc->fns->active(child_rsc, TRUE)) {
// Instance of fully active anonymous clone
pe_node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE);
if (location) {
// Instance is active on a single node
enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE);
if (location->details->online == FALSE && location->details->unclean) {
print_full = TRUE;
} else if (a_role > RSC_ROLE_UNPROMOTED) {
promoted_list = g_list_append(promoted_list, location);
} else {
started_list = g_list_append(started_list, location);
}
} else {
/* uncolocated group - bleh */
print_full = TRUE;
}
} else {
// Instance of partially active anonymous clone
print_full = TRUE;
}
if (print_full) {
if (options & pe_print_html) {
status_print("<li>\n");
}
child_rsc->fns->print(child_rsc, child_text, options, print_data);
if (options & pe_print_html) {
status_print("</li>\n");
}
}
}
/* Promoted */
promoted_list = g_list_sort(promoted_list, sort_node_uname);
for (gIter = promoted_list; gIter; gIter = gIter->next) {
pe_node_t *host = gIter->data;
pcmk__add_word(&list_text, &list_text_len, host->details->uname);
active_instances++;
}
short_print(list_text, child_text, PROMOTED_INSTANCES, NULL, options,
print_data);
g_list_free(promoted_list);
free(list_text);
list_text = NULL;
list_text_len = 0;
/* Started/Unpromoted */
started_list = g_list_sort(started_list, sort_node_uname);
for (gIter = started_list; gIter; gIter = gIter->next) {
pe_node_t *host = gIter->data;
pcmk__add_word(&list_text, &list_text_len, host->details->uname);
active_instances++;
}
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
enum rsc_role_e role = configured_role(rsc);
if (role == RSC_ROLE_UNPROMOTED) {
short_print(list_text, child_text,
UNPROMOTED_INSTANCES " (target-role)", NULL, options,
print_data);
} else {
short_print(list_text, child_text, UNPROMOTED_INSTANCES, NULL,
options, print_data);
}
} else {
short_print(list_text, child_text, "Started", NULL, options, print_data);
}
g_list_free(started_list);
free(list_text);
list_text = NULL;
list_text_len = 0;
if (!pcmk_is_set(options, pe_print_clone_active)) {
const char *state = "Stopped";
enum rsc_role_e role = configured_role(rsc);
if (role == RSC_ROLE_STOPPED) {
state = "Stopped (disabled)";
}
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)
&& (clone_data->clone_max > active_instances)) {
GList *nIter;
GList *list = g_hash_table_get_values(rsc->allowed_nodes);
/* Custom stopped list for non-unique clones */
free(stopped_list);
stopped_list = NULL;
stopped_list_len = 0;
if (list == NULL) {
/* Clusters with symmetrical=false haven't calculated allowed_nodes yet
* If we've not probed for them yet, the Stopped list will be empty
*/
list = g_hash_table_get_values(rsc->known_on);
}
list = g_list_sort(list, sort_node_uname);
for (nIter = list; nIter != NULL; nIter = nIter->next) {
pe_node_t *node = (pe_node_t *)nIter->data;
if (pe_find_node(rsc->running_on, node->details->uname) == NULL) {
pcmk__add_word(&stopped_list, &stopped_list_len,
node->details->uname);
}
}
g_list_free(list);
}
short_print(stopped_list, child_text, state, NULL, options, print_data);
free(stopped_list);
}
if (options & pe_print_html) {
status_print("</ul>\n");
}
free(child_text);
}
PCMK__OUTPUT_ARGS("clone", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__clone_xml(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
GList *gIter = rsc->children;
GList *all = NULL;
int rc = pcmk_rc_no_output;
gboolean printed_header = FALSE;
gboolean print_everything = TRUE;
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) ||
(strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches));
all = g_list_prepend(all, (gpointer) "*");
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) {
continue;
}
if (child_rsc->fns->is_filtered(child_rsc, only_rsc, print_everything)) {
continue;
}
if (!printed_header) {
printed_header = TRUE;
rc = pe__name_and_nvpairs_xml(out, true, "clone", 8,
"id", rsc->id,
"multi_state", pe__rsc_bool_str(rsc, pe_rsc_promotable),
"unique", pe__rsc_bool_str(rsc, pe_rsc_unique),
"managed", pe__rsc_bool_str(rsc, pe_rsc_managed),
"disabled", pcmk__btoa(pe__resource_is_disabled(rsc)),
"failed", pe__rsc_bool_str(rsc, pe_rsc_failed),
"failure_ignored", pe__rsc_bool_str(rsc, pe_rsc_failure_ignored),
"target_role", configured_role_str(rsc));
CRM_ASSERT(rc == pcmk_rc_ok);
}
out->message(out, crm_map_element_name(child_rsc->xml), show_opts,
child_rsc, only_node, all);
}
if (printed_header) {
pcmk__output_xml_pop_parent(out);
}
g_list_free(all);
return rc;
}
PCMK__OUTPUT_ARGS("clone", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__clone_default(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
GHashTable *stopped = NULL;
char *list_text = NULL;
size_t list_text_len = 0;
GList *promoted_list = NULL;
GList *started_list = NULL;
GList *gIter = rsc->children;
clone_variant_data_t *clone_data = NULL;
int active_instances = 0;
int rc = pcmk_rc_no_output;
gboolean print_everything = TRUE;
get_clone_variant_data(clone_data, rsc);
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) ||
(strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches));
for (; gIter != NULL; gIter = gIter->next) {
gboolean print_full = FALSE;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
gboolean partially_active = child_rsc->fns->active(child_rsc, FALSE);
if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) {
continue;
}
if (child_rsc->fns->is_filtered(child_rsc, only_rsc, print_everything)) {
continue;
}
if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
print_full = TRUE;
}
if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
// Print individual instance when unique (except stopped orphans)
if (partially_active || !pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
print_full = TRUE;
}
// Everything else in this block is for anonymous clones
} else if (pcmk_is_set(show_opts, pcmk_show_pending)
&& (child_rsc->pending_task != NULL)
&& strcmp(child_rsc->pending_task, "probe")) {
// Print individual instance when non-probe action is pending
print_full = TRUE;
} else if (partially_active == FALSE) {
// List stopped instances when requested (except orphans)
if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan)
&& !pcmk_is_set(show_opts, pcmk_show_clone_detail)
&& pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
if (stopped == NULL) {
stopped = pcmk__strkey_table(free, free);
}
g_hash_table_insert(stopped, strdup(child_rsc->id), strdup("Stopped"));
}
} else if (is_set_recursive(child_rsc, pe_rsc_orphan, TRUE)
|| is_set_recursive(child_rsc, pe_rsc_managed, FALSE) == FALSE
|| is_set_recursive(child_rsc, pe_rsc_failed, TRUE)) {
// Print individual instance when active orphaned/unmanaged/failed
print_full = TRUE;
} else if (child_rsc->fns->active(child_rsc, TRUE)) {
// Instance of fully active anonymous clone
pe_node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE);
if (location) {
// Instance is active on a single node
enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE);
if (location->details->online == FALSE && location->details->unclean) {
print_full = TRUE;
} else if (a_role > RSC_ROLE_UNPROMOTED) {
promoted_list = g_list_append(promoted_list, location);
} else {
started_list = g_list_append(started_list, location);
}
} else {
/* uncolocated group - bleh */
print_full = TRUE;
}
} else {
// Instance of partially active anonymous clone
print_full = TRUE;
}
if (print_full) {
GList *all = NULL;
clone_header(out, &rc, rsc, clone_data);
/* Print every resource that's a child of this clone. */
all = g_list_prepend(all, (gpointer) "*");
out->message(out, crm_map_element_name(child_rsc->xml), show_opts,
child_rsc, only_node, all);
g_list_free(all);
}
}
if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return pcmk_rc_ok;
}
/* Promoted */
promoted_list = g_list_sort(promoted_list, sort_node_uname);
for (gIter = promoted_list; gIter; gIter = gIter->next) {
pe_node_t *host = gIter->data;
if (!pcmk__str_in_list(host->details->uname, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
continue;
}
pcmk__add_word(&list_text, &list_text_len, host->details->uname);
active_instances++;
}
g_list_free(promoted_list);
if (list_text != NULL) {
clone_header(out, &rc, rsc, clone_data);
out->list_item(out, NULL, PROMOTED_INSTANCES ": [ %s ]", list_text);
free(list_text);
list_text = NULL;
list_text_len = 0;
}
/* Started/Unpromoted */
started_list = g_list_sort(started_list, sort_node_uname);
for (gIter = started_list; gIter; gIter = gIter->next) {
pe_node_t *host = gIter->data;
if (!pcmk__str_in_list(host->details->uname, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
continue;
}
pcmk__add_word(&list_text, &list_text_len, host->details->uname);
active_instances++;
}
g_list_free(started_list);
if (list_text != NULL) {
clone_header(out, &rc, rsc, clone_data);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
enum rsc_role_e role = configured_role(rsc);
if (role == RSC_ROLE_UNPROMOTED) {
out->list_item(out, NULL,
UNPROMOTED_INSTANCES " (target-role): [ %s ]",
list_text);
} else {
out->list_item(out, NULL, UNPROMOTED_INSTANCES ": [ %s ]",
list_text);
}
} else {
out->list_item(out, NULL, "Started: [ %s ]", list_text);
}
free(list_text);
list_text = NULL;
list_text_len = 0;
}
if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)
&& (clone_data->clone_max > active_instances)) {
GList *nIter;
GList *list = g_hash_table_get_values(rsc->allowed_nodes);
/* Custom stopped table for non-unique clones */
if (stopped != NULL) {
g_hash_table_destroy(stopped);
stopped = NULL;
}
if (list == NULL) {
/* Clusters with symmetrical=false haven't calculated allowed_nodes yet
* If we've not probed for them yet, the Stopped list will be empty
*/
list = g_hash_table_get_values(rsc->known_on);
}
list = g_list_sort(list, sort_node_uname);
for (nIter = list; nIter != NULL; nIter = nIter->next) {
pe_node_t *node = (pe_node_t *)nIter->data;
if (pe_find_node(rsc->running_on, node->details->uname) == NULL &&
pcmk__str_in_list(node->details->uname, only_node,
pcmk__str_star_matches|pcmk__str_casei)) {
xmlNode *probe_op = pe__failed_probe_for_rsc(rsc, node->details->uname);
const char *state = "Stopped";
if (configured_role(rsc) == RSC_ROLE_STOPPED) {
state = "Stopped (disabled)";
}
if (stopped == NULL) {
stopped = pcmk__strkey_table(free, free);
}
if (probe_op != NULL) {
int rc;
pcmk__scan_min_int(crm_element_value(probe_op, XML_LRM_ATTR_RC), &rc, 0);
g_hash_table_insert(stopped, strdup(node->details->uname),
crm_strdup_printf("Stopped (%s)", services_ocf_exitcode_str(rc)));
} else {
g_hash_table_insert(stopped, strdup(node->details->uname),
strdup(state));
}
}
}
g_list_free(list);
}
if (stopped != NULL) {
GList *list = sorted_hash_table_values(stopped);
clone_header(out, &rc, rsc, clone_data);
for (GList *status_iter = list; status_iter != NULL; status_iter = status_iter->next) {
const char *status = status_iter->data;
GList *nodes = nodes_with_status(stopped, status);
char *str = node_list_to_str(nodes);
if (str != NULL) {
out->list_item(out, NULL, "%s: [ %s ]", status, str);
free(str);
}
g_list_free(nodes);
}
g_list_free(list);
g_hash_table_destroy(stopped);
/* If there are no instances of this clone (perhaps because there are no
* nodes configured), simply output the clone header by itself. This can
* come up in PCS testing.
*/
} else if (active_instances == 0) {
clone_header(out, &rc, rsc, clone_data);
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
void
clone_free(pe_resource_t * rsc)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_rsc_trace(rsc, "Freeing %s", rsc->id);
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
CRM_ASSERT(child_rsc);
pe_rsc_trace(child_rsc, "Freeing child %s", child_rsc->id);
free_xml(child_rsc->xml);
child_rsc->xml = NULL;
/* There could be a saved unexpanded xml */
free_xml(child_rsc->orig_xml);
child_rsc->orig_xml = NULL;
child_rsc->fns->free(child_rsc);
}
g_list_free(rsc->children);
if (clone_data) {
CRM_ASSERT(clone_data->demote_notify == NULL);
CRM_ASSERT(clone_data->stop_notify == NULL);
CRM_ASSERT(clone_data->start_notify == NULL);
CRM_ASSERT(clone_data->promote_notify == NULL);
}
common_free(rsc);
}
enum rsc_role_e
clone_resource_state(const pe_resource_t * rsc, gboolean current)
{
enum rsc_role_e clone_role = RSC_ROLE_UNKNOWN;
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, current);
if (a_role > clone_role) {
clone_role = a_role;
}
}
pe_rsc_trace(rsc, "%s role: %s", rsc->id, role2text(clone_role));
return clone_role;
}
/*!
* \internal
* \brief Check whether a clone has an instance for every node
*
* \param[in] rsc Clone to check
* \param[in] data_set Cluster state
*/
bool
pe__is_universal_clone(pe_resource_t *rsc,
pe_working_set_t *data_set)
{
if (pe_rsc_is_clone(rsc)) {
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (clone_data->clone_max == g_list_length(data_set->nodes)) {
return TRUE;
}
}
return FALSE;
}
gboolean
pe__clone_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent)
{
gboolean passes = FALSE;
clone_variant_data_t *clone_data = NULL;
if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches)) {
passes = TRUE;
} else {
get_clone_variant_data(clone_data, rsc);
passes = pcmk__str_in_list(ID(clone_data->xml_obj_child), only_rsc, pcmk__str_star_matches);
if (!passes) {
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (!child_rsc->fns->is_filtered(child_rsc, only_rsc, FALSE)) {
passes = TRUE;
break;
}
}
}
}
return !passes;
}
const char *
pe__clone_child_id(pe_resource_t *rsc)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
return ID(clone_data->xml_obj_child);
}
+
+/*!
+ * \internal
+ * \brief Check whether a clone is ordered
+ *
+ * \param[in] clone Clone resource to check
+ *
+ * \return true if clone is ordered, otherwise false
+ */
+bool
+pe__clone_is_ordered(pe_resource_t *clone)
+{
+ clone_variant_data_t *clone_data = NULL;
+
+ get_clone_variant_data(clone_data, clone);
+ return pcmk_is_set(clone_data->flags, pe__clone_ordered);
+}
+
+/*!
+ * \internal
+ * \brief Set a clone flag
+ *
+ * \param[in] clone Clone resource to set flag for
+ * \param[in] flag Clone flag to set
+ *
+ * \return Standard Pacemaker return code (either pcmk_rc_ok if flag was not
+ * already set or pcmk_rc_already if it was)
+ */
+int
+pe__set_clone_flag(pe_resource_t *clone, enum pe__clone_flags flag)
+{
+ clone_variant_data_t *clone_data = NULL;
+
+ get_clone_variant_data(clone_data, clone);
+ if (pcmk_is_set(clone_data->flags, flag)) {
+ return pcmk_rc_already;
+ }
+ clone_data->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,
+ "Clone", clone->id,
+ clone_data->flags, flag, "flag");
+ return pcmk_rc_ok;
+}
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 5b83e7aac6..c686a4f100 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -1,2646 +1,2619 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/util.h>
#include <glib.h>
#include <stdbool.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
#include "pe_status_private.h"
extern bool pcmk__is_daemon;
void print_str_str(gpointer key, gpointer value, gpointer user_data);
gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data);
static void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container,
pe_working_set_t * data_set, guint interval_ms);
static xmlNode *find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key,
gboolean include_disabled);
#if ENABLE_VERSIONED_ATTRS
pe_rsc_action_details_t *
pe_rsc_action_details(pe_action_t *action)
{
pe_rsc_action_details_t *details;
CRM_CHECK(action != NULL, return NULL);
if (action->action_details == NULL) {
action->action_details = calloc(1, sizeof(pe_rsc_action_details_t));
CRM_CHECK(action->action_details != NULL, return NULL);
}
details = (pe_rsc_action_details_t *) action->action_details;
if (details->versioned_parameters == NULL) {
details->versioned_parameters = create_xml_node(NULL,
XML_TAG_OP_VER_ATTRS);
}
if (details->versioned_meta == NULL) {
details->versioned_meta = create_xml_node(NULL, XML_TAG_OP_VER_META);
}
return details;
}
static void
pe_free_rsc_action_details(pe_action_t *action)
{
pe_rsc_action_details_t *details;
if ((action == NULL) || (action->action_details == NULL)) {
return;
}
details = (pe_rsc_action_details_t *) action->action_details;
if (details->versioned_parameters) {
free_xml(details->versioned_parameters);
}
if (details->versioned_meta) {
free_xml(details->versioned_meta);
}
action->action_details = NULL;
}
#endif
/*!
* \internal
* \brief Check whether we can fence a particular node
*
* \param[in] data_set Working set for cluster
* \param[in] node Name of node to check
*
* \return true if node can be fenced, false otherwise
*/
bool
pe_can_fence(pe_working_set_t *data_set, pe_node_t *node)
{
if (pe__is_guest_node(node)) {
/* Guest nodes are fenced by stopping their container resource. We can
* do that if the container's host is either online or fenceable.
*/
pe_resource_t *rsc = node->details->remote_rsc->container;
for (GList *n = rsc->running_on; n != NULL; n = n->next) {
pe_node_t *container_node = n->data;
if (!container_node->details->online
&& !pe_can_fence(data_set, container_node)) {
return false;
}
}
return true;
} else if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
return false; /* Turned off */
} else if (!pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) {
return false; /* No devices */
} else if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)) {
return true;
} else if (data_set->no_quorum_policy == no_quorum_ignore) {
return true;
} else if(node == NULL) {
return false;
} else if(node->details->online) {
crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname);
return true;
}
crm_trace("Cannot fence %s", node->details->uname);
return false;
}
/*!
* \internal
* \brief Copy a node object
*
* \param[in] this_node Node object to copy
*
* \return Newly allocated shallow copy of this_node
* \note This function asserts on errors and is guaranteed to return non-NULL.
*/
pe_node_t *
pe__copy_node(const pe_node_t *this_node)
{
pe_node_t *new_node = NULL;
CRM_ASSERT(this_node != NULL);
new_node = calloc(1, sizeof(pe_node_t));
CRM_ASSERT(new_node != NULL);
new_node->rsc_discover_mode = this_node->rsc_discover_mode;
new_node->weight = this_node->weight;
new_node->fixed = this_node->fixed;
new_node->details = this_node->details;
return new_node;
}
/* any node in list1 or list2 and not in the other gets a score of -INFINITY */
void
node_list_exclude(GHashTable * hash, GList *list, gboolean merge_scores)
{
GHashTable *result = hash;
pe_node_t *other_node = NULL;
GList *gIter = list;
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
other_node = pe_find_node_id(list, node->details->id);
if (other_node == NULL) {
node->weight = -INFINITY;
} else if (merge_scores) {
node->weight = pcmk__add_scores(node->weight, other_node->weight);
}
}
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
other_node = pe_hash_table_lookup(result, node->details->id);
if (other_node == NULL) {
pe_node_t *new_node = pe__copy_node(node);
new_node->weight = -INFINITY;
g_hash_table_insert(result, (gpointer) new_node->details->id, new_node);
}
}
}
/*!
* \internal
* \brief Create a node hash table from a node list
*
* \param[in] list Node list
*
* \return Hash table equivalent of node list
*/
GHashTable *
pe__node_list2table(GList *list)
{
GHashTable *result = NULL;
result = pcmk__strkey_table(NULL, free);
for (GList *gIter = list; gIter != NULL; gIter = gIter->next) {
pe_node_t *new_node = pe__copy_node((pe_node_t *) gIter->data);
g_hash_table_insert(result, (gpointer) new_node->details->id, new_node);
}
return result;
}
gint
sort_node_uname(gconstpointer a, gconstpointer b)
{
return pcmk__numeric_strcasecmp(((const pe_node_t *) a)->details->uname,
((const pe_node_t *) b)->details->uname);
}
/*!
* \internal
* \brief Output node weights to stdout
*
* \param[in] rsc Use allowed nodes for this resource
* \param[in] comment Text description to prefix lines with
* \param[in] nodes If rsc is not specified, use these nodes
*/
static void
pe__output_node_weights(pe_resource_t *rsc, const char *comment,
GHashTable *nodes, pe_working_set_t *data_set)
{
pcmk__output_t *out = data_set->priv;
char score[128]; // Stack-allocated since this is called frequently
// Sort the nodes so the output is consistent for regression tests
GList *list = g_list_sort(g_hash_table_get_values(nodes), sort_node_uname);
for (GList *gIter = list; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
score2char_stack(node->weight, score, sizeof(score));
out->message(out, "node-weight", rsc, comment, node->details->uname, score);
}
g_list_free(list);
}
/*!
* \internal
* \brief Log node weights at trace level
*
* \param[in] file Caller's filename
* \param[in] function Caller's function name
* \param[in] line Caller's line number
* \param[in] rsc Use allowed nodes for this resource
* \param[in] comment Text description to prefix lines with
* \param[in] nodes If rsc is not specified, use these nodes
*/
static void
pe__log_node_weights(const char *file, const char *function, int line,
pe_resource_t *rsc, const char *comment, GHashTable *nodes)
{
GHashTableIter iter;
pe_node_t *node = NULL;
char score[128]; // Stack-allocated since this is called frequently
// Don't waste time if we're not tracing at this point
pcmk__log_else(LOG_TRACE, return);
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
score2char_stack(node->weight, score, sizeof(score));
if (rsc) {
qb_log_from_external_source(function, file,
"%s: %s allocation score on %s: %s",
LOG_TRACE, line, 0,
comment, rsc->id,
node->details->uname, score);
} else {
qb_log_from_external_source(function, file, "%s: %s = %s",
LOG_TRACE, line, 0,
comment, node->details->uname,
score);
}
}
}
/*!
* \internal
* \brief Log or output node weights
*
* \param[in] file Caller's filename
* \param[in] function Caller's function name
* \param[in] line Caller's line number
* \param[in] to_log Log if true, otherwise output
* \param[in] rsc Use allowed nodes for this resource
* \param[in] comment Text description to prefix lines with
* \param[in] nodes Use these nodes
*/
void
pe__show_node_weights_as(const char *file, const char *function, int line,
bool to_log, pe_resource_t *rsc, const char *comment,
GHashTable *nodes, pe_working_set_t *data_set)
{
if (rsc != NULL && pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
// Don't show allocation scores for orphans
return;
}
if (nodes == NULL) {
// Nothing to show
return;
}
if (to_log) {
pe__log_node_weights(file, function, line, rsc, comment, nodes);
} else {
pe__output_node_weights(rsc, comment, nodes, data_set);
}
// If this resource has children, repeat recursively for each
if (rsc && rsc->children) {
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe__show_node_weights_as(file, function, line, to_log, child,
comment, child->allowed_nodes, data_set);
}
}
}
-gint
-sort_rsc_index(gconstpointer a, gconstpointer b)
-{
- const pe_resource_t *resource1 = (const pe_resource_t *)a;
- const pe_resource_t *resource2 = (const pe_resource_t *)b;
-
- if (a == NULL && b == NULL) {
- return 0;
- }
- if (a == NULL) {
- return 1;
- }
- if (b == NULL) {
- return -1;
- }
-
- if (resource1->sort_index > resource2->sort_index) {
- return -1;
- }
-
- if (resource1->sort_index < resource2->sort_index) {
- return 1;
- }
-
- return 0;
-}
-
gint
sort_rsc_priority(gconstpointer a, gconstpointer b)
{
const pe_resource_t *resource1 = (const pe_resource_t *)a;
const pe_resource_t *resource2 = (const pe_resource_t *)b;
if (a == NULL && b == NULL) {
return 0;
}
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (resource1->priority > resource2->priority) {
return -1;
}
if (resource1->priority < resource2->priority) {
return 1;
}
return 0;
}
static enum pe_quorum_policy
effective_quorum_policy(pe_resource_t *rsc, pe_working_set_t *data_set)
{
enum pe_quorum_policy policy = data_set->no_quorum_policy;
if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)) {
policy = no_quorum_ignore;
} else if (data_set->no_quorum_policy == no_quorum_demote) {
switch (rsc->role) {
case RSC_ROLE_PROMOTED:
case RSC_ROLE_UNPROMOTED:
if (rsc->next_role > RSC_ROLE_UNPROMOTED) {
pe__set_next_role(rsc, RSC_ROLE_UNPROMOTED,
"no-quorum-policy=demote");
}
policy = no_quorum_ignore;
break;
default:
policy = no_quorum_stop;
break;
}
}
return policy;
}
static void
add_singleton(pe_working_set_t *data_set, pe_action_t *action)
{
if (data_set->singletons == NULL) {
data_set->singletons = pcmk__strkey_table(NULL, NULL);
}
g_hash_table_insert(data_set->singletons, action->uuid, action);
}
static pe_action_t *
lookup_singleton(pe_working_set_t *data_set, const char *action_uuid)
{
if (data_set->singletons == NULL) {
return NULL;
}
return g_hash_table_lookup(data_set->singletons, action_uuid);
}
/*!
* \internal
* \brief Find an existing action that matches arguments
*
* \param[in] key Action key to match
* \param[in] rsc Resource to match (if any)
* \param[in] node Node to match (if any)
* \param[in] data_set Cluster working set
*
* \return Existing action that matches arguments (or NULL if none)
*/
static pe_action_t *
find_existing_action(const char *key, pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set)
{
GList *matches = NULL;
pe_action_t *action = NULL;
/* When rsc is NULL, it would be quicker to check data_set->singletons,
* but checking all data_set->actions takes the node into account.
*/
matches = find_actions(((rsc == NULL)? data_set->actions : rsc->actions),
key, node);
if (matches == NULL) {
return NULL;
}
CRM_LOG_ASSERT(!pcmk__list_of_multiple(matches));
action = matches->data;
g_list_free(matches);
return action;
}
/*!
* \internal
* \brief Create a new action object
*
* \param[in] key Action key
* \param[in] task Action name
* \param[in] rsc Resource that action is for (if any)
* \param[in] node Node that action is on (if any)
* \param[in] optional Whether action should be considered optional
* \param[in] for_graph Whether action should be recorded in transition graph
* \param[in] data_set Cluster working set
*
* \return Newly allocated action
* \note This function takes ownership of \p key. It is the caller's
* responsibility to free the return value with pe_free_action().
*/
static pe_action_t *
new_action(char *key, const char *task, pe_resource_t *rsc, pe_node_t *node,
bool optional, bool for_graph, pe_working_set_t *data_set)
{
pe_action_t *action = calloc(1, sizeof(pe_action_t));
CRM_ASSERT(action != NULL);
action->rsc = rsc;
action->task = strdup(task); CRM_ASSERT(action->task != NULL);
action->uuid = key;
action->extra = pcmk__strkey_table(free, free);
action->meta = pcmk__strkey_table(free, free);
if (node) {
action->node = pe__copy_node(node);
}
if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
// Resource history deletion for a node can be done on the DC
pe__set_action_flags(action, pe_action_dc);
}
pe__set_action_flags(action, pe_action_runnable);
if (optional) {
pe__set_action_flags(action, pe_action_optional);
} else {
pe__clear_action_flags(action, pe_action_optional);
}
if (rsc != NULL) {
guint interval_ms = 0;
action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE);
parse_op_key(key, NULL, NULL, &interval_ms);
unpack_operation(action, action->op_entry, rsc->container, data_set,
interval_ms);
}
if (for_graph) {
pe_rsc_trace(rsc, "Created %s action %d (%s): %s for %s on %s",
(optional? "optional" : "required"),
data_set->action_id, key, task,
((rsc == NULL)? "no resource" : rsc->id),
((node == NULL)? "no node" : node->details->uname));
action->id = data_set->action_id++;
data_set->actions = g_list_prepend(data_set->actions, action);
if (rsc == NULL) {
add_singleton(data_set, action);
} else {
rsc->actions = g_list_prepend(rsc->actions, action);
}
}
return action;
}
/*!
* \internal
* \brief Evaluate node attribute values for an action
*
* \param[in] action Action to unpack attributes for
* \param[in] data_set Cluster working set
*/
static void
unpack_action_node_attributes(pe_action_t *action, pe_working_set_t *data_set)
{
if (!pcmk_is_set(action->flags, pe_action_have_node_attrs)
&& (action->op_entry != NULL)) {
pe_rule_eval_data_t rule_data = {
.node_hash = action->node->details->attrs,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
pe__set_action_flags(action, pe_action_have_node_attrs);
pe__unpack_dataset_nvpairs(action->op_entry, XML_TAG_ATTR_SETS,
&rule_data, action->extra, NULL,
FALSE, data_set);
}
}
/*!
* \internal
* \brief Update an action's optional flag
*
* \param[in] action Action to update
* \param[in] optional Requested optional status
*/
static void
update_action_optional(pe_action_t *action, gboolean optional)
{
// Force a non-recurring action to be optional if its resource is unmanaged
if ((action->rsc != NULL) && (action->node != NULL)
&& !pcmk_is_set(action->flags, pe_action_pseudo)
&& !pcmk_is_set(action->rsc->flags, pe_rsc_managed)
&& (g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS) == NULL)) {
pe_rsc_debug(action->rsc, "%s on %s is optional (%s is unmanaged)",
action->uuid, action->node->details->uname,
action->rsc->id);
pe__set_action_flags(action, pe_action_optional);
// We shouldn't clear runnable here because ... something
// Otherwise require the action if requested
} else if (!optional) {
pe__clear_action_flags(action, pe_action_optional);
}
}
/*!
* \internal
* \brief Update a resource action's runnable flag
*
* \param[in] action Action to update
* \param[in] for_graph Whether action should be recorded in transition graph
* \param[in] data_set Cluster working set
*
* \note This may also schedule fencing if a stop is unrunnable.
*/
static void
update_resource_action_runnable(pe_action_t *action, bool for_graph,
pe_working_set_t *data_set)
{
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
return;
}
if (action->node == NULL) {
pe_rsc_trace(action->rsc, "%s is unrunnable (unallocated)",
action->uuid);
pe__clear_action_flags(action, pe_action_runnable);
} else if (!pcmk_is_set(action->flags, pe_action_dc)
&& !(action->node->details->online)
&& (!pe__is_guest_node(action->node)
|| action->node->details->remote_requires_reset)) {
pe__clear_action_flags(action, pe_action_runnable);
do_crm_log((for_graph? LOG_WARNING: LOG_TRACE),
"%s on %s is unrunnable (node is offline)",
action->uuid, action->node->details->uname);
if (pcmk_is_set(action->rsc->flags, pe_rsc_managed)
&& for_graph
&& pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)
&& !(action->node->details->unclean)) {
pe_fence_node(data_set, action->node, "stop is unrunnable", false);
}
} else if (!pcmk_is_set(action->flags, pe_action_dc)
&& action->node->details->pending) {
pe__clear_action_flags(action, pe_action_runnable);
do_crm_log((for_graph? LOG_WARNING: LOG_TRACE),
"Action %s on %s is unrunnable (node is pending)",
action->uuid, action->node->details->uname);
} else if (action->needs == rsc_req_nothing) {
pe_action_set_reason(action, NULL, TRUE);
if (pe__is_guest_node(action->node)
&& !pe_can_fence(data_set, action->node)) {
/* An action that requires nothing usually does not require any
* fencing in order to be runnable. However, there is an exception:
* such an action cannot be completed if it is on a guest node whose
* host is unclean and cannot be fenced.
*/
pe_rsc_debug(action->rsc, "%s on %s is unrunnable "
"(node's host cannot be fenced)",
action->uuid, action->node->details->uname);
pe__clear_action_flags(action, pe_action_runnable);
} else {
pe_rsc_trace(action->rsc,
"%s on %s does not require fencing or quorum",
action->uuid, action->node->details->uname);
pe__set_action_flags(action, pe_action_runnable);
}
} else {
switch (effective_quorum_policy(action->rsc, data_set)) {
case no_quorum_stop:
pe_rsc_debug(action->rsc, "%s on %s is unrunnable (no quorum)",
action->uuid, action->node->details->uname);
pe__clear_action_flags(action, pe_action_runnable);
pe_action_set_reason(action, "no quorum", true);
break;
case no_quorum_freeze:
if (!action->rsc->fns->active(action->rsc, TRUE)
|| (action->rsc->next_role > action->rsc->role)) {
pe_rsc_debug(action->rsc,
"%s on %s is unrunnable (no quorum)",
action->uuid, action->node->details->uname);
pe__clear_action_flags(action, pe_action_runnable);
pe_action_set_reason(action, "quorum freeze", true);
}
break;
default:
//pe_action_set_reason(action, NULL, TRUE);
pe__set_action_flags(action, pe_action_runnable);
break;
}
}
}
/*!
* \internal
* \brief Update a resource object's flags for a new action on it
*
* \param[in] rsc Resource that action is for (if any)
* \param[in] action New action
*/
static void
update_resource_flags_for_action(pe_resource_t *rsc, pe_action_t *action)
{
/* @COMPAT pe_rsc_starting and pe_rsc_stopping are not actually used
* within Pacemaker, and should be deprecated and eventually removed
*/
if (pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) {
pe__set_resource_flags(rsc, pe_rsc_stopping);
} else if (pcmk__str_eq(action->task, CRMD_ACTION_START, pcmk__str_casei)) {
if (pcmk_is_set(action->flags, pe_action_runnable)) {
pe__set_resource_flags(rsc, pe_rsc_starting);
} else {
pe__clear_resource_flags(rsc, pe_rsc_starting);
}
}
}
/*!
* \brief Create or update an action object
*
* \param[in] rsc Resource that action is for (if any)
* \param[in] key Action key (must be non-NULL)
* \param[in] task Action name (must be non-NULL)
* \param[in] on_node Node that action is on (if any)
* \param[in] optional Whether action should be considered optional
* \param[in] save_action Whether action should be recorded in transition graph
* \param[in] data_set Cluster working set
*
* \return Action object corresponding to arguments
* \note This function takes ownership of (and might free) \p key. If
* \p save_action is true, \p data_set will own the returned action,
* otherwise it is the caller's responsibility to free the return value
* with pe_free_action().
*/
pe_action_t *
custom_action(pe_resource_t *rsc, char *key, const char *task,
pe_node_t *on_node, gboolean optional, gboolean save_action,
pe_working_set_t *data_set)
{
pe_action_t *action = NULL;
CRM_ASSERT((key != NULL) && (task != NULL) && (data_set != NULL));
if (save_action) {
action = find_existing_action(key, rsc, on_node, data_set);
}
if (action == NULL) {
action = new_action(key, task, rsc, on_node, optional, save_action,
data_set);
} else {
free(key);
}
update_action_optional(action, optional);
if (rsc != NULL) {
if (action->node != NULL) {
unpack_action_node_attributes(action, data_set);
}
update_resource_action_runnable(action, save_action, data_set);
if (save_action) {
update_resource_flags_for_action(rsc, action);
}
}
return action;
}
static bool
valid_stop_on_fail(const char *value)
{
return !pcmk__strcase_any_of(value, "standby", "demote", "stop", NULL);
}
static const char *
unpack_operation_on_fail(pe_action_t * action)
{
const char *name = NULL;
const char *role = NULL;
const char *on_fail = NULL;
const char *interval_spec = NULL;
const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL);
if (pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)
&& !valid_stop_on_fail(value)) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s stop "
"action to default value because '%s' is not "
"allowed for stop", action->rsc->id, value);
return NULL;
} else if (pcmk__str_eq(action->task, CRMD_ACTION_DEMOTE, pcmk__str_casei) && !value) {
// demote on_fail defaults to monitor value for promoted role if present
xmlNode *operation = NULL;
CRM_CHECK(action->rsc != NULL, return NULL);
for (operation = pcmk__xe_first_child(action->rsc->ops_xml);
(operation != NULL) && (value == NULL);
operation = pcmk__xe_next(operation)) {
bool enabled = false;
if (!pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
continue;
}
name = crm_element_value(operation, "name");
role = crm_element_value(operation, "role");
on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL);
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!on_fail) {
continue;
} else if (pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok && !enabled) {
continue;
} else if (!pcmk__str_eq(name, "monitor", pcmk__str_casei)
|| !pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
RSC_ROLE_PROMOTED_LEGACY_S,
NULL)) {
continue;
} else if (crm_parse_interval_spec(interval_spec) == 0) {
continue;
} else if (pcmk__str_eq(on_fail, "demote", pcmk__str_casei)) {
continue;
}
value = on_fail;
}
} else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
value = "ignore";
} else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
name = crm_element_value(action->op_entry, "name");
role = crm_element_value(action->op_entry, "role");
interval_spec = crm_element_value(action->op_entry,
XML_LRM_ATTR_INTERVAL);
if (!pcmk__str_eq(name, CRMD_ACTION_PROMOTE, pcmk__str_casei)
&& (!pcmk__str_eq(name, CRMD_ACTION_STATUS, pcmk__str_casei)
|| !pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
RSC_ROLE_PROMOTED_LEGACY_S, NULL)
|| (crm_parse_interval_spec(interval_spec) == 0))) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s %s "
"action to default value because 'demote' is not "
"allowed for it", action->rsc->id, name);
return NULL;
}
}
return value;
}
static xmlNode *
find_min_interval_mon(pe_resource_t * rsc, gboolean include_disabled)
{
guint interval_ms = 0;
guint min_interval_ms = G_MAXUINT;
const char *name = NULL;
const char *interval_spec = NULL;
xmlNode *op = NULL;
xmlNode *operation = NULL;
for (operation = pcmk__xe_first_child(rsc->ops_xml);
operation != NULL;
operation = pcmk__xe_next(operation)) {
if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
bool enabled = false;
name = crm_element_value(operation, "name");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!include_disabled && pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok &&
!enabled) {
continue;
}
if (!pcmk__str_eq(name, RSC_STATUS, pcmk__str_casei)) {
continue;
}
interval_ms = crm_parse_interval_spec(interval_spec);
if (interval_ms && (interval_ms < min_interval_ms)) {
min_interval_ms = interval_ms;
op = operation;
}
}
}
return op;
}
static int
unpack_start_delay(const char *value, GHashTable *meta)
{
int start_delay = 0;
if (value != NULL) {
start_delay = crm_get_msec(value);
if (start_delay < 0) {
start_delay = 0;
}
if (meta) {
g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY),
pcmk__itoa(start_delay));
}
}
return start_delay;
}
// true if value contains valid, non-NULL interval origin for recurring op
static bool
unpack_interval_origin(const char *value, xmlNode *xml_obj, guint interval_ms,
crm_time_t *now, long long *start_delay)
{
long long result = 0;
guint interval_sec = interval_ms / 1000;
crm_time_t *origin = NULL;
// Ignore unspecified values and non-recurring operations
if ((value == NULL) || (interval_ms == 0) || (now == NULL)) {
return false;
}
// Parse interval origin from text
origin = crm_time_new(value);
if (origin == NULL) {
pcmk__config_err("Ignoring '" XML_OP_ATTR_ORIGIN "' for operation "
"'%s' because '%s' is not valid",
(ID(xml_obj)? ID(xml_obj) : "(missing ID)"), value);
return false;
}
// Get seconds since origin (negative if origin is in the future)
result = crm_time_get_seconds(now) - crm_time_get_seconds(origin);
crm_time_free(origin);
// Calculate seconds from closest interval to now
result = result % interval_sec;
// Calculate seconds remaining until next interval
result = ((result <= 0)? 0 : interval_sec) - result;
crm_info("Calculated a start delay of %llds for operation '%s'",
result,
(ID(xml_obj)? ID(xml_obj) : "(unspecified)"));
if (start_delay != NULL) {
*start_delay = result * 1000; // milliseconds
}
return true;
}
static int
unpack_timeout(const char *value)
{
int timeout_ms = crm_get_msec(value);
if (timeout_ms < 0) {
timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
return timeout_ms;
}
int
pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set)
{
xmlNode *child = NULL;
GHashTable *action_meta = NULL;
const char *timeout_spec = NULL;
int timeout_ms = 0;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP);
child != NULL; child = crm_next_same_xml(child)) {
if (pcmk__str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME),
pcmk__str_casei)) {
timeout_spec = crm_element_value(child, XML_ATTR_TIMEOUT);
break;
}
}
if (timeout_spec == NULL && data_set->op_defaults) {
action_meta = pcmk__strkey_table(free, free);
pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS,
&rule_data, action_meta, NULL, FALSE, data_set);
timeout_spec = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT);
}
// @TODO check meta-attributes (including versioned meta-attributes)
// @TODO maybe use min-interval monitor timeout as default for monitors
timeout_ms = crm_get_msec(timeout_spec);
if (timeout_ms < 0) {
timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
if (action_meta != NULL) {
g_hash_table_destroy(action_meta);
}
return timeout_ms;
}
#if ENABLE_VERSIONED_ATTRS
static void
unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj,
guint interval_ms, crm_time_t *now)
{
xmlNode *attrs = NULL;
xmlNode *attr = NULL;
for (attrs = pcmk__xe_first_child(versioned_meta); attrs != NULL;
attrs = pcmk__xe_next(attrs)) {
for (attr = pcmk__xe_first_child(attrs); attr != NULL;
attr = pcmk__xe_next(attr)) {
const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
if (pcmk__str_eq(name, XML_OP_ATTR_START_DELAY, pcmk__str_casei)) {
int start_delay = unpack_start_delay(value, NULL);
crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay);
} else if (pcmk__str_eq(name, XML_OP_ATTR_ORIGIN, pcmk__str_casei)) {
long long start_delay = 0;
if (unpack_interval_origin(value, xml_obj, interval_ms, now,
&start_delay)) {
crm_xml_add(attr, XML_NVPAIR_ATTR_NAME,
XML_OP_ATTR_START_DELAY);
crm_xml_add_ll(attr, XML_NVPAIR_ATTR_VALUE, start_delay);
}
} else if (pcmk__str_eq(name, XML_ATTR_TIMEOUT, pcmk__str_casei)) {
int timeout_ms = unpack_timeout(value);
crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, timeout_ms);
}
}
}
}
#endif
/*!
* \brief Unpack operation XML into an action structure
*
* Unpack an operation's meta-attributes (normalizing the interval, timeout,
* and start delay values as integer milliseconds), requirements, and
* failure policy.
*
* \param[in,out] action Action to unpack into
* \param[in] xml_obj Operation XML (or NULL if all defaults)
* \param[in] container Resource that contains affected resource, if any
* \param[in] data_set Cluster state
* \param[in] interval_ms How frequently to perform the operation
*/
static void
unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container,
pe_working_set_t * data_set, guint interval_ms)
{
int timeout_ms = 0;
const char *value = NULL;
bool is_probe = false;
#if ENABLE_VERSIONED_ATTRS
pe_rsc_action_details_t *rsc_details = NULL;
#endif
pe_rsc_eval_data_t rsc_rule_data = {
.standard = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_CLASS),
.provider = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_PROVIDER),
.agent = crm_element_value(action->rsc->xml, XML_EXPR_ATTR_TYPE)
};
pe_op_eval_data_t op_rule_data = {
.op_name = action->task,
.interval = interval_ms
};
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = &rsc_rule_data,
.op_data = &op_rule_data
};
CRM_CHECK(action && action->rsc, return);
is_probe = pcmk_is_probe(action->task, interval_ms);
// Cluster-wide <op_defaults> <meta_attributes>
pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data,
action->meta, NULL, FALSE, data_set);
// Determine probe default timeout differently
if (is_probe) {
xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE);
if (min_interval_mon) {
value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT);
if (value) {
crm_trace("\t%s: Setting default timeout to minimum-interval "
"monitor's timeout '%s'", action->uuid, value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT),
strdup(value));
}
}
}
if (xml_obj) {
xmlAttrPtr xIter = NULL;
// <op> <meta_attributes> take precedence over defaults
pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, &rule_data,
action->meta, NULL, TRUE, data_set);
#if ENABLE_VERSIONED_ATTRS
rsc_details = pe_rsc_action_details(action);
/* Non-versioned attributes also unpack XML_TAG_ATTR_SETS, but that
* capability is deprecated, so we don't need to extend that support to
* versioned attributes.
*/
pe_eval_versioned_attributes(data_set->input, xml_obj,
XML_TAG_META_SETS, &rule_data,
rsc_details->versioned_meta,
NULL);
#endif
/* Anything set as an <op> XML property has highest precedence.
* This ensures we use the name and interval from the <op> tag.
*/
for (xIter = xml_obj->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = crm_element_value(xml_obj, prop_name);
g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value));
}
}
g_hash_table_remove(action->meta, "id");
// Normalize interval to milliseconds
if (interval_ms > 0) {
g_hash_table_replace(action->meta, strdup(XML_LRM_ATTR_INTERVAL),
crm_strdup_printf("%u", interval_ms));
} else {
g_hash_table_remove(action->meta, XML_LRM_ATTR_INTERVAL);
}
/*
* Timeout order of precedence:
* 1. pcmk_monitor_timeout (if rsc has pcmk_ra_cap_fence_params
* and task is start or a probe; pcmk_monitor_timeout works
* by default for a recurring monitor)
* 2. explicit op timeout on the primitive
* 3. default op timeout
* a. if probe, then min-interval monitor's timeout
* b. else, in XML_CIB_TAG_OPCONFIG
* 4. CRM_DEFAULT_OP_TIMEOUT_S
*
* #1 overrides general rule of <op> XML property having highest
* precedence.
*/
if (pcmk_is_set(pcmk_get_ra_caps(rsc_rule_data.standard),
pcmk_ra_cap_fence_params)
&& (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)
|| is_probe)) {
GHashTable *params = pe_rsc_params(action->rsc, action->node, data_set);
value = g_hash_table_lookup(params, "pcmk_monitor_timeout");
if (value) {
crm_trace("\t%s: Setting timeout to pcmk_monitor_timeout '%s', "
"overriding default", action->uuid, value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT),
strdup(value));
}
}
// Normalize timeout to positive milliseconds
value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
timeout_ms = unpack_timeout(value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT),
pcmk__itoa(timeout_ms));
if (!pcmk__strcase_any_of(action->task, RSC_START, RSC_PROMOTE, NULL)) {
action->needs = rsc_req_nothing;
value = "nothing (not start or promote)";
} else if (pcmk_is_set(action->rsc->flags, pe_rsc_needs_fencing)) {
action->needs = rsc_req_stonith;
value = "fencing";
} else if (pcmk_is_set(action->rsc->flags, pe_rsc_needs_quorum)) {
action->needs = rsc_req_quorum;
value = "quorum";
} else {
action->needs = rsc_req_nothing;
value = "nothing";
}
pe_rsc_trace(action->rsc, "%s requires %s", action->uuid, value);
value = unpack_operation_on_fail(action);
if (value == NULL) {
} else if (pcmk__str_eq(value, "block", pcmk__str_casei)) {
action->on_fail = action_fail_block;
g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block"));
value = "block"; // The above could destroy the original string
} else if (pcmk__str_eq(value, "fence", pcmk__str_casei)) {
action->on_fail = action_fail_fence;
value = "node fencing";
if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for "
"operation '%s' to 'stop' because 'fence' is not "
"valid when fencing is disabled", action->uuid);
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop resource";
}
} else if (pcmk__str_eq(value, "standby", pcmk__str_casei)) {
action->on_fail = action_fail_standby;
value = "node standby";
} else if (pcmk__strcase_any_of(value, "ignore", PCMK__VALUE_NOTHING,
NULL)) {
action->on_fail = action_fail_ignore;
value = "ignore";
} else if (pcmk__str_eq(value, "migrate", pcmk__str_casei)) {
action->on_fail = action_fail_migrate;
value = "force migration";
} else if (pcmk__str_eq(value, "stop", pcmk__str_casei)) {
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop resource";
} else if (pcmk__str_eq(value, "restart", pcmk__str_casei)) {
action->on_fail = action_fail_recover;
value = "restart (and possibly migrate)";
} else if (pcmk__str_eq(value, "restart-container", pcmk__str_casei)) {
if (container) {
action->on_fail = action_fail_restart_container;
value = "restart container (and possibly migrate)";
} else {
value = NULL;
}
} else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
action->on_fail = action_fail_demote;
value = "demote instance";
} else {
pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value);
value = NULL;
}
/* defaults */
if (value == NULL && container) {
action->on_fail = action_fail_restart_container;
value = "restart container (and possibly migrate) (default)";
/* For remote nodes, ensure that any failure that results in dropping an
* active connection to the node results in fencing of the node.
*
* There are only two action failures that don't result in fencing.
* 1. probes - probe failures are expected.
* 2. start - a start failure indicates that an active connection does not already
* exist. The user can set op on-fail=fence if they really want to fence start
* failures. */
} else if (((value == NULL) || !pcmk_is_set(action->rsc->flags, pe_rsc_managed))
&& pe__resource_is_remote_conn(action->rsc, data_set)
&& !(pcmk__str_eq(action->task, CRMD_ACTION_STATUS, pcmk__str_casei)
&& (interval_ms == 0))
&& !pcmk__str_eq(action->task, CRMD_ACTION_START, pcmk__str_casei)) {
if (!pcmk_is_set(action->rsc->flags, pe_rsc_managed)) {
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop unmanaged remote node (enforcing default)";
} else {
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
value = "fence remote node (default)";
} else {
value = "recover remote node connection (default)";
}
if (action->rsc->remote_reconnect_ms) {
action->fail_role = RSC_ROLE_STOPPED;
}
action->on_fail = action_fail_reset_remote;
}
} else if (value == NULL && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) {
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
action->on_fail = action_fail_fence;
value = "resource fence (default)";
} else {
action->on_fail = action_fail_block;
value = "resource block (default)";
}
} else if (value == NULL) {
action->on_fail = action_fail_recover;
value = "restart (and possibly migrate) (default)";
}
pe_rsc_trace(action->rsc, "%s failure handling: %s",
action->uuid, value);
value = NULL;
if (xml_obj != NULL) {
value = g_hash_table_lookup(action->meta, "role_after_failure");
if (value) {
pe_warn_once(pe_wo_role_after,
"Support for role_after_failure is deprecated and will be removed in a future release");
}
}
if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) {
action->fail_role = text2role(value);
}
/* defaults */
if (action->fail_role == RSC_ROLE_UNKNOWN) {
if (pcmk__str_eq(action->task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) {
action->fail_role = RSC_ROLE_UNPROMOTED;
} else {
action->fail_role = RSC_ROLE_STARTED;
}
}
pe_rsc_trace(action->rsc, "%s failure results in: %s",
action->uuid, role2text(action->fail_role));
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY);
if (value) {
unpack_start_delay(value, action->meta);
} else {
long long start_delay = 0;
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN);
if (unpack_interval_origin(value, xml_obj, interval_ms, data_set->now,
&start_delay)) {
g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY),
crm_strdup_printf("%lld", start_delay));
}
}
#if ENABLE_VERSIONED_ATTRS
unpack_versioned_meta(rsc_details->versioned_meta, xml_obj, interval_ms,
data_set->now);
#endif
}
static xmlNode *
find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key, gboolean include_disabled)
{
guint interval_ms = 0;
gboolean do_retry = TRUE;
char *local_key = NULL;
const char *name = NULL;
const char *interval_spec = NULL;
char *match_key = NULL;
xmlNode *op = NULL;
xmlNode *operation = NULL;
retry:
for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL;
operation = pcmk__xe_next(operation)) {
if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
bool enabled = false;
name = crm_element_value(operation, "name");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!include_disabled && pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok &&
!enabled) {
continue;
}
interval_ms = crm_parse_interval_spec(interval_spec);
match_key = pcmk__op_key(rsc->id, name, interval_ms);
if (pcmk__str_eq(key, match_key, pcmk__str_casei)) {
op = operation;
}
free(match_key);
if (rsc->clone_name) {
match_key = pcmk__op_key(rsc->clone_name, name, interval_ms);
if (pcmk__str_eq(key, match_key, pcmk__str_casei)) {
op = operation;
}
free(match_key);
}
if (op != NULL) {
free(local_key);
return op;
}
}
}
free(local_key);
if (do_retry == FALSE) {
return NULL;
}
do_retry = FALSE;
if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) {
local_key = pcmk__op_key(rsc->id, "migrate", 0);
key = local_key;
goto retry;
} else if (strstr(key, "_notify_")) {
local_key = pcmk__op_key(rsc->id, "notify", 0);
key = local_key;
goto retry;
}
return NULL;
}
xmlNode *
find_rsc_op_entry(pe_resource_t * rsc, const char *key)
{
return find_rsc_op_entry_helper(rsc, key, FALSE);
}
/*
* Used by the HashTable for-loop
*/
void
print_str_str(gpointer key, gpointer value, gpointer user_data)
{
crm_trace("%s%s %s ==> %s",
user_data == NULL ? "" : (char *)user_data,
user_data == NULL ? "" : ": ", (char *)key, (char *)value);
}
void
pe_free_action(pe_action_t * action)
{
if (action == NULL) {
return;
}
g_list_free_full(action->actions_before, free); /* pe_action_wrapper_t* */
g_list_free_full(action->actions_after, free); /* pe_action_wrapper_t* */
if (action->extra) {
g_hash_table_destroy(action->extra);
}
if (action->meta) {
g_hash_table_destroy(action->meta);
}
#if ENABLE_VERSIONED_ATTRS
if (action->rsc) {
pe_free_rsc_action_details(action);
}
#endif
free(action->cancel_task);
free(action->reason);
free(action->task);
free(action->uuid);
free(action->node);
free(action);
}
GList *
find_recurring_actions(GList *input, pe_node_t * not_on_node)
{
const char *value = NULL;
GList *result = NULL;
GList *gIter = input;
CRM_CHECK(input != NULL, return NULL);
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS);
if (value == NULL) {
/* skip */
} else if (pcmk__str_eq(value, "0", pcmk__str_casei)) {
/* skip */
} else if (pcmk__str_eq(CRMD_ACTION_CANCEL, action->task, pcmk__str_casei)) {
/* skip */
} else if (not_on_node == NULL) {
crm_trace("(null) Found: %s", action->uuid);
result = g_list_prepend(result, action);
} else if (action->node == NULL) {
/* skip */
} else if (action->node->details != not_on_node->details) {
crm_trace("Found: %s", action->uuid);
result = g_list_prepend(result, action);
}
}
return result;
}
enum action_tasks
get_complex_task(pe_resource_t * rsc, const char *name, gboolean allow_non_atomic)
{
enum action_tasks task = text2task(name);
if (rsc == NULL) {
return task;
} else if (allow_non_atomic == FALSE || rsc->variant == pe_native) {
switch (task) {
case stopped_rsc:
case started_rsc:
case action_demoted:
case action_promoted:
crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id);
return task - 1;
default:
break;
}
}
return task;
}
pe_action_t *
find_first_action(GList *input, const char *uuid, const char *task, pe_node_t * on_node)
{
GList *gIter = NULL;
CRM_CHECK(uuid || task, return NULL);
for (gIter = input; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (uuid != NULL && !pcmk__str_eq(uuid, action->uuid, pcmk__str_casei)) {
continue;
} else if (task != NULL && !pcmk__str_eq(task, action->task, pcmk__str_casei)) {
continue;
} else if (on_node == NULL) {
return action;
} else if (action->node == NULL) {
continue;
} else if (on_node->details == action->node->details) {
return action;
}
}
return NULL;
}
GList *
find_actions(GList *input, const char *key, const pe_node_t *on_node)
{
GList *gIter = input;
GList *result = NULL;
CRM_CHECK(key != NULL, return NULL);
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) {
continue;
} else if (on_node == NULL) {
crm_trace("Action %s matches (ignoring node)", key);
result = g_list_prepend(result, action);
} else if (action->node == NULL) {
crm_trace("Action %s matches (unallocated, assigning to %s)",
key, on_node->details->uname);
action->node = pe__copy_node(on_node);
result = g_list_prepend(result, action);
} else if (on_node->details == action->node->details) {
crm_trace("Action %s on %s matches", key, on_node->details->uname);
result = g_list_prepend(result, action);
}
}
return result;
}
GList *
find_actions_exact(GList *input, const char *key, const pe_node_t *on_node)
{
GList *result = NULL;
CRM_CHECK(key != NULL, return NULL);
if (on_node == NULL) {
return NULL;
}
for (GList *gIter = input; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if ((action->node != NULL)
&& pcmk__str_eq(key, action->uuid, pcmk__str_casei)
&& pcmk__str_eq(on_node->details->id, action->node->details->id,
pcmk__str_casei)) {
crm_trace("Action %s on %s matches", key, on_node->details->uname);
result = g_list_prepend(result, action);
}
}
return result;
}
/*!
* \brief Find all actions of given type for a resource
*
* \param[in] rsc Resource to search
* \param[in] node Find only actions scheduled on this node
* \param[in] task Action name to search for
* \param[in] require_node If TRUE, NULL node or action node will not match
*
* \return List of actions found (or NULL if none)
* \note If node is not NULL and require_node is FALSE, matching actions
* without a node will be assigned to node.
*/
GList *
pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node,
const char *task, bool require_node)
{
GList *result = NULL;
char *key = pcmk__op_key(rsc->id, task, 0);
if (require_node) {
result = find_actions_exact(rsc->actions, key, node);
} else {
result = find_actions(rsc->actions, key, node);
}
free(key);
return result;
}
static void
resource_node_score(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag)
{
pe_node_t *match = NULL;
if ((rsc->exclusive_discover || (node->rsc_discover_mode == pe_discover_never))
&& pcmk__str_eq(tag, "symmetric_default", pcmk__str_casei)) {
/* This string comparision may be fragile, but exclusive resources and
* exclusive nodes should not have the symmetric_default constraint
* applied to them.
*/
return;
} else if (rsc->children) {
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
resource_node_score(child_rsc, node, score, tag);
}
}
pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score);
match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (match == NULL) {
match = pe__copy_node(node);
g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match);
}
match->weight = pcmk__add_scores(match->weight, score);
}
void
resource_location(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag,
pe_working_set_t * data_set)
{
if (node != NULL) {
resource_node_score(rsc, node, score, tag);
} else if (data_set != NULL) {
GList *gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node_iter = (pe_node_t *) gIter->data;
resource_node_score(rsc, node_iter, score, tag);
}
} else {
GHashTableIter iter;
pe_node_t *node_iter = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) {
resource_node_score(rsc, node_iter, score, tag);
}
}
if (node == NULL && score == -INFINITY) {
if (rsc->allocated_to) {
crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname);
free(rsc->allocated_to);
rsc->allocated_to = NULL;
}
}
}
#define sort_return(an_int, why) do { \
free(a_uuid); \
free(b_uuid); \
crm_trace("%s (%d) %c %s (%d) : %s", \
a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \
b_xml_id, b_call_id, why); \
return an_int; \
} while(0)
int
pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b,
bool same_node_default)
{
int a_call_id = -1;
int b_call_id = -1;
char *a_uuid = NULL;
char *b_uuid = NULL;
const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID);
const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID);
const char *a_node = crm_element_value(xml_a, XML_LRM_ATTR_TARGET);
const char *b_node = crm_element_value(xml_b, XML_LRM_ATTR_TARGET);
bool same_node = true;
/* @COMPAT The on_node attribute was added to last_failure as of 1.1.13 (via
* 8b3ca1c) and the other entries as of 1.1.12 (via 0b07b5c).
*
* In case that any of the lrm_rsc_op entries doesn't have on_node
* attribute, we need to explicitly tell whether the two operations are on
* the same node.
*/
if (a_node == NULL || b_node == NULL) {
same_node = same_node_default;
} else {
same_node = pcmk__str_eq(a_node, b_node, pcmk__str_casei);
}
if (same_node && pcmk__str_eq(a_xml_id, b_xml_id, pcmk__str_none)) {
/* We have duplicate lrm_rsc_op entries in the status
* section which is unlikely to be a good thing
* - we can handle it easily enough, but we need to get
* to the bottom of why it's happening.
*/
pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id);
sort_return(0, "duplicate");
}
crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id);
crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id);
if (a_call_id == -1 && b_call_id == -1) {
/* both are pending ops so it doesn't matter since
* stops are never pending
*/
sort_return(0, "pending");
} else if (same_node && a_call_id >= 0 && a_call_id < b_call_id) {
sort_return(-1, "call id");
} else if (same_node && b_call_id >= 0 && a_call_id > b_call_id) {
sort_return(1, "call id");
} else if (a_call_id >= 0 && b_call_id >= 0
&& (!same_node || a_call_id == b_call_id)) {
/*
* The op and last_failed_op are the same
* Order on last-rc-change
*/
time_t last_a = -1;
time_t last_b = -1;
crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);
crm_trace("rc-change: %lld vs %lld",
(long long) last_a, (long long) last_b);
if (last_a >= 0 && last_a < last_b) {
sort_return(-1, "rc-change");
} else if (last_b >= 0 && last_a > last_b) {
sort_return(1, "rc-change");
}
sort_return(0, "rc-change");
} else {
/* One of the inputs is a pending operation
* Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other
*/
int a_id = -1;
int b_id = -1;
const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC);
const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC);
CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic"));
if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic a");
}
if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic b");
}
/* try to determine the relative age of the operation...
* some pending operations (e.g. a start) may have been superseded
* by a subsequent stop
*
* [a|b]_id == -1 means it's a shutdown operation and _always_ comes last
*/
if (!pcmk__str_eq(a_uuid, b_uuid, pcmk__str_casei) || a_id == b_id) {
/*
* some of the logic in here may be redundant...
*
* if the UUID from the TE doesn't match then one better
* be a pending operation.
* pending operations don't survive between elections and joins
* because we query the LRM directly
*/
if (b_call_id == -1) {
sort_return(-1, "transition + call");
} else if (a_call_id == -1) {
sort_return(1, "transition + call");
}
} else if ((a_id >= 0 && a_id < b_id) || b_id == -1) {
sort_return(-1, "transition");
} else if ((b_id >= 0 && a_id > b_id) || a_id == -1) {
sort_return(1, "transition");
}
}
/* we should never end up here */
CRM_CHECK(FALSE, sort_return(0, "default"));
}
gint
sort_op_by_callid(gconstpointer a, gconstpointer b)
{
const xmlNode *xml_a = a;
const xmlNode *xml_b = b;
return pe__is_newer_op(xml_a, xml_b, true);
}
time_t
get_effective_time(pe_working_set_t * data_set)
{
if(data_set) {
if (data_set->now == NULL) {
crm_trace("Recording a new 'now'");
data_set->now = crm_time_new(NULL);
}
return crm_time_get_seconds_since_epoch(data_set->now);
}
crm_trace("Defaulting to 'now'");
return time(NULL);
}
gboolean
get_target_role(pe_resource_t * rsc, enum rsc_role_e * role)
{
enum rsc_role_e local_role = RSC_ROLE_UNKNOWN;
const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
CRM_CHECK(role != NULL, return FALSE);
if (pcmk__str_eq(value, "started", pcmk__str_null_matches | pcmk__str_casei)
|| pcmk__str_eq("default", value, pcmk__str_casei)) {
return FALSE;
}
local_role = text2role(value);
if (local_role == RSC_ROLE_UNKNOWN) {
pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s "
"because '%s' is not valid", rsc->id, value);
return FALSE;
} else if (local_role > RSC_ROLE_STARTED) {
if (pcmk_is_set(uber_parent(rsc)->flags, pe_rsc_promotable)) {
if (local_role > RSC_ROLE_UNPROMOTED) {
/* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */
return FALSE;
}
} else {
pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s "
"because '%s' only makes sense for promotable "
"clones", rsc->id, value);
return FALSE;
}
}
*role = local_role;
return TRUE;
}
gboolean
order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order)
{
GList *gIter = NULL;
pe_action_wrapper_t *wrapper = NULL;
GList *list = NULL;
if (order == pe_order_none) {
return FALSE;
}
if (lh_action == NULL || rh_action == NULL) {
return FALSE;
}
crm_trace("Creating action wrappers for ordering: %s then %s",
lh_action->uuid, rh_action->uuid);
/* Ensure we never create a dependency on ourselves... it's happened */
CRM_ASSERT(lh_action != rh_action);
/* Filter dups, otherwise update_action_states() has too much work to do */
gIter = lh_action->actions_after;
for (; gIter != NULL; gIter = gIter->next) {
pe_action_wrapper_t *after = (pe_action_wrapper_t *) gIter->data;
if (after->action == rh_action && (after->type & order)) {
return FALSE;
}
}
wrapper = calloc(1, sizeof(pe_action_wrapper_t));
wrapper->action = rh_action;
wrapper->type = order;
list = lh_action->actions_after;
list = g_list_prepend(list, wrapper);
lh_action->actions_after = list;
wrapper = calloc(1, sizeof(pe_action_wrapper_t));
wrapper->action = lh_action;
wrapper->type = order;
list = rh_action->actions_before;
list = g_list_prepend(list, wrapper);
rh_action->actions_before = list;
return TRUE;
}
pe_action_t *
get_pseudo_op(const char *name, pe_working_set_t * data_set)
{
pe_action_t *op = lookup_singleton(data_set, name);
if (op == NULL) {
op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
}
return op;
}
void
destroy_ticket(gpointer data)
{
pe_ticket_t *ticket = data;
if (ticket->state) {
g_hash_table_destroy(ticket->state);
}
free(ticket->id);
free(ticket);
}
pe_ticket_t *
ticket_new(const char *ticket_id, pe_working_set_t * data_set)
{
pe_ticket_t *ticket = NULL;
if (pcmk__str_empty(ticket_id)) {
return NULL;
}
if (data_set->tickets == NULL) {
data_set->tickets = pcmk__strkey_table(free, destroy_ticket);
}
ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
if (ticket == NULL) {
ticket = calloc(1, sizeof(pe_ticket_t));
if (ticket == NULL) {
crm_err("Cannot allocate ticket '%s'", ticket_id);
return NULL;
}
crm_trace("Creaing ticket entry for %s", ticket_id);
ticket->id = strdup(ticket_id);
ticket->granted = FALSE;
ticket->last_granted = -1;
ticket->standby = FALSE;
ticket->state = pcmk__strkey_table(free, free);
g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket);
}
return ticket;
}
const char *rsc_printable_id(pe_resource_t *rsc)
{
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
return ID(rsc->xml);
}
return rsc->id;
}
void
pe__clear_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags)
{
pe__clear_resource_flags(rsc, flags);
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe__clear_resource_flags_recursive((pe_resource_t *) gIter->data, flags);
}
}
void
pe__clear_resource_flags_on_all(pe_working_set_t *data_set, uint64_t flag)
{
for (GList *lpc = data_set->resources; lpc != NULL; lpc = lpc->next) {
pe_resource_t *r = (pe_resource_t *) lpc->data;
pe__clear_resource_flags_recursive(r, flag);
}
}
void
pe__set_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags)
{
pe__set_resource_flags(rsc, flags);
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe__set_resource_flags_recursive((pe_resource_t *) gIter->data, flags);
}
}
static GList *
find_unfencing_devices(GList *candidates, GList *matches)
{
for (GList *gIter = candidates; gIter != NULL; gIter = gIter->next) {
pe_resource_t *candidate = gIter->data;
if (candidate->children != NULL) {
matches = find_unfencing_devices(candidate->children, matches);
} else if (!pcmk_is_set(candidate->flags, pe_rsc_fence_device)) {
continue;
} else if (pcmk_is_set(candidate->flags, pe_rsc_needs_unfencing)) {
matches = g_list_prepend(matches, candidate);
} else if (pcmk__str_eq(g_hash_table_lookup(candidate->meta,
PCMK_STONITH_PROVIDES),
PCMK__VALUE_UNFENCING,
pcmk__str_casei)) {
matches = g_list_prepend(matches, candidate);
}
}
return matches;
}
static int
node_priority_fencing_delay(pe_node_t * node, pe_working_set_t * data_set)
{
int member_count = 0;
int online_count = 0;
int top_priority = 0;
int lowest_priority = 0;
GList *gIter = NULL;
// `priority-fencing-delay` is disabled
if (data_set->priority_fencing_delay <= 0) {
return 0;
}
/* No need to request a delay if the fencing target is not a normal cluster
* member, for example if it's a remote node or a guest node. */
if (node->details->type != node_member) {
return 0;
}
// No need to request a delay if the fencing target is in our partition
if (node->details->online) {
return 0;
}
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
pe_node_t *n = gIter->data;
if (n->details->type != node_member) {
continue;
}
member_count ++;
if (n->details->online) {
online_count++;
}
if (member_count == 1
|| n->details->priority > top_priority) {
top_priority = n->details->priority;
}
if (member_count == 1
|| n->details->priority < lowest_priority) {
lowest_priority = n->details->priority;
}
}
// No need to delay if we have more than half of the cluster members
if (online_count > member_count / 2) {
return 0;
}
/* All the nodes have equal priority.
* Any configured corresponding `pcmk_delay_base/max` will be applied. */
if (lowest_priority == top_priority) {
return 0;
}
if (node->details->priority < top_priority) {
return 0;
}
return data_set->priority_fencing_delay;
}
pe_action_t *
pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason,
bool priority_delay, pe_working_set_t * data_set)
{
char *op_key = NULL;
pe_action_t *stonith_op = NULL;
if(op == NULL) {
op = data_set->stonith_action;
}
op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op);
stonith_op = lookup_singleton(data_set, op_key);
if(stonith_op == NULL) {
stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id);
add_hash_param(stonith_op->meta, "stonith_action", op);
if (pe__is_guest_or_remote_node(node)
&& pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) {
/* Extra work to detect device changes on remotes
*
* We may do this for all nodes in the future, but for now
* the pcmk__check_action_config() based stuff works fine.
*/
long max = 1024;
long digests_all_offset = 0;
long digests_secure_offset = 0;
char *digests_all = calloc(max, sizeof(char));
char *digests_secure = calloc(max, sizeof(char));
GList *matches = find_unfencing_devices(data_set->resources, NULL);
for (GList *gIter = matches; gIter != NULL; gIter = gIter->next) {
pe_resource_t *match = gIter->data;
const char *agent = g_hash_table_lookup(match->meta,
XML_ATTR_TYPE);
op_digest_cache_t *data = NULL;
data = pe__compare_fencing_digest(match, agent, node, data_set);
if(data->rc == RSC_DIGEST_ALL) {
optional = FALSE;
crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id);
if (!pcmk__is_daemon && data_set->priv != NULL) {
pcmk__output_t *out = data_set->priv;
out->info(out, "notice: Unfencing %s (remote): because the definition of %s changed",
node->details->uname, match->id);
}
}
digests_all_offset += snprintf(
digests_all+digests_all_offset, max-digests_all_offset,
"%s:%s:%s,", match->id, agent, data->digest_all_calc);
digests_secure_offset += snprintf(
digests_secure+digests_secure_offset, max-digests_secure_offset,
"%s:%s:%s,", match->id, agent, data->digest_secure_calc);
}
g_hash_table_insert(stonith_op->meta,
strdup(XML_OP_ATTR_DIGESTS_ALL),
digests_all);
g_hash_table_insert(stonith_op->meta,
strdup(XML_OP_ATTR_DIGESTS_SECURE),
digests_secure);
}
} else {
free(op_key);
}
if (data_set->priority_fencing_delay > 0
/* It's a suitable case where `priority-fencing-delay` applies.
* At least add `priority-fencing-delay` field as an indicator. */
&& (priority_delay
/* The priority delay needs to be recalculated if this function has
* been called by schedule_fencing_and_shutdowns() after node
* priority has already been calculated by native_add_running().
*/
|| g_hash_table_lookup(stonith_op->meta,
XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY) != NULL)) {
/* Add `priority-fencing-delay` to the fencing op even if it's 0 for
* the targeting node. So that it takes precedence over any possible
* `pcmk_delay_base/max`.
*/
char *delay_s = pcmk__itoa(node_priority_fencing_delay(node, data_set));
g_hash_table_insert(stonith_op->meta,
strdup(XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY),
delay_s);
}
if(optional == FALSE && pe_can_fence(data_set, node)) {
pe__clear_action_flags(stonith_op, pe_action_optional);
pe_action_set_reason(stonith_op, reason, false);
} else if(reason && stonith_op->reason == NULL) {
stonith_op->reason = strdup(reason);
}
return stonith_op;
}
void
trigger_unfencing(
pe_resource_t * rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t * data_set)
{
if (!pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) {
/* No resources require it */
return;
} else if ((rsc != NULL)
&& !pcmk_is_set(rsc->flags, pe_rsc_fence_device)) {
/* Wasn't a stonith device */
return;
} else if(node
&& node->details->online
&& node->details->unclean == FALSE
&& node->details->shutdown == FALSE) {
pe_action_t *unfence = pe_fence_op(node, "on", FALSE, reason, FALSE, data_set);
if(dependency) {
order_actions(unfence, dependency, pe_order_optional);
}
} else if(rsc) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) {
trigger_unfencing(rsc, node, reason, dependency, data_set);
}
}
}
}
gboolean
add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref)
{
pe_tag_t *tag = NULL;
GList *gIter = NULL;
gboolean is_existing = FALSE;
CRM_CHECK(tags && tag_name && obj_ref, return FALSE);
tag = g_hash_table_lookup(tags, tag_name);
if (tag == NULL) {
tag = calloc(1, sizeof(pe_tag_t));
if (tag == NULL) {
return FALSE;
}
tag->id = strdup(tag_name);
tag->refs = NULL;
g_hash_table_insert(tags, strdup(tag_name), tag);
}
for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) {
const char *existing_ref = (const char *) gIter->data;
if (pcmk__str_eq(existing_ref, obj_ref, pcmk__str_none)){
is_existing = TRUE;
break;
}
}
if (is_existing == FALSE) {
tag->refs = g_list_append(tag->refs, strdup(obj_ref));
crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref);
}
return TRUE;
}
/*!
* \internal
* \brief Create an action reason string based on the action itself
*
* \param[in] action Action to create reason string for
* \param[in] flag Action flag that was cleared
*
* \return Newly allocated string suitable for use as action reason
* \note It is the caller's responsibility to free() the result.
*/
char *
pe__action2reason(pe_action_t *action, enum pe_action_flags flag)
{
const char *change = NULL;
switch (flag) {
case pe_action_runnable:
case pe_action_migrate_runnable:
change = "unrunnable";
break;
case pe_action_optional:
change = "required";
break;
default:
// Bug: caller passed unsupported flag
CRM_CHECK(change != NULL, change = "");
break;
}
return crm_strdup_printf("%s%s%s %s", change,
(action->rsc == NULL)? "" : " ",
(action->rsc == NULL)? "" : action->rsc->id,
action->task);
}
void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite)
{
if (action->reason != NULL && overwrite) {
pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'",
action->uuid, action->reason, pcmk__s(reason, "(none)"));
} else if (action->reason == NULL) {
pe_rsc_trace(action->rsc, "Set %s reason to '%s'",
action->uuid, pcmk__s(reason, "(none)"));
} else {
// crm_assert(action->reason != NULL && !overwrite);
return;
}
pcmk__str_update(&action->reason, reason);
}
/*!
* \internal
* \brief Check whether shutdown has been requested for a node
*
* \param[in] node Node to check
*
* \return TRUE if node has shutdown attribute set and nonzero, FALSE otherwise
* \note This differs from simply using node->details->shutdown in that it can
* be used before that has been determined (and in fact to determine it),
* and it can also be used to distinguish requested shutdown from implicit
* shutdown of remote nodes by virtue of their connection stopping.
*/
bool
pe__shutdown_requested(pe_node_t *node)
{
const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
return !pcmk__str_eq(shutdown, "0", pcmk__str_null_matches);
}
/*!
* \internal
* \brief Update a data set's "recheck by" time
*
* \param[in] recheck Epoch time when recheck should happen
* \param[in,out] data_set Current working set
*/
void
pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set)
{
if ((recheck > get_effective_time(data_set))
&& ((data_set->recheck_by == 0)
|| (data_set->recheck_by > recheck))) {
data_set->recheck_by = recheck;
}
}
/*!
* \internal
* \brief Wrapper for pe_unpack_nvpairs() using a cluster working set
*/
void
pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name,
pe_rule_eval_data_t *rule_data, GHashTable *hash,
const char *always_first, gboolean overwrite,
pe_working_set_t *data_set)
{
crm_time_t *next_change = crm_time_new_undefined();
pe_eval_nvpairs(data_set->input, xml_obj, set_name, rule_data, hash,
always_first, overwrite, next_change);
if (crm_time_is_defined(next_change)) {
time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change);
pe__update_recheck_time(recheck, data_set);
}
crm_time_free(next_change);
}
bool
pe__resource_is_disabled(pe_resource_t *rsc)
{
const char *target_role = NULL;
CRM_CHECK(rsc != NULL, return false);
target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
if (target_role) {
enum rsc_role_e target_role_e = text2role(target_role);
if ((target_role_e == RSC_ROLE_STOPPED)
|| ((target_role_e == RSC_ROLE_UNPROMOTED)
&& pcmk_is_set(uber_parent(rsc)->flags, pe_rsc_promotable))) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Create an action to clear a resource's history from CIB
*
* \param[in] rsc Resource to clear
* \param[in] node Node to clear history on
*
* \return New action to clear resource history
*/
pe_action_t *
pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set)
{
char *key = NULL;
CRM_ASSERT(rsc && node);
key = pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0);
return custom_action(rsc, key, CRM_OP_LRM_DELETE, node, FALSE, TRUE,
data_set);
}
bool
pe__rsc_running_on_any(pe_resource_t *rsc, GList *node_list)
{
for (GList *ele = rsc->running_on; ele; ele = ele->next) {
pe_node_t *node = (pe_node_t *) ele->data;
if (pcmk__str_in_list(node->details->uname, node_list,
pcmk__str_star_matches|pcmk__str_casei)) {
return true;
}
}
return false;
}
bool
pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GList *only_node)
{
return (rsc->fns->active(rsc, FALSE) && !pe__rsc_running_on_any(rsc, only_node));
}
GList *
pe__filter_rsc_list(GList *rscs, GList *filter)
{
GList *retval = NULL;
for (GList *gIter = rscs; gIter; gIter = gIter->next) {
pe_resource_t *rsc = (pe_resource_t *) gIter->data;
/* I think the second condition is safe here for all callers of this
* function. If not, it needs to move into pe__node_text.
*/
if (pcmk__str_in_list(rsc_printable_id(rsc), filter, pcmk__str_star_matches) ||
(rsc->parent && pcmk__str_in_list(rsc_printable_id(rsc->parent), filter, pcmk__str_star_matches))) {
retval = g_list_prepend(retval, rsc);
}
}
return retval;
}
GList *
pe__build_node_name_list(pe_working_set_t *data_set, const char *s) {
GList *nodes = NULL;
if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) {
/* Nothing was given so return a list of all node names. Or, '*' was
* given. This would normally fall into the pe__unames_with_tag branch
* where it will return an empty list. Catch it here instead.
*/
nodes = g_list_prepend(nodes, strdup("*"));
} else {
pe_node_t *node = pe_find_node(data_set->nodes, s);
if (node) {
/* The given string was a valid uname for a node. Return a
* singleton list containing just that uname.
*/
nodes = g_list_prepend(nodes, strdup(s));
} else {
/* The given string was not a valid uname. It's either a tag or
* it's a typo or something. In the first case, we'll return a
* list of all the unames of the nodes with the given tag. In the
* second case, we'll return a NULL pointer and nothing will
* get displayed.
*/
nodes = pe__unames_with_tag(data_set, s);
}
}
return nodes;
}
GList *
pe__build_rsc_list(pe_working_set_t *data_set, const char *s) {
GList *resources = NULL;
if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) {
resources = g_list_prepend(resources, strdup("*"));
} else {
pe_resource_t *rsc = pe_find_resource_with_flags(data_set->resources, s,
pe_find_renamed|pe_find_any);
if (rsc) {
/* A colon in the name we were given means we're being asked to filter
* on a specific instance of a cloned resource. Put that exact string
* into the filter list. Otherwise, use the printable ID of whatever
* resource was found that matches what was asked for.
*/
if (strstr(s, ":") != NULL) {
resources = g_list_prepend(resources, strdup(rsc->id));
} else {
resources = g_list_prepend(resources, strdup(rsc_printable_id(rsc)));
}
} else {
/* The given string was not a valid resource name. It's either
* a tag or it's a typo or something. See build_uname_list for
* more detail.
*/
resources = pe__rscs_with_tag(data_set, s);
}
}
return resources;
}
xmlNode *
pe__failed_probe_for_rsc(pe_resource_t *rsc, const char *name)
{
pe_resource_t *parent = uber_parent(rsc);
const char *rsc_id = rsc->id;
if (rsc->variant == pe_clone) {
rsc_id = pe__clone_child_id(rsc);
} else if (parent->variant == pe_clone) {
rsc_id = pe__clone_child_id(parent);
}
for (xmlNode *xml_op = pcmk__xml_first_child(rsc->cluster->failed); xml_op != NULL;
xml_op = pcmk__xml_next(xml_op)) {
const char *value = NULL;
char *op_id = NULL;
/* This resource operation is not a failed probe. */
if (!pcmk_xe_mask_probe_failure(xml_op)) {
continue;
}
/* This resource operation was not run on the given node. Note that if name is
* NULL, this will always succeed.
*/
value = crm_element_value(xml_op, XML_LRM_ATTR_TARGET);
if (value == NULL || !pcmk__str_eq(value, name, pcmk__str_casei|pcmk__str_null_matches)) {
continue;
}
/* This resource operation has no operation_key. */
value = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
if (!parse_op_key(value ? value : ID(xml_op), &op_id, NULL, NULL)) {
continue;
}
/* This resource operation's ID does not match the rsc_id we are looking for. */
if (!pcmk__str_eq(op_id, rsc_id, pcmk__str_none)) {
free(op_id);
continue;
}
free(op_id);
return xml_op;
}
return NULL;
}
diff --git a/lib/pengine/variant.h b/lib/pengine/variant.h
index cabfbe81f4..0aee28b680 100644
--- a/lib/pengine/variant.h
+++ b/lib/pengine/variant.h
@@ -1,153 +1,150 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PE_VARIANT__H
# define PE_VARIANT__H
# if VARIANT_CLONE
typedef struct clone_variant_data_s {
int clone_max;
int clone_node_max;
int promoted_max;
int promoted_node_max;
int total_clones;
- // @TODO make these a bitmask
- gboolean ordered;
- bool added_promotion_scores;
- bool added_promoted_constraints;
+ uint32_t flags; // Group of enum pe__clone_flags
notify_data_t *stop_notify;
notify_data_t *start_notify;
notify_data_t *demote_notify;
notify_data_t *promote_notify;
xmlNode *xml_obj_child;
} clone_variant_data_t;
# define get_clone_variant_data(data, rsc) \
CRM_ASSERT(rsc != NULL); \
CRM_ASSERT(rsc->variant == pe_clone); \
data = (clone_variant_data_t *)rsc->variant_opaque;
# elif PE__VARIANT_BUNDLE
typedef struct {
int offset;
char *ipaddr;
pe_node_t *node;
pe_resource_t *ip;
pe_resource_t *child;
pe_resource_t *container;
pe_resource_t *remote;
} pe__bundle_replica_t;
enum pe__bundle_mount_flags {
pe__bundle_mount_none = 0x00,
// mount instance-specific subdirectory rather than source directly
pe__bundle_mount_subdir = 0x01
};
typedef struct {
char *source;
char *target;
char *options;
uint32_t flags; // bitmask of pe__bundle_mount_flags
} pe__bundle_mount_t;
typedef struct {
char *source;
char *target;
} pe__bundle_port_t;
enum pe__container_agent {
PE__CONTAINER_AGENT_UNKNOWN,
PE__CONTAINER_AGENT_DOCKER,
PE__CONTAINER_AGENT_RKT,
PE__CONTAINER_AGENT_PODMAN,
};
#define PE__CONTAINER_AGENT_UNKNOWN_S "unknown"
#define PE__CONTAINER_AGENT_DOCKER_S "docker"
#define PE__CONTAINER_AGENT_RKT_S "rkt"
#define PE__CONTAINER_AGENT_PODMAN_S "podman"
typedef struct pe__bundle_variant_data_s {
int promoted_max;
int nreplicas;
int nreplicas_per_host;
char *prefix;
char *image;
const char *ip_last;
char *host_network;
char *host_netmask;
char *control_port;
char *container_network;
char *ip_range_start;
gboolean add_host;
char *container_host_options;
char *container_command;
char *launcher_options;
const char *attribute_target;
pe_resource_t *child;
GList *replicas; // pe__bundle_replica_t *
GList *ports; // pe__bundle_port_t *
GList *mounts; // pe__bundle_mount_t *
enum pe__container_agent agent_type;
} pe__bundle_variant_data_t;
# define get_bundle_variant_data(data, rsc) \
CRM_ASSERT(rsc != NULL); \
CRM_ASSERT(rsc->variant == pe_container); \
CRM_ASSERT(rsc->variant_opaque != NULL); \
data = (pe__bundle_variant_data_t *)rsc->variant_opaque; \
# elif VARIANT_GROUP
typedef struct group_variant_data_s {
int num_children;
pe_resource_t *first_child;
pe_resource_t *last_child;
gboolean colocated;
gboolean ordered;
gboolean child_starting;
gboolean child_stopping;
} group_variant_data_t;
# define get_group_variant_data(data, rsc) \
CRM_ASSERT(rsc != NULL); \
CRM_ASSERT(rsc->variant == pe_group); \
CRM_ASSERT(rsc->variant_opaque != NULL); \
data = (group_variant_data_t *)rsc->variant_opaque; \
# elif VARIANT_NATIVE
typedef struct native_variant_data_s {
int dummy;
} native_variant_data_t;
# define get_native_variant_data(data, rsc) \
CRM_ASSERT(rsc != NULL); \
CRM_ASSERT(rsc->variant == pe_native); \
CRM_ASSERT(rsc->variant_opaque != NULL); \
data = (native_variant_data_t *)rsc->variant_opaque;
# endif
#endif

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jan 25, 12:32 PM (13 h, 28 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1319892
Default Alt Text
(500 KB)

Event Timeline