diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h index f4f290b308..920c82218e 100644 --- a/include/crm/common/internal.h +++ b/include/crm/common/internal.h @@ -1,274 +1,358 @@ /* * Copyright 2015-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRM_COMMON_INTERNAL__H #define CRM_COMMON_INTERNAL__H #include // getpid() #include // bool +#include // uint8_t, uint64_t #include // strcmp() #include // uid_t, gid_t, pid_t #include // guint, GList, GHashTable #include // xmlNode #include // crm_strdup_printf() +#include // do_crm_log_unlikely(), etc. #include // mainloop_io_t, struct ipc_client_callbacks #include // Internal ACL-related utilities (from acl.c) char *pcmk__uid2username(uid_t uid); const char *pcmk__update_acl_user(xmlNode *request, const char *field, const char *peer_user); #if ENABLE_ACL # include static inline bool pcmk__is_privileged(const char *user) { return user && (!strcmp(user, CRM_DAEMON_USER) || !strcmp(user, "root")); } #endif #if SUPPORT_CIBSECRETS // Internal CIB utilities (from cib_secrets.c) */ int pcmk__substitute_secrets(const char *rsc_id, GHashTable *params); #endif /* internal digest-related utilities (from digest.c) */ bool pcmk__verify_digest(xmlNode *input, const char *expected); /* internal I/O utilities (from io.c) */ int pcmk__real_path(const char *path, char **resolved_path); char *pcmk__series_filename(const char *directory, const char *series, int sequence, bool bzip); int pcmk__read_series_sequence(const char *directory, const char *series, unsigned int *seq); void pcmk__write_series_sequence(const char *directory, const char *series, unsigned int sequence, int max); int pcmk__chown_series_sequence(const char *directory, const char *series, uid_t uid, gid_t gid); int pcmk__build_path(const char *path_c, mode_t mode); bool pcmk__daemon_can_write(const char *dir, const char *file); void pcmk__sync_directory(const char *name); int pcmk__file_contents(const char *filename, char **contents); int pcmk__write_sync(int fd, const char *contents); int pcmk__set_nonblocking(int fd); const char *pcmk__get_tmpdir(void); void pcmk__close_fds_in_child(bool); /*! * \internal * \brief Open /dev/null to consume next available file descriptor * * Open /dev/null, disregarding the result. This is intended when daemonizing to * be able to null stdin, stdout, and stderr. * * \param[in] flags O_RDONLY (stdin) or O_WRONLY (stdout and stderr) */ static inline void pcmk__open_devnull(int flags) { // Static analysis clutter // cppcheck-suppress leakReturnValNotUsed (void) open("/dev/null", flags); } /* internal logging utilities */ # define pcmk__config_err(fmt...) do { \ crm_config_error = TRUE; \ crm_err(fmt); \ } while (0) # define pcmk__config_warn(fmt...) do { \ crm_config_warning = TRUE; \ crm_warn(fmt); \ } while (0) /*! * \internal * \brief Execute code depending on whether message would be logged * * This is similar to do_crm_log_unlikely() except instead of logging, it either * continues past this statement or executes else_action depending on whether a * message of the given severity would be logged or not. This allows whole * blocks of code to be skipped if tracing or debugging is turned off. * * \param[in] level Severity at which to continue past this statement * \param[in] else_action Code block to execute if severity would not be logged * * \note else_action must not contain a break or continue statement */ # define pcmk__log_else(level, else_action) do { \ static struct qb_log_callsite *trace_cs = NULL; \ \ if (trace_cs == NULL) { \ trace_cs = qb_log_callsite_get(__func__, __FILE__, "log_else", \ level, __LINE__, 0); \ } \ if (!crm_is_callsite_active(trace_cs, level, 0)) { \ else_action; \ } \ } while(0) /* internal main loop utilities (from mainloop.c) */ int pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata, struct ipc_client_callbacks *callbacks, mainloop_io_t **source); /* internal procfs utilities (from procfs.c) */ pid_t pcmk__procfs_pid_of(const char *name); unsigned int pcmk__procfs_num_cores(void); /* internal XML schema functions (from xml.c) */ void crm_schema_init(void); void crm_schema_cleanup(void); /* internal functions related to process IDs (from pid.c) */ /*! * \internal * \brief Check whether process exists (by PID and optionally executable path) * * \param[in] pid PID of process to check * \param[in] daemon If not NULL, path component to match with procfs entry * * \return Standard Pacemaker return code * \note Particular return codes of interest include pcmk_rc_ok for alive, * ESRCH for process is not alive (verified by kill and/or executable path * match), EACCES for caller unable or not allowed to check. A result of * "alive" is less reliable when \p daemon is not provided or procfs is * not available, since there is no guarantee that the PID has not been * recycled for another process. * \note This function cannot be used to verify \e authenticity of the process. */ int pcmk__pid_active(pid_t pid, const char *daemon); int pcmk__read_pidfile(const char *filename, pid_t *pid); int pcmk__pidfile_matches(const char *filename, pid_t expected_pid, const char *expected_name, pid_t *pid); int pcmk__lock_pidfile(const char *filename, const char *name); /* internal functions related to resource operations (from operations.c) */ // printf-style format to create operation ID from resource, action, interval #define PCMK__OP_FMT "%s_%s_%u" char *pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms); char *pcmk__notify_key(const char *rsc_id, const char *notify_type, const char *op_type); char *pcmk__transition_key(int transition_id, int action_id, int target_rc, const char *node); void pcmk__filter_op_for_digest(xmlNode *param_set); +// bitwise arithmetic utilities + +/*! + * \internal + * \brief Set specified flags in a flag group + * + * \param[in] function Function name of caller + * \param[in] line Line number of caller + * \param[in] log_level Log a message at this level + * \param[in] flag_type Label describing this flag group (for logging) + * \param[in] target Name of object whose flags these are (for logging) + * \param[in] flag_group Flag group being manipulated + * \param[in] flags Which flags in the group should be set + * \param[in] flags_str Readable equivalent of \p flags (for logging) + * + * \return Possibly modified flag group + */ +static inline uint64_t +pcmk__set_flags_as(const char *function, int line, uint8_t log_level, + const char *flag_type, const char *target, + uint64_t flag_group, uint64_t flags, const char *flags_str) +{ + uint64_t result = flag_group | flags; + + if (result != flag_group) { + do_crm_log_unlikely(log_level, + "%s flags 0x%.8llx (%s) for %s set by %s:%d", + ((flag_type == NULL)? "Group of" : flag_type), + (unsigned long long) flags, + ((flags_str == NULL)? "flags" : flags_str), + ((target == NULL)? "target" : target), + function, line); + } + return result; +} + +/*! + * \internal + * \brief Clear specified flags in a flag group + * + * \param[in] function Function name of caller + * \param[in] line Line number of caller + * \param[in] log_level Log a message at this level + * \param[in] flag_type Label describing this flag group (for logging) + * \param[in] target Name of object whose flags these are (for logging) + * \param[in] flag_group Flag group being manipulated + * \param[in] flags Which flags in the group should be cleared + * \param[in] flags_str Readable equivalent of \p flags (for logging) + * + * \return Possibly modified flag group + */ +static inline uint64_t +pcmk__clear_flags_as(const char *function, int line, uint8_t log_level, + const char *flag_type, const char *target, + uint64_t flag_group, uint64_t flags, const char *flags_str) +{ + uint64_t result = flag_group & ~flags; + + if (result != flag_group) { + do_crm_log_unlikely(log_level, + "%s flags 0x%.8llx (%s) for %s cleared by %s:%d", + ((flag_type == NULL)? "Group of" : flag_type), + (unsigned long long) flags, + ((flags_str == NULL)? "flags" : flags_str), + ((target == NULL)? "target" : target), + function, line); + } + return result; +} + +//! \deprecated +# define set_bit(word, bit) do { \ + word = pcmk__set_flags_as(__FUNCTION__, __LINE__, LOG_TRACE, \ + NULL, NULL, word, bit, NULL); \ + } while (0) + +//! \deprecated +# define clear_bit(word, bit) do { \ + word = pcmk__clear_flags_as(__FUNCTION__, __LINE__, LOG_TRACE, \ + NULL, NULL, word, bit, NULL); \ + } while (0) + // miscellaneous utilities (from utils.c) const char *pcmk_message_name(const char *name); extern int pcmk__score_red; extern int pcmk__score_green; extern int pcmk__score_yellow; /* Error domains for use with g_set_error (from results.c) */ GQuark pcmk__rc_error_quark(void); GQuark pcmk__exitc_error_quark(void); #define PCMK__RC_ERROR pcmk__rc_error_quark() #define PCMK__EXITC_ERROR pcmk__exitc_error_quark() static inline char * pcmk__getpid_s(void) { return crm_strdup_printf("%lu", (unsigned long) getpid()); } // More efficient than g_list_length(list) == 1 static inline bool pcmk__list_of_1(GList *list) { return list && (list->next == NULL); } // More efficient than g_list_length(list) > 1 static inline bool pcmk__list_of_multiple(GList *list) { return list && (list->next != NULL); } /* convenience functions for failure-related node attributes */ #define PCMK__FAIL_COUNT_PREFIX "fail-count" #define PCMK__LAST_FAILURE_PREFIX "last-failure" /*! * \internal * \brief Generate a failure-related node attribute name for a resource * * \param[in] prefix Start of attribute name * \param[in] rsc_id Resource name * \param[in] op Operation name * \param[in] interval_ms Operation interval * * \return Newly allocated string with attribute name * * \note Failure attributes are named like PREFIX-RSC#OP_INTERVAL (for example, * "fail-count-myrsc#monitor_30000"). The '#' is used because it is not * a valid character in a resource ID, to reliably distinguish where the * operation name begins. The '_' is used simply to be more comparable to * action labels like "myrsc_monitor_30000". */ static inline char * pcmk__fail_attr_name(const char *prefix, const char *rsc_id, const char *op, guint interval_ms) { CRM_CHECK(prefix && rsc_id && op, return NULL); return crm_strdup_printf("%s-%s#%s_%u", prefix, rsc_id, op, interval_ms); } static inline char * pcmk__failcount_name(const char *rsc_id, const char *op, guint interval_ms) { return pcmk__fail_attr_name(PCMK__FAIL_COUNT_PREFIX, rsc_id, op, interval_ms); } static inline char * pcmk__lastfailure_name(const char *rsc_id, const char *op, guint interval_ms) { return pcmk__fail_attr_name(PCMK__LAST_FAILURE_PREFIX, rsc_id, op, interval_ms); } #endif /* CRM_COMMON_INTERNAL__H */ diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index 767b6bcd8e..80bdc4b375 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -1,486 +1,498 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PE_INTERNAL__H # define PE_INTERNAL__H # include # include # include # include # define pe_rsc_info(rsc, fmt, args...) crm_log_tag(LOG_INFO, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_debug(rsc, fmt, args...) crm_log_tag(LOG_DEBUG, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_trace(rsc, fmt, args...) crm_log_tag(LOG_TRACE, rsc ? rsc->id : "", fmt, ##args) # define pe_err(fmt...) { was_processing_error = TRUE; crm_config_error = TRUE; crm_err(fmt); } # define pe_warn(fmt...) { was_processing_warning = TRUE; crm_config_warning = TRUE; crm_warn(fmt); } # define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); } # define pe_proc_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); } -# define pe_set_action_bit(action, bit) action->flags = crm_set_bit(__FUNCTION__, __LINE__, action->uuid, action->flags, bit) -# define pe_clear_action_bit(action, bit) action->flags = crm_clear_bit(__FUNCTION__, __LINE__, action->uuid, action->flags, bit) + +#define pe_set_action_bit(action, bit) do { \ + (action)->flags = pcmk__set_flags_as(__FUNCTION__, __LINE__, \ + LOG_TRACE, \ + "Action", (action)->uuid, \ + (action)->flags, bit, #bit); \ + } while (0) + +#define pe_clear_action_bit(action, bit) do { \ + (action)->flags = pcmk__clear_flags_as(__FUNCTION__, __LINE__, \ + LOG_TRACE, \ + "Action", (action)->uuid, \ + (action)->flags, bit, #bit); \ + } while (0) // Some warnings we don't want to print every transition enum pe_warn_once_e { pe_wo_blind = 0x0001, pe_wo_restart_type = 0x0002, pe_wo_role_after = 0x0004, pe_wo_poweroff = 0x0008, pe_wo_require_all = 0x0010, pe_wo_order_score = 0x0020, pe_wo_neg_threshold = 0x0040, }; extern uint32_t pe_wo; #define pe_warn_once(pe_wo_bit, fmt...) do { \ if (is_not_set(pe_wo, pe_wo_bit)) { \ if (pe_wo_bit == pe_wo_blind) { \ crm_warn(fmt); \ } else { \ pe_warn(fmt); \ } \ set_bit(pe_wo, pe_wo_bit); \ } \ } while (0); typedef struct pe__location_constraint_s { char *id; // Constraint XML ID pe_resource_t *rsc_lh; // Resource being located enum rsc_role_e role_filter; // Role to locate enum pe_discover_e discover_mode; // Resource discovery GListPtr node_list_rh; // List of pe_node_t* } pe__location_t; typedef struct pe__order_constraint_s { int id; enum pe_ordering type; void *lh_opaque; pe_resource_t *lh_rsc; pe_action_t *lh_action; char *lh_action_task; void *rh_opaque; pe_resource_t *rh_rsc; pe_action_t *rh_action; char *rh_action_task; } pe__ordering_t; typedef struct notify_data_s { GSList *keys; // Environment variable name/value pairs const char *action; pe_action_t *pre; pe_action_t *post; pe_action_t *pre_done; pe_action_t *post_done; GListPtr active; /* notify_entry_t* */ GListPtr inactive; /* notify_entry_t* */ GListPtr start; /* notify_entry_t* */ GListPtr stop; /* notify_entry_t* */ GListPtr demote; /* notify_entry_t* */ GListPtr promote; /* notify_entry_t* */ GListPtr master; /* notify_entry_t* */ GListPtr slave; /* notify_entry_t* */ GHashTable *allowed_nodes; } notify_data_t; bool pe_can_fence(pe_working_set_t *data_set, pe_node_t *node); int pe__add_scores(int score1, int score2); void add_hash_param(GHashTable * hash, const char *name, const char *value); char *native_parameter(pe_resource_t * rsc, pe_node_t * node, gboolean create, const char *name, pe_working_set_t * data_set); pe_node_t *native_location(const pe_resource_t *rsc, GList **list, int current); void pe_metadata(void); void verify_pe_options(GHashTable * options); void common_update_score(pe_resource_t * rsc, const char *id, int score); void native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set); gboolean native_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean group_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean clone_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set); pe_resource_t *native_find_rsc(pe_resource_t *rsc, const char *id, const pe_node_t *node, int flags); gboolean native_active(pe_resource_t * rsc, gboolean all); gboolean group_active(pe_resource_t * rsc, gboolean all); gboolean clone_active(pe_resource_t * rsc, gboolean all); gboolean pe__bundle_active(pe_resource_t *rsc, gboolean all); void native_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data); void group_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data); void clone_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data); void pe__print_bundle(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name , size_t pairs_count, ...); char *pe__node_display_name(pe_node_t *node, bool print_detail); int pe__ban_html(pcmk__output_t *out, va_list args); int pe__ban_text(pcmk__output_t *out, va_list args); int pe__ban_xml(pcmk__output_t *out, va_list args); int pe__clone_xml(pcmk__output_t *out, va_list args); int pe__clone_html(pcmk__output_t *out, va_list args); int pe__clone_text(pcmk__output_t *out, va_list args); int pe__cluster_counts_html(pcmk__output_t *out, va_list args); int pe__cluster_counts_text(pcmk__output_t *out, va_list args); int pe__cluster_counts_xml(pcmk__output_t *out, va_list args); int pe__cluster_dc_html(pcmk__output_t *out, va_list args); int pe__cluster_dc_text(pcmk__output_t *out, va_list args); int pe__cluster_dc_xml(pcmk__output_t *out, va_list args); int pe__cluster_maint_mode_html(pcmk__output_t *out, va_list args); int pe__cluster_maint_mode_text(pcmk__output_t *out, va_list args); int pe__cluster_options_html(pcmk__output_t *out, va_list args); int pe__cluster_options_log(pcmk__output_t *out, va_list args); int pe__cluster_options_text(pcmk__output_t *out, va_list args); int pe__cluster_options_xml(pcmk__output_t *out, va_list args); int pe__cluster_stack_html(pcmk__output_t *out, va_list args); int pe__cluster_stack_text(pcmk__output_t *out, va_list args); int pe__cluster_stack_xml(pcmk__output_t *out, va_list args); int pe__cluster_summary(pcmk__output_t *out, va_list args); int pe__cluster_summary_html(pcmk__output_t *out, va_list args); int pe__cluster_times_html(pcmk__output_t *out, va_list args); int pe__cluster_times_xml(pcmk__output_t *out, va_list args); int pe__cluster_times_text(pcmk__output_t *out, va_list args); int pe__failed_action_text(pcmk__output_t *out, va_list args); int pe__failed_action_xml(pcmk__output_t *out, va_list args); int pe__group_xml(pcmk__output_t *out, va_list args); int pe__group_html(pcmk__output_t *out, va_list args); int pe__group_text(pcmk__output_t *out, va_list args); int pe__bundle_xml(pcmk__output_t *out, va_list args); int pe__bundle_html(pcmk__output_t *out, va_list args); int pe__bundle_text(pcmk__output_t *out, va_list args); int pe__node_html(pcmk__output_t *out, va_list args); int pe__node_text(pcmk__output_t *out, va_list args); int pe__node_xml(pcmk__output_t *out, va_list args); int pe__node_attribute_html(pcmk__output_t *out, va_list args); int pe__node_attribute_text(pcmk__output_t *out, va_list args); int pe__node_attribute_xml(pcmk__output_t *out, va_list args); int pe__node_list_html(pcmk__output_t *out, va_list args); int pe__node_list_text(pcmk__output_t *out, va_list args); int pe__node_list_xml(pcmk__output_t *out, va_list args); int pe__op_history_text(pcmk__output_t *out, va_list args); int pe__op_history_xml(pcmk__output_t *out, va_list args); int pe__resource_history_text(pcmk__output_t *out, va_list args); int pe__resource_history_xml(pcmk__output_t *out, va_list args); int pe__resource_xml(pcmk__output_t *out, va_list args); int pe__resource_html(pcmk__output_t *out, va_list args); int pe__resource_text(pcmk__output_t *out, va_list args); int pe__ticket_html(pcmk__output_t *out, va_list args); int pe__ticket_text(pcmk__output_t *out, va_list args); int pe__ticket_xml(pcmk__output_t *out, va_list args); void native_free(pe_resource_t * rsc); void group_free(pe_resource_t * rsc); void clone_free(pe_resource_t * rsc); void pe__free_bundle(pe_resource_t *rsc); enum rsc_role_e native_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e group_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e clone_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e pe__bundle_resource_state(const pe_resource_t *rsc, gboolean current); void pe__count_common(pe_resource_t *rsc); void pe__count_bundle(pe_resource_t *rsc); gboolean common_unpack(xmlNode * xml_obj, pe_resource_t ** rsc, pe_resource_t * parent, pe_working_set_t * data_set); void common_free(pe_resource_t * rsc); pe_node_t *pe__copy_node(const pe_node_t *this_node); extern time_t get_effective_time(pe_working_set_t * data_set); /* Failure handling utilities (from failcounts.c) */ // bit flags for fail count handling options enum pe_fc_flags_e { pe_fc_default = 0x00, pe_fc_effective = 0x01, // don't count expired failures pe_fc_fillers = 0x02, // if container, include filler failures in count }; int pe_get_failcount(pe_node_t *node, pe_resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set); pe_action_t *pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_working_set_t *data_set); /* Functions for finding/counting a resource's active nodes */ pe_node_t *pe__find_active_on(const pe_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean); pe_node_t *pe__find_active_requires(const pe_resource_t *rsc, unsigned int *count); static inline pe_node_t * pe__current_node(const pe_resource_t *rsc) { return pe__find_active_on(rsc, NULL, NULL); } /* Binary like operators for lists of nodes */ extern void node_list_exclude(GHashTable * list, GListPtr list2, gboolean merge_scores); GHashTable *pe__node_list2table(GList *list); static inline gpointer pe_hash_table_lookup(GHashTable * hash, gconstpointer key) { if (hash) { return g_hash_table_lookup(hash, key); } return NULL; } extern pe_action_t *get_pseudo_op(const char *name, pe_working_set_t * data_set); extern gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order); /* Printing functions for debug */ extern void print_node(const char *pre_text, pe_node_t * node, gboolean details); extern void print_str_str(gpointer key, gpointer value, gpointer user_data); extern void pe__output_node(pe_node_t * node, gboolean details, pcmk__output_t *out); extern void dump_node_capacity(int level, const char *comment, pe_node_t * node); extern void dump_rsc_utilization(int level, const char *comment, pe_resource_t * rsc, pe_node_t * node); void pe__show_node_weights_as(const char *file, const char *function, int line, bool to_log, pe_resource_t *rsc, const char *comment, GHashTable *nodes); #define pe__show_node_weights(level, rsc, text, nodes) \ pe__show_node_weights_as(__FILE__, __FUNCTION__, __LINE__, \ (level), (rsc), (text), (nodes)) /* Sorting functions */ extern gint sort_rsc_priority(gconstpointer a, gconstpointer b); extern gint sort_rsc_index(gconstpointer a, gconstpointer b); extern xmlNode *find_rsc_op_entry(pe_resource_t * rsc, const char *key); extern pe_action_t *custom_action(pe_resource_t * rsc, char *key, const char *task, pe_node_t * on_node, gboolean optional, gboolean foo, pe_working_set_t * data_set); # define delete_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DELETE, 0) # define delete_action(rsc, node, optional) custom_action( \ rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \ optional, TRUE, data_set); # define stopped_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOPPED, 0) # define stopped_action(rsc, node, optional) custom_action( \ rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \ optional, TRUE, data_set); # define stop_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOP, 0) # define stop_action(rsc, node, optional) custom_action( \ rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \ optional, TRUE, data_set); # define reload_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_RELOAD, 0) # define start_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_START, 0) # define start_action(rsc, node, optional) custom_action( \ rsc, start_key(rsc), CRMD_ACTION_START, node, \ optional, TRUE, data_set) # define started_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STARTED, 0) # define started_action(rsc, node, optional) custom_action( \ rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \ optional, TRUE, data_set) # define promote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTE, 0) # define promote_action(rsc, node, optional) custom_action( \ rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \ optional, TRUE, data_set) # define promoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTED, 0) # define promoted_action(rsc, node, optional) custom_action( \ rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \ optional, TRUE, data_set) # define demote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTE, 0) # define demote_action(rsc, node, optional) custom_action( \ rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \ optional, TRUE, data_set) # define demoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTED, 0) # define demoted_action(rsc, node, optional) custom_action( \ rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \ optional, TRUE, data_set) extern int pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set); extern pe_action_t *find_first_action(GListPtr input, const char *uuid, const char *task, pe_node_t * on_node); extern enum action_tasks get_complex_task(pe_resource_t * rsc, const char *name, gboolean allow_non_atomic); extern GListPtr find_actions(GListPtr input, const char *key, const pe_node_t *on_node); GList *find_actions_exact(GList *input, const char *key, const pe_node_t *on_node); extern GListPtr find_recurring_actions(GListPtr input, pe_node_t * not_on_node); GList *pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node); extern void pe_free_action(pe_action_t * action); extern void resource_location(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag, pe_working_set_t * data_set); extern gint sort_op_by_callid(gconstpointer a, gconstpointer b); extern gboolean get_target_role(pe_resource_t * rsc, enum rsc_role_e *role); extern pe_resource_t *find_clone_instance(pe_resource_t * rsc, const char *sub_id, pe_working_set_t * data_set); extern void destroy_ticket(gpointer data); extern pe_ticket_t *ticket_new(const char *ticket_id, pe_working_set_t * data_set); // Resources for manipulating resource names const char *pe_base_name_end(const char *id); char *clone_strip(const char *last_rsc_id); char *clone_zero(const char *last_rsc_id); static inline bool pe_base_name_eq(pe_resource_t *rsc, const char *id) { if (id && rsc && rsc->id) { // Number of characters in rsc->id before any clone suffix size_t base_len = pe_base_name_end(rsc->id) - rsc->id + 1; return (strlen(id) == base_len) && !strncmp(id, rsc->id, base_len); } return FALSE; } int pe__target_rc_from_xml(xmlNode *xml_op); gint sort_node_uname(gconstpointer a, gconstpointer b); bool is_set_recursive(pe_resource_t * rsc, long long flag, bool any); enum rsc_digest_cmp_val { /*! Digests are the same */ RSC_DIGEST_MATCH = 0, /*! Params that require a restart changed */ RSC_DIGEST_RESTART, /*! Some parameter changed. */ RSC_DIGEST_ALL, /*! rsc op didn't have a digest associated with it, so * it is unknown if parameters changed or not. */ RSC_DIGEST_UNKNOWN, }; typedef struct op_digest_cache_s { enum rsc_digest_cmp_val rc; xmlNode *params_all; xmlNode *params_secure; xmlNode *params_restart; char *digest_all_calc; char *digest_secure_calc; char *digest_restart_calc; } op_digest_cache_t; op_digest_cache_t *rsc_action_digest_cmp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * node, pe_working_set_t * data_set); pe_action_t *pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t * data_set); void trigger_unfencing( pe_resource_t * rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t * data_set); void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite); void pe_action_set_flag_reason(const char *function, long line, pe_action_t *action, pe_action_t *reason, const char *text, enum pe_action_flags flags, bool overwrite); #define pe_action_required(action, reason, text) pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, reason, text, pe_action_optional, FALSE) #define pe_action_implies(action, reason, flag) pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, reason, NULL, flag, FALSE) void set_bit_recursive(pe_resource_t * rsc, unsigned long long flag); void clear_bit_recursive(pe_resource_t * rsc, unsigned long long flag); gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref); void print_rscs_brief(GListPtr rsc_list, const char * pre_text, long options, void * print_data, gboolean print_all); int pe__rscs_brief_output(pcmk__output_t *out, GListPtr rsc_list, long options, gboolean print_all); void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay); pe_node_t *pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); void common_print(pe_resource_t * rsc, const char *pre_text, const char *name, pe_node_t *node, long options, void *print_data); int pe__common_output_text(pcmk__output_t *out, pe_resource_t * rsc, const char *name, pe_node_t *node, long options); int pe__common_output_html(pcmk__output_t *out, pe_resource_t * rsc, const char *name, pe_node_t *node, long options); pe_resource_t *pe__find_bundle_replica(const pe_resource_t *bundle, const pe_node_t *node); bool pe__bundle_needs_remote_name(pe_resource_t *rsc); const char *pe__add_bundle_remote_name(pe_resource_t *rsc, xmlNode *xml, const char *field); const char *pe_node_attribute_calculated(const pe_node_t *node, const char *name, const pe_resource_t *rsc); const char *pe_node_attribute_raw(pe_node_t *node, const char *name); bool pe__is_universal_clone(pe_resource_t *rsc, pe_working_set_t *data_set); void pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set); void pe__foreach_param_check(pe_working_set_t *data_set, void (*cb)(pe_resource_t*, pe_node_t*, xmlNode*, enum pe_check_parameters, pe_working_set_t*)); void pe__free_param_checks(pe_working_set_t *data_set); bool pe__shutdown_requested(pe_node_t *node); void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set); #define BOOL2STR(x) ((x) ? "true" : "false") /*! * \internal * \brief Register xml formatting message functions. */ void pe__register_messages(pcmk__output_t *out); void pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name, pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pe_working_set_t *data_set); bool pe__resource_is_disabled(pe_resource_t *rsc); pe_action_t *pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set); GListPtr pe__rscs_with_tag(pe_working_set_t *data_set, const char *tag_name); GListPtr pe__unames_with_tag(pe_working_set_t *data_set, const char *tag_name); bool pe__rsc_has_tag(pe_working_set_t *data_set, const char *rsc, const char *tag); bool pe__uname_has_tag(pe_working_set_t *data_set, const char *node, const char *tag); bool pe__rsc_running_on_any_node_in_list(pe_resource_t *rsc, GListPtr node_list); GListPtr pe__filter_rsc_list(GListPtr rscs, GListPtr filter); bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GListPtr only_node); gboolean pe__bundle_is_filtered(pe_resource_t *rsc, GListPtr only_rsc, gboolean check_parent); gboolean pe__clone_is_filtered(pe_resource_t *rsc, GListPtr only_rsc, gboolean check_parent); gboolean pe__group_is_filtered(pe_resource_t *rsc, GListPtr only_rsc, gboolean check_parent); gboolean pe__native_is_filtered(pe_resource_t *rsc, GListPtr only_rsc, gboolean check_parent); #endif diff --git a/include/crm_internal.h b/include/crm_internal.h index efc7d957d7..62f7b7687c 100644 --- a/include/crm_internal.h +++ b/include/crm_internal.h @@ -1,160 +1,125 @@ /* * Copyright 2006-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRM_INTERNAL__H # define CRM_INTERNAL__H # include # include # include # include # include /* Public API headers can guard deprecated code with this symbol, thus * preventing internal code (which includes this header) from using it, while * still allowing external code (which can't include this header) to use it, * for backward compatibility. */ #define PCMK__NO_COMPAT # include # include # include # include # include /* Assorted convenience functions */ void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile); -static inline long long -crm_clear_bit(const char *function, int line, const char *target, long long word, long long bit) -{ - long long rc = (word & ~bit); - - if (rc == word) { - /* Unchanged */ - } else if (target) { - crm_trace("Bit 0x%.8llx for %s cleared by %s:%d", bit, target, function, line); - } else { - crm_trace("Bit 0x%.8llx cleared by %s:%d", bit, function, line); - } - - return rc; -} - -static inline long long -crm_set_bit(const char *function, int line, const char *target, long long word, long long bit) -{ - long long rc = (word | bit); - - if (rc == word) { - /* Unchanged */ - } else if (target) { - crm_trace("Bit 0x%.8llx for %s set by %s:%d", bit, target, function, line); - } else { - crm_trace("Bit 0x%.8llx set by %s:%d", bit, function, line); - } - - return rc; -} - -# define set_bit(word, bit) word = crm_set_bit(__FUNCTION__, __LINE__, NULL, word, bit) -# define clear_bit(word, bit) word = crm_clear_bit(__FUNCTION__, __LINE__, NULL, word, bit) - void strip_text_nodes(xmlNode * xml); void pcmk_panic(const char *origin); pid_t pcmk_locate_sbd(void); /* * XML attribute names used only by internal code */ #define PCMK__XA_ATTR_DAMPENING "attr_dampening" #define PCMK__XA_ATTR_FORCE "attrd_is_force_write" #define PCMK__XA_ATTR_INTERVAL "attr_clear_interval" #define PCMK__XA_ATTR_IS_PRIVATE "attr_is_private" #define PCMK__XA_ATTR_IS_REMOTE "attr_is_remote" #define PCMK__XA_ATTR_NAME "attr_name" #define PCMK__XA_ATTR_NODE_ID "attr_host_id" #define PCMK__XA_ATTR_NODE_NAME "attr_host" #define PCMK__XA_ATTR_OPERATION "attr_clear_operation" #define PCMK__XA_ATTR_PATTERN "attr_regex" #define PCMK__XA_ATTR_RESOURCE "attr_resource" #define PCMK__XA_ATTR_SECTION "attr_section" #define PCMK__XA_ATTR_SET "attr_set" #define PCMK__XA_ATTR_USER "attr_user" #define PCMK__XA_ATTR_UUID "attr_key" #define PCMK__XA_ATTR_VALUE "attr_value" #define PCMK__XA_ATTR_VERSION "attr_version" #define PCMK__XA_ATTR_WRITER "attr_writer" #define PCMK__XA_MODE "mode" #define PCMK__XA_TASK "task" /* * IPC service names that are only used internally */ # define PCMK__SERVER_BASED_RO "cib_ro" # define PCMK__SERVER_BASED_RW "cib_rw" # define PCMK__SERVER_BASED_SHM "cib_shm" /* * IPC commands that can be sent to Pacemaker daemons */ #define PCMK__ATTRD_CMD_PEER_REMOVE "peer-remove" #define PCMK__ATTRD_CMD_UPDATE "update" #define PCMK__ATTRD_CMD_UPDATE_BOTH "update-both" #define PCMK__ATTRD_CMD_UPDATE_DELAY "update-delay" #define PCMK__ATTRD_CMD_QUERY "query" #define PCMK__ATTRD_CMD_REFRESH "refresh" #define PCMK__ATTRD_CMD_FLUSH "flush" #define PCMK__ATTRD_CMD_SYNC "sync" #define PCMK__ATTRD_CMD_SYNC_RESPONSE "sync-response" #define PCMK__ATTRD_CMD_CLEAR_FAILURE "clear-failure" #define PCMK__CONTROLD_CMD_NODES "list-nodes" /* * Environment variables used by Pacemaker */ #define PCMK__ENV_PHYSICAL_HOST "physical_host" static inline void * realloc_safe(void *ptr, size_t size) { void *new_ptr; // realloc(p, 0) can replace free(p) but this wrapper can't CRM_ASSERT(size > 0); new_ptr = realloc(ptr, size); if (new_ptr == NULL) { free(ptr); abort(); } return new_ptr; } const char *crm_xml_add_last_written(xmlNode *xml_node); void crm_xml_dump(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth); void crm_buffer_add_char(char **buffer, int *offset, int *max, char c); #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) # define PCMK__OUTPUT_ARGS(ARGS...) __attribute__((output_args(ARGS))) #else # define PCMK__OUTPUT_ARGS(ARGS...) #endif #endif /* CRM_INTERNAL__H */ diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c index 55119fa392..4a35afde44 100644 --- a/lib/pacemaker/pcmk_sched_allocate.c +++ b/lib/pacemaker/pcmk_sched_allocate.c @@ -1,3053 +1,3057 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pacemaker); void set_alloc_actions(pe_working_set_t * data_set); extern void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set); extern gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set); static void apply_remote_node_ordering(pe_working_set_t *data_set); static enum remote_connection_state get_remote_node_state(pe_node_t *node); enum remote_connection_state { remote_state_unknown = 0, remote_state_alive = 1, remote_state_resting = 2, remote_state_failed = 3, remote_state_stopped = 4 }; static const char * state2text(enum remote_connection_state state) { switch (state) { case remote_state_unknown: return "unknown"; case remote_state_alive: return "alive"; case remote_state_resting: return "resting"; case remote_state_failed: return "failed"; case remote_state_stopped: return "stopped"; } return "impossible"; } resource_alloc_functions_t resource_class_alloc_functions[] = { { pcmk__native_merge_weights, pcmk__native_allocate, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, native_rsc_location, native_action_flags, native_update_actions, native_expand, native_append_meta, }, { pcmk__group_merge_weights, pcmk__group_allocate, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, group_rsc_location, group_action_flags, group_update_actions, group_expand, group_append_meta, }, { pcmk__native_merge_weights, pcmk__clone_allocate, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, clone_rsc_location, clone_action_flags, pcmk__multi_update_actions, clone_expand, clone_append_meta, }, { pcmk__native_merge_weights, pcmk__bundle_allocate, pcmk__bundle_create_actions, pcmk__bundle_create_probe, pcmk__bundle_internal_constraints, pcmk__bundle_rsc_colocation_lh, pcmk__bundle_rsc_colocation_rh, pcmk__bundle_rsc_location, pcmk__bundle_action_flags, pcmk__multi_update_actions, pcmk__bundle_expand, pcmk__bundle_append_meta, } }; gboolean update_action_flags(pe_action_t * action, enum pe_action_flags flags, const char *source, int line) { static unsigned long calls = 0; gboolean changed = FALSE; gboolean clear = is_set(flags, pe_action_clear); enum pe_action_flags last = action->flags; if (clear) { - action->flags = crm_clear_bit(source, line, action->uuid, action->flags, flags); + action->flags = pcmk__clear_flags_as(source, line, LOG_TRACE, "Action", + action->uuid, action->flags, + flags, NULL); } else { - action->flags = crm_set_bit(source, line, action->uuid, action->flags, flags); + action->flags = pcmk__set_flags_as(source, line, LOG_TRACE, "Action", + action->uuid, action->flags, + flags, NULL); } if (last != action->flags) { calls++; changed = TRUE; /* Useful for tracking down _who_ changed a specific flag */ /* CRM_ASSERT(calls != 534); */ clear_bit(flags, pe_action_clear); crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)", action->uuid, action->node ? action->node->details->uname : "[none]", clear ? "un-" : "", flags, last, action->flags, calls, source); } return changed; } static gboolean check_rsc_parameters(pe_resource_t * rsc, pe_node_t * node, xmlNode * rsc_entry, gboolean active_here, pe_working_set_t * data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; gboolean changed = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for (; attr_lpc < DIMOF(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if (value == old_value /* i.e. NULL */ || pcmk__str_eq(value, old_value, pcmk__str_none)) { continue; } changed = TRUE; trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set); if (active_here) { force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } } if (force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); set_bit(rsc->flags, pe_rsc_start_pending); delete_resource = TRUE; } else if (changed) { delete_resource = TRUE; } return delete_resource; } static void CancelXmlOp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * active_node, const char *reason, pe_working_set_t * data_set) { guint interval_ms = 0; pe_action_t *cancel = NULL; const char *task = NULL; const char *call_id = NULL; CRM_CHECK(xml_op != NULL, return); CRM_CHECK(active_node != NULL, return); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); crm_info("Action " PCMK__OP_FMT " on %s will be stopped: %s", rsc->id, task, interval_ms, active_node->details->uname, (reason? reason : "unknown")); cancel = pe_cancel_op(rsc, task, interval_ms, active_node, data_set); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); } static gboolean check_action_definition(pe_resource_t * rsc, pe_node_t * active_node, xmlNode * xml_op, pe_working_set_t * data_set) { char *key = NULL; guint interval_ms = 0; const op_digest_cache_t *digest_data = NULL; gboolean did_change = FALSE; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_secure = NULL; CRM_CHECK(active_node != NULL, return FALSE); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if (interval_ms > 0) { xmlNode *op_match = NULL; /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = pcmk__op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) { CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set); free(key); return TRUE; } else if (op_match == NULL) { pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname); free(key); return TRUE; } free(key); key = NULL; } crm_trace("Testing " PCMK__OP_FMT " on %s", rsc->id, task, interval_ms, active_node->details->uname); if ((interval_ms == 0) && pcmk__str_eq(task, RSC_STATUS, pcmk__str_casei)) { /* Reload based on the start action not a probe */ task = RSC_START; } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_MIGRATED, pcmk__str_casei)) { /* Reload based on the start action not a migrate */ task = RSC_START; } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_PROMOTE, pcmk__str_casei)) { /* Reload based on the start action not a promote */ task = RSC_START; } digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set); if(is_set(data_set->flags, pe_flag_sanitized)) { digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST); } if(digest_data->rc != RSC_DIGEST_MATCH && digest_secure && digest_data->digest_secure_calc && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) { if (is_set(data_set->flags, pe_flag_stdout)) { printf("Only 'private' parameters to " PCMK__OP_FMT " on %s changed: %s\n", rsc->id, task, interval_ms, active_node->details->uname, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); } } else if (digest_data->rc == RSC_DIGEST_RESTART) { /* Changes that force a restart */ pe_action_t *required = NULL; did_change = TRUE; key = pcmk__op_key(rsc->id, task, interval_ms); crm_log_xml_info(digest_data->params_restart, "params:restart"); required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set); pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL, "resource definition change", pe_action_optional, TRUE); trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set); } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) { /* Changes that can potentially be handled by a reload */ const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); did_change = TRUE; trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set); crm_log_xml_info(digest_data->params_all, "params:reload"); key = pcmk__op_key(rsc->id, task, interval_ms); if (interval_ms > 0) { pe_action_t *op = NULL; #if 0 /* Always reload/restart the entire resource */ ReloadRsc(rsc, active_node, data_set); #else /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */ op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set); set_bit(op->flags, pe_action_reschedule); #endif } else if (digest_restart) { pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id); /* Reload this resource */ ReloadRsc(rsc, active_node, data_set); free(key); } else { pe_action_t *required = NULL; pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id); /* Re-send the start/demote/promote op * Recurring ops will be detected independently */ required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set); pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL, "resource definition change", pe_action_optional, TRUE); } } return did_change; } /*! * \internal * \brief Do deferred action checks after allocation * * \param[in] data_set Working set for cluster */ static void check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op, enum pe_check_parameters check, pe_working_set_t *data_set) { const char *reason = NULL; op_digest_cache_t *digest_data = NULL; switch (check) { case pe_check_active: if (check_action_definition(rsc, node, rsc_op, data_set) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { reason = "action definition changed"; } break; case pe_check_last_failure: digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s has no digest to compare", rsc->id, ID(rsc_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: reason = "resource parameters have changed"; break; } break; } if (reason) { pe__clear_failcount(rsc, node, reason, data_set); } } static void check_actions_for(xmlNode * rsc_entry, pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set) { GListPtr gIter = NULL; int offset = -1; int stop_index = 0; int start_index = 0; const char *task = NULL; xmlNode *rsc_op = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; CRM_CHECK(node != NULL, return); if (is_set(rsc->flags, pe_rsc_orphan)) { pe_resource_t *parent = uber_parent(rsc); if(parent == NULL || pe_rsc_is_clone(parent) == FALSE || is_set(parent->flags, pe_rsc_unique)) { pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); DeleteRsc(rsc, node, FALSE, data_set); } else { pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id); } return; } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname); if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_prepend(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; offset++; if (start_index < stop_index) { /* stopped */ continue; } else if (offset < start_index) { /* action occurred prior to a start */ continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms > 0) && (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) { // Maintenance mode cancels recurring operations CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); } else if ((interval_ms > 0) || pcmk__strcase_any_of(task, RSC_STATUS, RSC_START, RSC_PROMOTE, RSC_MIGRATED, NULL)) { /* If a resource operation failed, and the operation's definition * has changed, clear any fail count so they can be retried fresh. */ if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources to nodes yet, so if the * REMOTE_CONTAINER_HACK is used, we may calculate the digest * based on the literal "#uname" value rather than the properly * substituted value. That would mistakenly make the action * definition appear to have been changed. Defer the check until * later in this case. */ pe__add_param_check(rsc_op, rsc, node, pe_check_active, data_set); } else if (check_action_definition(rsc, node, rsc_op, data_set) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { pe__clear_failcount(rsc, node, "action definition changed", data_set); } } } g_list_free(sorted_op_list); } static GListPtr find_rsc_list(GListPtr result, pe_resource_t * rsc, const char *id, gboolean renamed_clones, gboolean partial, pe_working_set_t * data_set) { GListPtr gIter = NULL; gboolean match = FALSE; if (id == NULL) { return NULL; } if (rsc == NULL) { if (data_set == NULL) { return NULL; } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } return result; } if (partial) { if (strstr(rsc->id, id)) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) { match = TRUE; } } else { if (strcmp(rsc->id, id) == 0) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } } if (match) { result = g_list_prepend(result, rsc); } if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } } return result; } static void check_actions(pe_working_set_t * data_set) { const char *id = NULL; pe_node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xmlNode *node_state = NULL; for (node_state = __xml_first_child_element(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if (node == NULL) { continue; /* Still need to check actions for a maintenance node to cancel existing monitor operations */ } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) { crm_trace("Skipping param check for %s: can't run resources", node->details->uname); continue; } crm_trace("Processing node %s", node->details->uname); if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child_element(lrm_rscs); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { if (xml_has_children(rsc_entry)) { GListPtr gIter = NULL; GListPtr result = NULL; const char *rsc_id = ID(rsc_entry); CRM_CHECK(rsc_id != NULL, return); result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set); for (gIter = result; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->variant != pe_native) { continue; } check_actions_for(rsc_entry, rsc, node, data_set); } g_list_free(result); } } } } } } } static void apply_placement_constraints(pe_working_set_t * data_set) { for (GList *gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { pe__location_t *cons = gIter->data; cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); } } static gboolean failcount_clear_action_exists(pe_node_t * node, pe_resource_t * rsc) { gboolean rc = FALSE; GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE); if (list) { rc = TRUE; } g_list_free(list); return rc; } /*! * \internal * \brief Force resource away if failures hit migration threshold * * \param[in,out] rsc Resource to check for failures * \param[in,out] node Node to check for failures * \param[in,out] data_set Cluster working set to update */ static void check_migration_threshold(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { int fail_count, countdown; pe_resource_t *failed; /* Migration threshold of 0 means never force away */ if (rsc->migration_threshold == 0) { return; } // If we're ignoring failures, also ignore the migration threshold if (is_set(rsc->flags, pe_rsc_failure_ignored)) { return; } /* If there are no failures, there's no need to force away */ fail_count = pe_get_failcount(node, rsc, NULL, pe_fc_effective|pe_fc_fillers, NULL, data_set); if (fail_count <= 0) { return; } /* How many more times recovery will be tried on this node */ countdown = QB_MAX(rsc->migration_threshold - fail_count, 0); /* If failed resource has a parent, we'll force the parent away */ failed = rsc; if (is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } if (countdown == 0) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_info("%s can fail %d more times on %s before being forced off", failed->id, countdown, node->details->uname); } } static void common_apply_stickiness(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set) { if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; common_apply_stickiness(child_rsc, node, data_set); } return; } if (is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0 && pcmk__list_of_1(rsc->running_on)) { pe_node_t *current = pe_find_node_id(rsc->running_on, node->details->id); pe_node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (current == NULL) { } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) { pe_resource_t *sticky_rsc = rsc; resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set); pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location" " (node=%s, weight=%d)", sticky_rsc->id, node->details->uname, rsc->stickiness); } else { GHashTableIter iter; pe_node_t *nIter = NULL; pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric" " and node %s is not explicitly allowed", rsc->id, node->details->uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) { crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight); } } } /* Check the migration threshold only if a failcount clear action * has not already been placed for this resource on the node. * There is no sense in potentially forcing the resource from this * node if the failcount is being reset anyway. * * @TODO A clear_failcount operation can be scheduled in stage4() via * check_actions_for(), or in stage5() via check_params(). This runs in * stage2(), so it cannot detect those, meaning we might check the migration * threshold when we shouldn't -- worst case, we stop or move the resource, * then move it back next transition. */ if (failcount_clear_action_exists(node, rsc) == FALSE) { check_migration_threshold(rsc, node, data_set); } } void complex_set_cmds(pe_resource_t * rsc) { GListPtr gIter = rsc->children; rsc->cmds = &resource_class_alloc_functions[rsc->variant]; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; complex_set_cmds(child_rsc); } } void set_alloc_actions(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; complex_set_cmds(rsc); } } static void calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data) { const char *key = (const char *)gKey; const char *value = (const char *)gValue; int *system_health = (int *)user_data; if (!gKey || !gValue || !user_data) { return; } if (pcmk__starts_with(key, "#health")) { int score; /* Convert the value into an integer */ score = char2score(value); /* Add it to the running total */ *system_health = pe__add_scores(score, *system_health); } } static gboolean apply_system_health(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy"); int base_health = 0; if (pcmk__str_eq(health_strategy, "none", pcmk__str_null_matches | pcmk__str_casei)) { /* Prevent any accidental health -> score translation */ pcmk__score_red = 0; pcmk__score_yellow = 0; pcmk__score_green = 0; return TRUE; } else if (pcmk__str_eq(health_strategy, "migrate-on-red", pcmk__str_casei)) { /* Resources on nodes which have health values of red are * weighted away from that node. */ pcmk__score_red = -INFINITY; pcmk__score_yellow = 0; pcmk__score_green = 0; } else if (pcmk__str_eq(health_strategy, "only-green", pcmk__str_casei)) { /* Resources on nodes which have health values of red or yellow * are forced away from that node. */ pcmk__score_red = -INFINITY; pcmk__score_yellow = -INFINITY; pcmk__score_green = 0; } else if (pcmk__str_eq(health_strategy, "progressive", pcmk__str_casei)) { /* Same as the above, but use the r/y/g scores provided by the user * Defaults are provided by the pe_prefs table * Also, custom health "base score" can be used */ base_health = crm_parse_int(pe_pref(data_set->config_hash, "node-health-base"), "0"); } else if (pcmk__str_eq(health_strategy, "custom", pcmk__str_casei)) { /* Requires the admin to configure the rsc_location constaints for * processing the stored health scores */ /* TODO: Check for the existence of appropriate node health constraints */ return TRUE; } else { crm_err("Unknown node health strategy: %s", health_strategy); return FALSE; } crm_info("Applying automated node health strategy: %s", health_strategy); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { int system_health = base_health; pe_node_t *node = (pe_node_t *) gIter->data; /* Search through the node hash table for system health entries. */ g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health); crm_info(" Node %s has an combined system health of %d", node->details->uname, system_health); /* If the health is non-zero, then create a new rsc2node so that the * weight will be added later on. */ if (system_health != 0) { GListPtr gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set); } } } return TRUE; } gboolean stage0(pe_working_set_t * data_set) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); if (data_set->input == NULL) { return FALSE; } if (is_set(data_set->flags, pe_flag_have_status) == FALSE) { crm_trace("Calculating status"); cluster_status(data_set); } set_alloc_actions(data_set); apply_system_health(data_set); unpack_constraints(cib_constraints, data_set); return TRUE; } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t * data_set) { pe_action_t *probe_node_complete = NULL; for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED); if (node->details->online == FALSE) { if (pe__is_remote_node(node) && node->details->remote_rsc && (get_remote_node_state(node) == remote_state_failed)) { pe_fence_node(data_set, node, "the connection is unrecoverable", FALSE); } continue; } else if (node->details->unclean) { continue; } else if (node->details->rsc_discovery_enabled == FALSE) { /* resource discovery is disabled for this node */ continue; } if (probed != NULL && crm_is_true(probed) == FALSE) { pe_action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname), CRM_OP_REPROBE, node, FALSE, TRUE, data_set); add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); continue; } for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set); } } return TRUE; } static void rsc_discover_filter(pe_resource_t *rsc, pe_node_t *node) { GListPtr gIter = rsc->children; pe_resource_t *top = uber_parent(rsc); pe_node_t *match; if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) { return; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; rsc_discover_filter(child_rsc, node); } match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match && match->rsc_discover_mode != pe_discover_exclusive) { match->weight = -INFINITY; } } static time_t shutdown_time(pe_node_t *node, pe_working_set_t *data_set) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); time_t result = 0; if (shutdown) { errno = 0; result = (time_t) crm_parse_ll(shutdown, NULL); if (errno != 0) { result = 0; } } return result? result : get_effective_time(data_set); } static void apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set) { const char *class; // Only primitives and (uncloned) groups may be locked if (rsc->variant == pe_group) { for (GList *item = rsc->children; item != NULL; item = item->next) { apply_shutdown_lock((pe_resource_t *) item->data, data_set); } } else if (rsc->variant != pe_native) { return; } // Fence devices and remote connections can't be locked class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches) || pe__resource_is_remote_conn(rsc, data_set)) { return; } if (rsc->lock_node != NULL) { // The lock was obtained from resource history if (rsc->running_on != NULL) { /* The resource was started elsewhere even though it is now * considered locked. This shouldn't be possible, but as a * failsafe, we don't want to disturb the resource now. */ pe_rsc_info(rsc, "Cancelling shutdown lock because %s is already active", rsc->id); pe__clear_resource_history(rsc, rsc->lock_node, data_set); rsc->lock_node = NULL; rsc->lock_time = 0; } // Only a resource active on exactly one node can be locked } else if (pcmk__list_of_1(rsc->running_on)) { pe_node_t *node = rsc->running_on->data; if (node->details->shutdown) { if (node->details->unclean) { pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown", rsc->id, node->details->uname); } else { rsc->lock_node = node; rsc->lock_time = shutdown_time(node, data_set); } } } if (rsc->lock_node == NULL) { // No lock needed return; } if (data_set->shutdown_lock > 0) { time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock; pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)", rsc->id, rsc->lock_node->details->uname, (long long) lock_expiration); pe__update_recheck_time(++lock_expiration, data_set); } else { pe_rsc_info(rsc, "Locking %s to %s due to shutdown", rsc->id, rsc->lock_node->details->uname); } // If resource is locked to one node, ban it from all other nodes for (GList *item = data_set->nodes; item != NULL; item = item->next) { pe_node_t *node = item->data; if (strcmp(node->details->uname, rsc->lock_node->details->uname)) { resource_location(rsc, node, -CRM_SCORE_INFINITY, XML_CONFIG_ATTR_SHUTDOWN_LOCK, data_set); } } } /* * \internal * \brief Stage 2 of cluster status: apply node-specific criteria * * Count known nodes, and apply location constraints, stickiness, and exclusive * resource discovery. */ gboolean stage2(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (is_set(data_set->flags, pe_flag_shutdown_lock)) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { apply_shutdown_lock((pe_resource_t *) gIter->data, data_set); } } if (is_not_set(data_set->flags, pe_flag_no_compat)) { // @COMPAT API backward compatibility for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node && (node->weight >= 0) && node->details->online && (node->details->type != node_ping)) { data_set->max_valid_nodes++; } } } crm_trace("Applying placement constraints"); apply_placement_constraints(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { GListPtr gIter2 = NULL; pe_node_t *node = (pe_node_t *) gIter->data; gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; common_apply_stickiness(rsc, node, data_set); rsc_discover_filter(rsc, node); } } return TRUE; } /* * Create internal resource constraints before allocation */ gboolean stage3(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; rsc->cmds->internal_constraints(rsc, data_set); } return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t * data_set) { check_actions(data_set); return TRUE; } static void * convert_const_pointer(const void *ptr) { /* Worst function ever */ return (void *)ptr; } static gint sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) { int rc = 0; int r1_weight = -INFINITY; int r2_weight = -INFINITY; const char *reason = "existence"; const GListPtr nodes = (GListPtr) data; const pe_resource_t *resource1 = a; const pe_resource_t *resource2 = b; pe_node_t *r1_node = NULL; pe_node_t *r2_node = NULL; GListPtr gIter = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; reason = "priority"; r1_weight = resource1->priority; r2_weight = resource2->priority; if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "no node list"; if (nodes == NULL) { goto done; } r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1), resource1->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); pe__show_node_weights(true, NULL, resource1->id, r1_nodes); r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2), resource2->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); pe__show_node_weights(true, NULL, resource2->id, r2_nodes); /* Current location score */ reason = "current location"; r1_weight = -INFINITY; r2_weight = -INFINITY; if (resource1->running_on) { r1_node = pe__current_node(resource1); r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id); if (r1_node != NULL) { r1_weight = r1_node->weight; } } if (resource2->running_on) { r2_node = pe__current_node(resource2); r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id); if (r2_node != NULL) { r2_weight = r2_node->weight; } } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "score"; for (gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; r1_node = NULL; r2_node = NULL; r1_weight = -INFINITY; if (r1_nodes) { r1_node = g_hash_table_lookup(r1_nodes, node->details->id); } if (r1_node) { r1_weight = r1_node->weight; } r2_weight = -INFINITY; if (r2_nodes) { r2_node = g_hash_table_lookup(r2_nodes, node->details->id); } if (r2_node) { r2_weight = r2_node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } } done: crm_trace("%s (%d) on %s %c %s (%d) on %s: %s", resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a", rc < 0 ? '>' : rc > 0 ? '<' : '=', resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason); if (r1_nodes) { g_hash_table_destroy(r1_nodes); } if (r2_nodes) { g_hash_table_destroy(r2_nodes); } return rc; } static void allocate_resources(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (is_set(data_set->flags, pe_flag_have_remote_nodes)) { /* Allocate remote connection resources first (which will also allocate * any colocation dependencies). If the connection is migrating, always * prefer the partial migration target. */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->is_remote_node == FALSE) { continue; } pe_rsc_trace(rsc, "Allocating remote connection resource '%s'", rsc->id); rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set); } } /* now do the rest of the resources */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->is_remote_node == TRUE) { continue; } pe_rsc_trace(rsc, "Allocating %s resource '%s'", crm_element_name(rsc->xml), rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } } /* We always use pe_order_preserve with these convenience functions to exempt * internally generated constraints from the prohibition of user constraints * involving remote connection resources. * * The start ordering additionally uses pe_order_runnable_left so that the * specified action is not runnable if the start is not runnable. */ static inline void order_start_then_action(pe_resource_t *lh_rsc, pe_action_t *rh_action, enum pe_ordering extra, pe_working_set_t *data_set) { if (lh_rsc && rh_action && data_set) { custom_action_order(lh_rsc, start_key(lh_rsc), NULL, rh_action->rsc, NULL, rh_action, pe_order_preserve | pe_order_runnable_left | extra, data_set); } } static inline void order_action_then_stop(pe_action_t *lh_action, pe_resource_t *rh_rsc, enum pe_ordering extra, pe_working_set_t *data_set) { if (lh_action && rh_rsc && data_set) { custom_action_order(lh_action->rsc, NULL, lh_action, rh_rsc, stop_key(rh_rsc), NULL, pe_order_preserve | extra, data_set); } } // Clear fail counts for orphaned rsc on all online nodes static void cleanup_orphans(pe_resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node->details->online && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { pe_action_t *clear_op = NULL; clear_op = pe__clear_failcount(rsc, node, "it is orphaned", data_set); /* We can't use order_action_then_stop() here because its * pe_order_preserve breaks things */ custom_action_order(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), NULL, pe_order_optional, data_set); } } } gboolean stage5(pe_working_set_t * data_set) { GListPtr gIter = NULL; int log_prio = show_utilization? LOG_STDOUT : LOG_TRACE; if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) { GListPtr nodes = g_list_copy(data_set->nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); data_set->resources = g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes); g_list_free(nodes); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; dump_node_capacity(log_prio, "Original", node); } crm_trace("Allocating services"); /* Take (next) highest resource, assign it and create its actions */ allocate_resources(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; dump_node_capacity(log_prio, "Remaining", node); } // Process deferred action checks pe__foreach_param_check(data_set, check_params); pe__free_param_checks(data_set); if (is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Calculating needed probes"); /* This code probably needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=100: With probes: ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints 36s ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph Without probes: ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph */ probe_resources(data_set); } crm_trace("Handle orphans"); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans)) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* There's no need to recurse into rsc->children because those * should just be unallocated clone instances. */ if (is_set(rsc->flags, pe_rsc_orphan)) { cleanup_orphans(rsc, data_set); } } } crm_trace("Creating actions"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; rsc->cmds->create_actions(rsc, data_set); } crm_trace("Creating done"); return TRUE; } static gboolean is_managed(const pe_resource_t * rsc) { GListPtr gIter = rsc->children; if (is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (is_managed(child_rsc)) { return TRUE; } } return FALSE; } static gboolean any_managed_resources(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (is_managed(rsc)) { return TRUE; } } return FALSE; } /*! * \internal * \brief Create pseudo-op for guest node fence, and order relative to it * * \param[in] node Guest node to fence * \param[in] data_set Working set of CIB state */ static void fence_guest(pe_node_t *node, pe_working_set_t *data_set) { pe_resource_t *container = node->details->remote_rsc->container; pe_action_t *stop = NULL; pe_action_t *stonith_op = NULL; /* The fence action is just a label; we don't do anything differently for * off vs. reboot. We specify it explicitly, rather than let it default to * cluster's default action, because we are not _initiating_ fencing -- we * are creating a pseudo-event to describe fencing that is already occurring * by other means (container recovery). */ const char *fence_action = "off"; /* Check whether guest's container resource has any explicit stop or * start (the stop may be implied by fencing of the guest's host). */ if (container) { stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL); if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) { fence_action = "reboot"; } } /* Create a fence pseudo-event, so we have an event to order actions * against, and the controller can always detect it. */ stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, data_set); update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable, __FUNCTION__, __LINE__); /* We want to imply stops/demotes after the guest is stopped, not wait until * it is restarted, so we always order pseudo-fencing after stop, not start * (even though start might be closer to what is done for a real reboot). */ if(stop && is_set(stop->flags, pe_action_pseudo)) { pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, data_set); crm_info("Implying guest node %s is down (action %d) after %s fencing", node->details->uname, stonith_op->id, stop->node->details->uname); order_actions(parent_stonith_op, stonith_op, pe_order_runnable_left|pe_order_implies_then); } else if (stop) { order_actions(stop, stonith_op, pe_order_runnable_left|pe_order_implies_then); crm_info("Implying guest node %s is down (action %d) " "after container %s is stopped (action %d)", node->details->uname, stonith_op->id, container->id, stop->id); } else { /* If we're fencing the guest node but there's no stop for the guest * resource, we must think the guest is already stopped. However, we may * think so because its resource history was just cleaned. To avoid * unnecessarily considering the guest node down if it's really up, * order the pseudo-fencing after any stop of the connection resource, * which will be ordered after any container (re-)probe. */ stop = find_first_action(node->details->remote_rsc->actions, NULL, RSC_STOP, NULL); if (stop) { order_actions(stop, stonith_op, pe_order_optional); crm_info("Implying guest node %s is down (action %d) " "after connection is stopped (action %d)", node->details->uname, stonith_op->id, stop->id); } else { /* Not sure why we're fencing, but everything must already be * cleanly stopped. */ crm_info("Implying guest node %s is down (action %d) ", node->details->uname, stonith_op->id); } } /* Order/imply other actions relative to pseudo-fence as with real fence */ pcmk__order_vs_fence(stonith_op, data_set); } /* * Create dependencies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t * data_set) { pe_action_t *dc_down = NULL; pe_action_t *stonith_op = NULL; gboolean integrity_lost = FALSE; gboolean need_stonith = TRUE; GListPtr gIter; GListPtr stonith_ops = NULL; GList *shutdown_ops = NULL; /* Remote ordering constraints need to happen prior to calculating fencing * because it is one more place we will mark the node as dirty. * * A nice side effect of doing them early is that apply_*_ordering() can be * simpler because pe_fence_node() has already done some of the work. */ crm_trace("Creating remote ordering constraints"); apply_remote_node_ordering(data_set); crm_trace("Processing fencing and shutdown cases"); if (any_managed_resources(data_set) == FALSE) { crm_notice("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } /* Check each node for stonith/shutdown */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; /* Guest nodes are "fenced" by recovering their container resource, * so handle them separately. */ if (pe__is_guest_node(node)) { if (node->details->remote_requires_reset && need_stonith && pe_can_fence(data_set, node)) { fence_guest(node, data_set); } continue; } stonith_op = NULL; if (node->details->unclean && need_stonith && pe_can_fence(data_set, node)) { stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, data_set); pe_warn("Scheduling Node %s for STONITH", node->details->uname); pcmk__order_vs_fence(stonith_op, data_set); if (node->details->is_dc) { // Remember if the DC is being fenced dc_down = stonith_op; } else { if (is_not_set(data_set->flags, pe_flag_concurrent_fencing) && (stonith_ops != NULL)) { /* Concurrent fencing is disabled, so order each non-DC * fencing in a chain. If there is any DC fencing or * shutdown, it will be ordered after the last action in the * chain later. */ order_actions((pe_action_t *) stonith_ops->data, stonith_op, pe_order_optional); } // Remember all non-DC fencing actions in a separate list stonith_ops = g_list_prepend(stonith_ops, stonith_op); } } else if (node->details->online && node->details->shutdown && /* TODO define what a shutdown op means for a remote node. * For now we do not send shutdown operations for remote nodes, but * if we can come up with a good use for this in the future, we will. */ pe__is_guest_or_remote_node(node) == FALSE) { pe_action_t *down_op = sched_shutdown_op(node, data_set); if (node->details->is_dc) { // Remember if the DC is being shut down dc_down = down_op; } else { // Remember non-DC shutdowns for later ordering shutdown_ops = g_list_prepend(shutdown_ops, down_op); } } if (node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } } if (integrity_lost) { if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no-quorum-policy is set to ignore)"); } } if (dc_down != NULL) { /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated * DC elections. However, we don't want to order non-DC shutdowns before * a DC *fencing*, because even though we don't want a node that's * shutting down to become DC, the DC fencing could be ordered before a * clone stop that's also ordered before the shutdowns, thus leading to * a graph loop. */ if (pcmk__str_eq(dc_down->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) { pe_action_t *node_stop = (pe_action_t *) gIter->data; crm_debug("Ordering shutdown on %s before %s on DC %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_optional); } } // Order any non-DC fencing before any DC fencing or shutdown if (is_set(data_set->flags, pe_flag_concurrent_fencing)) { /* With concurrent fencing, order each non-DC fencing action * separately before any DC fencing or shutdown. */ for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) { order_actions((pe_action_t *) gIter->data, dc_down, pe_order_optional); } } else if (stonith_ops) { /* Without concurrent fencing, the non-DC fencing actions are * already ordered relative to each other, so we just need to order * the DC fencing after the last action in the chain (which is the * first item in the list). */ order_actions((pe_action_t *) stonith_ops->data, dc_down, pe_order_optional); } } g_list_free(stonith_ops); g_list_free(shutdown_ops); return TRUE; } /* * Determine the sets of independent actions and the correct order for the * actions in each set. * * Mark dependencies of un-runnable actions un-runnable * */ static GListPtr find_actions_by_task(GListPtr actions, pe_resource_t * rsc, const char *original_key) { GListPtr list = NULL; list = find_actions(actions, original_key, NULL); if (list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *task = NULL; guint interval_ms = 0; if (parse_op_key(original_key, NULL, &task, &interval_ms)) { key = pcmk__op_key(rsc->id, task, interval_ms); list = find_actions(actions, key, NULL); } else { crm_err("search key: %s", original_key); } free(key); free(task); } return list; } static void rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc, pe__ordering_t *order) { GListPtr gIter = NULL; GListPtr rh_actions = NULL; pe_action_t *rh_action = NULL; enum pe_ordering type; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); type = order->type; rh_action = order->rh_action; crm_trace("Processing RH of ordering constraint %d", order->id); if (rh_action != NULL) { rh_actions = g_list_prepend(NULL, rh_action); } else if (rsc != NULL) { rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task); } if (rh_actions == NULL) { pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..." " ignoring", rsc->id, order->rh_action_task); if (lh_action) { pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid); } return; } if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) { pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid, order->rh_action_task); clear_bit(type, pe_order_implies_then); } gIter = rh_actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *rh_action_iter = (pe_action_t *) gIter->data; if (lh_action) { order_actions(lh_action, rh_action_iter, type); } else if (type & pe_order_implies_then) { update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type); } } g_list_free(rh_actions); } static void rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order, pe_working_set_t *data_set) { GListPtr gIter = NULL; GListPtr lh_actions = NULL; pe_action_t *lh_action = order->lh_action; pe_resource_t *rh_rsc = order->rh_rsc; crm_trace("Processing LH of ordering constraint %d", order->id); CRM_ASSERT(lh_rsc != NULL); if (lh_action != NULL) { lh_actions = g_list_prepend(NULL, lh_action); } else { lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task); } if (lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; char *op_type = NULL; guint interval_ms = 0; parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms); key = pcmk__op_key(lh_rsc->id, op_type, interval_ms); if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && pcmk__str_eq(op_type, RSC_STOP, pcmk__str_casei)) { free(key); pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && pcmk__str_eq(op_type, RSC_DEMOTE, pcmk__str_casei)) { free(key); pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } else { pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); lh_actions = g_list_prepend(NULL, lh_action); } free(op_type); } gIter = lh_actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *lh_action_iter = (pe_action_t *) gIter->data; if (rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if (rh_rsc) { rsc_order_then(lh_action_iter, rh_rsc, order); } else if (order->rh_action) { order_actions(lh_action_iter, order->rh_action, order->type); } } g_list_free(lh_actions); } extern void update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set); static int is_recurring_action(pe_action_t *action) { guint interval_ms; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { return 0; } return (interval_ms > 0); } static void apply_container_ordering(pe_action_t *action, pe_working_set_t *data_set) { /* VMs are also classified as containers for these purposes... in * that they both involve a 'thing' running on a real or remote * cluster node. * * This allows us to be smarter about the type and extent of * recovery actions required in various scenarios */ pe_resource_t *remote_rsc = NULL; pe_resource_t *container = NULL; enum action_tasks task = text2task(action->task); CRM_ASSERT(action->rsc); CRM_ASSERT(action->node); CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); container = remote_rsc->container; CRM_ASSERT(container); if(is_set(container->flags, pe_rsc_failed)) { pe_fence_node(data_set, action->node, "container failed", FALSE); } crm_trace("Order %s action %s relative to %s%s for %s%s", action->task, action->uuid, is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, is_set(container->flags, pe_rsc_failed)? "failed " : "", container->id); if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: /* Force resource recovery if the container is recovered */ order_start_then_action(container, action, pe_order_implies_then, data_set); /* Wait for the connection resource to be up too */ order_start_then_action(remote_rsc, action, pe_order_none, data_set); break; case stop_rsc: case action_demote: if (is_set(container->flags, pe_rsc_failed)) { /* When the container representing a guest node fails, any stop * or demote actions for resources running on the guest node * are implied by the container stopping. This is similar to * how fencing operations work for cluster nodes and remote * nodes. */ } else { /* Ensure the operation happens before the connection is brought * down. * * If we really wanted to, we could order these after the * connection start, IFF the container's current role was * stopped (otherwise we re-introduce an ordering loop when the * connection is restarting). */ order_action_then_stop(action, remote_rsc, pe_order_none, data_set); } break; default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ if(task != no_action) { order_start_then_action(remote_rsc, action, pe_order_implies_then, data_set); } } else { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } break; } } static enum remote_connection_state get_remote_node_state(pe_node_t *node) { pe_resource_t *remote_rsc = NULL; pe_node_t *cluster_node = NULL; CRM_ASSERT(node); remote_rsc = node->details->remote_rsc; CRM_ASSERT(remote_rsc); cluster_node = pe__current_node(remote_rsc); /* If the cluster node the remote connection resource resides on * is unclean or went offline, we can't process any operations * on that remote node until after it starts elsewhere. */ if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) { /* The connection resource is not going to run anywhere */ if (cluster_node && cluster_node->details->unclean) { /* The remote connection is failed because its resource is on a * failed node and can't be recovered elsewhere, so we must fence. */ return remote_state_failed; } if (is_not_set(remote_rsc->flags, pe_rsc_failed)) { /* Connection resource is cleanly stopped */ return remote_state_stopped; } /* Connection resource is failed */ if ((remote_rsc->next_role == RSC_ROLE_STOPPED) && remote_rsc->remote_reconnect_ms && node->details->remote_was_fenced && !pe__shutdown_requested(node)) { /* We won't know whether the connection is recoverable until the * reconnect interval expires and we reattempt connection. */ return remote_state_unknown; } /* The remote connection is in a failed state. If there are any * resources known to be active on it (stop) or in an unknown state * (probe), we must assume the worst and fence it. */ return remote_state_failed; } else if (cluster_node == NULL) { /* Connection is recoverable but not currently running anywhere, see if we can recover it first */ return remote_state_unknown; } else if(cluster_node->details->unclean == TRUE || cluster_node->details->online == FALSE) { /* Connection is running on a dead node, see if we can recover it first */ return remote_state_resting; } else if (pcmk__list_of_multiple(remote_rsc->running_on) && remote_rsc->partial_migration_source && remote_rsc->partial_migration_target) { /* We're in the middle of migrating a connection resource, * wait until after the resource migrates before performing * any actions. */ return remote_state_resting; } return remote_state_alive; } /*! * \internal * \brief Order actions on remote node relative to actions for the connection */ static void apply_remote_ordering(pe_action_t *action, pe_working_set_t *data_set) { pe_resource_t *remote_rsc = NULL; enum action_tasks task = text2task(action->task); enum remote_connection_state state = get_remote_node_state(action->node); enum pe_ordering order_opts = pe_order_none; if (action->rsc == NULL) { return; } CRM_ASSERT(action->node); CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); crm_trace("Order %s action %s relative to %s%s (state: %s)", action->task, action->uuid, is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, state2text(state)); if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: order_opts = pe_order_none; if (state == remote_state_failed) { /* Force recovery, by making this action required */ order_opts |= pe_order_implies_then; } /* Ensure connection is up before running this action */ order_start_then_action(remote_rsc, action, order_opts, data_set); break; case stop_rsc: if(state == remote_state_alive) { order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else if(state == remote_state_failed) { /* The resource is active on the node, but since we don't have a * valid connection, the only way to stop the resource is by * fencing the node. There is no need to order the stop relative * to the remote connection, since the stop will become implied * by the fencing. */ pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable", FALSE); } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) { /* State must be remote_state_unknown or remote_state_stopped. * Since the connection is not coming back up in this * transition, stop this resource first. */ order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else { /* The connection is going to be started somewhere else, so * stop this resource after that completes. */ order_start_then_action(remote_rsc, action, pe_order_none, data_set); } break; case action_demote: /* Only order this demote relative to the connection start if the * connection isn't being torn down. Otherwise, the demote would be * blocked because the connection start would not be allowed. */ if(state == remote_state_resting || state == remote_state_unknown) { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } /* Otherwise we can rely on the stop ordering */ break; default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ order_start_then_action(remote_rsc, action, pe_order_implies_then, data_set); } else { pe_node_t *cluster_node = pe__current_node(remote_rsc); if(task == monitor_rsc && state == remote_state_failed) { /* We would only be here if we do not know the * state of the resource on the remote node. * Since we have no way to find out, it is * necessary to fence the node. */ pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable", FALSE); } if(cluster_node && state == remote_state_stopped) { /* The connection is currently up, but is going * down permanently. * * Make sure we check services are actually * stopped _before_ we let the connection get * closed */ order_action_then_stop(action, remote_rsc, pe_order_runnable_left, data_set); } else { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } } break; } } static void apply_remote_node_ordering(pe_working_set_t *data_set) { if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { return; } for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; pe_resource_t *remote = NULL; // We are only interested in resource actions if (action->rsc == NULL) { continue; } /* Special case: If we are clearing the failcount of an actual * remote connection resource, then make sure this happens before * any start of the resource in this transition. */ if (action->rsc->is_remote_node && pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, pcmk__str_casei)) { custom_action_order(action->rsc, NULL, action, action->rsc, pcmk__op_key(action->rsc->id, RSC_START, 0), NULL, pe_order_optional, data_set); continue; } // We are only interested in actions allocated to a node if (action->node == NULL) { continue; } if (!pe__is_guest_or_remote_node(action->node)) { continue; } /* We are only interested in real actions. * * @TODO This is probably wrong; pseudo-actions might be converted to * real actions and vice versa later in update_actions() at the end of * stage7(). */ if (is_set(action->flags, pe_action_pseudo)) { continue; } remote = action->node->details->remote_rsc; if (remote == NULL) { // Orphaned continue; } /* Another special case: if a resource is moving to a Pacemaker Remote * node, order the stop on the original node after any start of the * remote connection. This ensures that if the connection fails to * start, we leave the resource running on the original node. */ if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) { for (GList *item = action->rsc->actions; item != NULL; item = item->next) { pe_action_t *rsc_action = item->data; if ((rsc_action->node->details != action->node->details) && pcmk__str_eq(rsc_action->task, RSC_STOP, pcmk__str_casei)) { custom_action_order(remote, start_key(remote), NULL, action->rsc, NULL, rsc_action, pe_order_optional, data_set); } } } /* The action occurs across a remote connection, so create * ordering constraints that guarantee the action occurs while the node * is active (after start, before stop ... things like that). * * This is somewhat brittle in that we need to make sure the results of * this ordering are compatible with the result of get_router_node(). * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part * of this logic rather than action2xml(). */ if (remote->container) { crm_trace("Container ordering for %s", action->uuid); apply_container_ordering(action, data_set); } else { crm_trace("Remote ordering for %s", action->uuid); apply_remote_ordering(action, data_set); } } } static gboolean order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action) { /* No need to probe the resource on the node that is being * unfenced. Otherwise it might introduce transition loop * since probe will be performed after the node is * unfenced. */ if (pcmk__str_eq(rh_action->task, CRM_OP_FENCE, pcmk__str_casei) && probe->node && rh_action->node && probe->node->details == rh_action->node->details) { const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action"); if (pcmk__str_eq(op, "on", pcmk__str_casei)) { return TRUE; } } // Shutdown waits for probe to complete only if it's on the same node if ((pcmk__str_eq(rh_action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) && probe->node && rh_action->node && probe->node->details != rh_action->node->details) { return TRUE; } return FALSE; } static void order_first_probes_imply_stops(pe_working_set_t * data_set) { GListPtr gIter = NULL; for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) { pe__ordering_t *order = gIter->data; enum pe_ordering order_type = pe_order_optional; pe_resource_t *lh_rsc = order->lh_rsc; pe_resource_t *rh_rsc = order->rh_rsc; pe_action_t *lh_action = order->lh_action; pe_action_t *rh_action = order->rh_action; const char *lh_action_task = order->lh_action_task; const char *rh_action_task = order->rh_action_task; GListPtr probes = NULL; GListPtr rh_actions = NULL; GListPtr pIter = NULL; if (lh_rsc == NULL) { continue; } else if (rh_rsc && lh_rsc == rh_rsc) { continue; } if (lh_action == NULL && lh_action_task == NULL) { continue; } if (rh_action == NULL && rh_action_task == NULL) { continue; } /* Technically probe is expected to return "not running", which could be * the alternative of stop action if the status of the resource is * unknown yet. */ if (lh_action && !pcmk__str_eq(lh_action->task, RSC_STOP, pcmk__str_casei)) { continue; } else if (lh_action == NULL && lh_action_task && !pcmk__ends_with(lh_action_task, "_" RSC_STOP "_0")) { continue; } /* Do not probe the resource inside of a stopping container. Otherwise * it might introduce transition loop since probe will be performed * after the container starts again. */ if (rh_rsc && lh_rsc->container == rh_rsc) { if (rh_action && pcmk__str_eq(rh_action->task, RSC_STOP, pcmk__str_casei)) { continue; } else if (rh_action == NULL && rh_action_task && pcmk__ends_with(rh_action_task,"_" RSC_STOP "_0")) { continue; } } if (order->type == pe_order_none) { continue; } // Preserve the order options for future filtering if (is_set(order->type, pe_order_apply_first_non_migratable)) { set_bit(order_type, pe_order_apply_first_non_migratable); } if (is_set(order->type, pe_order_same_node)) { set_bit(order_type, pe_order_same_node); } // Keep the order types for future filtering if (order->type == pe_order_anti_colocation || order->type == pe_order_load) { order_type = order->type; } probes = pe__resource_actions(lh_rsc, NULL, RSC_STATUS, FALSE); if (probes == NULL) { continue; } if (rh_action) { rh_actions = g_list_prepend(rh_actions, rh_action); } else if (rh_rsc && rh_action_task) { rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL); } if (rh_actions == NULL) { g_list_free(probes); continue; } crm_trace("Processing for LH probe based on ordering constraint %s -> %s" " (id=%d, type=%.6x)", lh_action ? lh_action->uuid : lh_action_task, rh_action ? rh_action->uuid : rh_action_task, order->id, order->type); for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; GListPtr rIter = NULL; for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) { pe_action_t *rh_action_iter = (pe_action_t *) rIter->data; if (order_first_probe_unneeded(probe, rh_action_iter)) { continue; } order_actions(probe, rh_action_iter, order_type); } } g_list_free(rh_actions); g_list_free(probes); } } static void order_first_probe_then_restart_repromote(pe_action_t * probe, pe_action_t * after, pe_working_set_t * data_set) { GListPtr gIter = NULL; bool interleave = FALSE; pe_resource_t *compatible_rsc = NULL; if (probe == NULL || probe->rsc == NULL || probe->rsc->variant != pe_native) { return; } if (after == NULL // Avoid running into any possible loop || is_set(after->flags, pe_action_tracking)) { return; } if (!pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) { return; } pe_set_action_bit(after, pe_action_tracking); crm_trace("Processing based on %s %s -> %s %s", probe->uuid, probe->node ? probe->node->details->uname: "", after->uuid, after->node ? after->node->details->uname : ""); if (after->rsc /* Better not build a dependency directly with a clone/group. * We are going to proceed through the ordering chain and build * dependencies with its children. */ && after->rsc->variant == pe_native && probe->rsc != after->rsc) { GListPtr then_actions = NULL; enum pe_ordering probe_order_type = pe_order_optional; if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) { then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP, FALSE); } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) { then_actions = pe__resource_actions(after->rsc, NULL, RSC_DEMOTE, FALSE); } for (gIter = then_actions; gIter != NULL; gIter = gIter->next) { pe_action_t *then = (pe_action_t *) gIter->data; // Skip any pseudo action which for example is implied by fencing if (is_set(then->flags, pe_action_pseudo)) { continue; } order_actions(probe, then, probe_order_type); } g_list_free(then_actions); } if (after->rsc && after->rsc->variant > pe_group) { const char *interleave_s = g_hash_table_lookup(after->rsc->meta, XML_RSC_ATTR_INTERLEAVE); interleave = crm_is_true(interleave_s); if (interleave) { /* For an interleaved clone, we should build a dependency only * with the relevant clone child. */ compatible_rsc = find_compatible_child(probe->rsc, after->rsc, RSC_ROLE_UNKNOWN, FALSE, data_set); } } for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) gIter->data; /* pe_order_implies_then is the reason why a required A.start * implies/enforces B.start to be required too, which is the cause of * B.restart/re-promote. * * Not sure about pe_order_implies_then_on_node though. It's now only * used for unfencing case, which tends to introduce transition * loops... */ if (is_not_set(after_wrapper->type, pe_order_implies_then)) { /* The order type between a group/clone and its child such as * B.start-> B_child.start is: * pe_order_implies_first_printed | pe_order_runnable_left * * Proceed through the ordering chain and build dependencies with * its children. */ if (after->rsc == NULL || after->rsc->variant < pe_group || probe->rsc->parent == after->rsc || after_wrapper->action->rsc == NULL || after_wrapper->action->rsc->variant > pe_group || after->rsc != after_wrapper->action->rsc->parent) { continue; } /* Proceed to the children of a group or a non-interleaved clone. * For an interleaved clone, proceed only to the relevant child. */ if (after->rsc->variant > pe_group && interleave == TRUE && (compatible_rsc == NULL || compatible_rsc != after_wrapper->action->rsc)) { continue; } } crm_trace("Proceeding through %s %s -> %s %s (type=0x%.6x)", after->uuid, after->node ? after->node->details->uname: "", after_wrapper->action->uuid, after_wrapper->action->node ? after_wrapper->action->node->details->uname : "", after_wrapper->type); order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set); } } static void clear_actions_tracking_flag(pe_working_set_t * data_set) { GListPtr gIter = NULL; for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (is_set(action->flags, pe_action_tracking)) { pe_clear_action_bit(action, pe_action_tracking); } } } static void order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; GListPtr probes = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t * child = (pe_resource_t *) gIter->data; order_first_rsc_probes(child, data_set); } if (rsc->variant != pe_native) { return; } probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE); for (gIter = probes; gIter != NULL; gIter= gIter->next) { pe_action_t *probe = (pe_action_t *) gIter->data; GListPtr aIter = NULL; for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) { pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) aIter->data; order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set); clear_actions_tracking_flag(data_set); } } g_list_free(probes); } static void order_first_probes(pe_working_set_t * data_set) { GListPtr gIter = NULL; for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; order_first_rsc_probes(rsc, data_set); } order_first_probes_imply_stops(data_set); } static void order_then_probes(pe_working_set_t * data_set) { #if 0 GListPtr gIter = NULL; for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* Given "A then B", we would prefer to wait for A to be * started before probing B. * * If A was a filesystem on which the binaries and data for B * lived, it would have been useful if the author of B's agent * could assume that A is running before B.monitor will be * called. * * However we can't _only_ probe once A is running, otherwise * we'd not detect the state of B if A could not be started * for some reason. * * In practice however, we cannot even do an opportunistic * version of this because B may be moving: * * B.probe -> B.start * B.probe -> B.stop * B.stop -> B.start * A.stop -> A.start * A.start -> B.probe * * So far so good, but if we add the result of this code: * * B.stop -> A.stop * * Then we get a loop: * * B.probe -> B.stop -> A.stop -> A.start -> B.probe * * We could kill the 'B.probe -> B.stop' dependency, but that * could mean stopping B "too" soon, because B.start must wait * for the probes to complete. * * Another option is to allow it only if A is a non-unique * clone with clone-max == node-max (since we'll never be * moving it). However, we could still be stopping one * instance at the same time as starting another. * The complexity of checking for allowed conditions combined * with the ever narrowing usecase suggests that this code * should remain disabled until someone gets smarter. */ pe_action_t *start = NULL; GListPtr actions = NULL; GListPtr probes = NULL; actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE); if (actions) { start = actions->data; g_list_free(actions); } if(start == NULL) { crm_err("No start action for %s", rsc->id); continue; } probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE); for (actions = start->actions_before; actions != NULL; actions = actions->next) { pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data; GListPtr pIter = NULL; pe_action_t *first = before->action; pe_resource_t *first_rsc = first->rsc; if(first->required_runnable_before) { GListPtr clone_actions = NULL; for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) { before = (pe_action_wrapper_t *) clone_actions->data; crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid); CRM_ASSERT(before->action->rsc); first_rsc = before->action->rsc; break; } } else if(!pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) { crm_trace("Not a start op %s for %s", first->uuid, start->uuid); } if(first_rsc == NULL) { continue; } else if(uber_parent(first_rsc) == uber_parent(start->rsc)) { crm_trace("Same parent %s for %s", first_rsc->id, start->uuid); continue; } else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) { crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid); continue; } crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant); for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; crm_err("Ordering %s before %s", first->uuid, probe->uuid); order_actions(first, probe, pe_order_optional); } } } #endif } static void order_probes(pe_working_set_t * data_set) { order_first_probes(data_set); order_then_probes(data_set); } gboolean stage7(pe_working_set_t * data_set) { GList *gIter = NULL; crm_trace("Applying ordering constraints"); /* Don't ask me why, but apparently they need to be processed in * the order they were created in... go figure * * Also g_list_append() has horrendous performance characteristics * So we need to use g_list_prepend() and then reverse the list here */ data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints); for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) { pe__ordering_t *order = gIter->data; pe_resource_t *rsc = order->lh_rsc; crm_trace("Applying ordering constraint: %d", order->id); if (rsc != NULL) { crm_trace("rsc_action-to-*"); rsc_order_first(rsc, order, data_set); continue; } rsc = order->rh_rsc; if (rsc != NULL) { crm_trace("action-to-rsc_action"); rsc_order_then(order->lh_action, rsc, order); } else { crm_trace("action-to-action"); order_actions(order->lh_action, order->rh_action, order->type); } } for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; update_colo_start_chain(action, data_set); } crm_trace("Ordering probes"); order_probes(data_set); crm_trace("Updating %d actions", g_list_length(data_set->actions)); for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; update_action(action, data_set); } // Check for invalid orderings for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; pe_action_wrapper_t *input = NULL; for (GList *input_iter = action->actions_before; input_iter != NULL; input_iter = input_iter->next) { input = (pe_action_wrapper_t *) input_iter->data; if (pcmk__ordering_is_invalid(action, input)) { input->type = pe_order_none; } } } LogNodeActions(data_set, FALSE); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; LogActions(rsc, data_set, FALSE); } return TRUE; } static int transition_id = -1; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const char *filename) { if (was_processing_error) { crm_err("Calculated transition %d (with errors), saving inputs in %s", transition_id, filename); } else if (was_processing_warning) { crm_warn("Calculated transition %d (with warnings), saving inputs in %s", transition_id, filename); } else { crm_notice("Calculated transition %d, saving inputs in %s", transition_id, filename); } if (crm_config_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } /* * Create a dependency graph to send to the transitioner (via the controller) */ gboolean stage8(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *value = NULL; transition_id++; crm_trace("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if (crm_parse_ll(value, NULL) > 0) { crm_xml_add(data_set->graph, "migration-limit", value); } if (data_set->recheck_by > 0) { char *recheck_epoch = NULL; recheck_epoch = crm_strdup_printf("%llu", (long long) data_set->recheck_by); crm_xml_add(data_set->graph, "recheck-by", recheck_epoch); free(recheck_epoch); } /* errors... slist_iter(action, pe_action_t, action_list, lpc, if(action->optional == FALSE && action->runnable == FALSE) { print_action("Ignoring", action, TRUE); } ); */ /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. */ gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id); rsc->cmds->expand(rsc, data_set); } crm_log_xml_trace(data_set->graph, "created resource-driven action list"); /* pseudo action to distribute list of nodes with maintenance state update */ add_maintenance_update(data_set); /* catch any non-resource specific actions */ crm_trace("processing non-resource actions"); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (action->rsc && action->node && action->node->details->shutdown && is_not_set(action->rsc->flags, pe_rsc_maintenance) && is_not_set(action->flags, pe_action_optional) && is_not_set(action->flags, pe_action_runnable) && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none) ) { /* Eventually we should just ignore the 'fence' case * But for now it's the best way to detect (in CTS) when * CIB resource updates are being lost */ if (is_set(data_set->flags, pe_flag_have_quorum) || data_set->no_quorum_policy == no_quorum_ignore) { crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)", action->node->details->unclean ? "fence" : "shut down", action->node->details->uname, action->rsc->id, is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked", is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "", action->uuid); } } graph_element_from_action(action, data_set); } crm_log_xml_trace(data_set->graph, "created generic action list"); crm_trace("Created transition graph %d.", transition_id); return TRUE; } void LogNodeActions(pe_working_set_t * data_set, gboolean terminal) { GListPtr gIter = NULL; for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { char *node_name = NULL; char *task = NULL; pe_action_t *action = (pe_action_t *) gIter->data; if (action->rsc != NULL) { continue; } else if (is_set(action->flags, pe_action_optional)) { continue; } if (pe__is_guest_node(action->node)) { node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id); } else if(action->node) { node_name = crm_strdup_printf("%s", action->node->details->uname); } if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { task = strdup("Shutdown"); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { const char *op = g_hash_table_lookup(action->meta, "stonith_action"); task = crm_strdup_printf("Fence (%s)", op); } if(task == NULL) { /* Nothing to report */ } else if(terminal && action->reason) { printf(" * %s %s '%s'\n", task, node_name, action->reason); } else if(terminal) { printf(" * %s %s\n", task, node_name); } else if(action->reason) { crm_notice(" * %s %s '%s'\n", task, node_name, action->reason); } else { crm_notice(" * %s %s\n", task, node_name); } free(node_name); free(task); } } diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c index e68eca90e3..610e21000d 100644 --- a/lib/pacemaker/pcmk_sched_clone.c +++ b/lib/pacemaker/pcmk_sched_clone.c @@ -1,1509 +1,1513 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #define VARIANT_CLONE 1 #include gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all); static gint sort_rsc_id(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; long num1, num2; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* * Sort clone instances numerically by instance number, so instance :10 * comes after :9. */ num1 = strtol(strrchr(resource1->id, ':') + 1, NULL, 10); num2 = strtol(strrchr(resource2->id, ':') + 1, NULL, 10); if (num1 < num2) { return -1; } else if (num1 > num2) { return 1; } return 0; } static pe_node_t * parent_node_instance(const pe_resource_t * rsc, pe_node_t * node) { pe_node_t *ret = NULL; if (node != NULL && rsc->parent) { ret = pe_hash_table_lookup(rsc->parent->allowed_nodes, node->details->id); } else if(node != NULL) { ret = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); } return ret; } static gboolean did_fail(const pe_resource_t * rsc) { GListPtr gIter = rsc->children; if (is_set(rsc->flags, pe_rsc_failed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (did_fail(child_rsc)) { return TRUE; } } return FALSE; } gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) { int rc = 0; pe_node_t *node1 = NULL; pe_node_t *node2 = NULL; pe_node_t *current_node1 = NULL; pe_node_t *current_node2 = NULL; unsigned int nnodes1 = 0; unsigned int nnodes2 = 0; gboolean can1 = TRUE; gboolean can2 = TRUE; const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* allocation order: * - active instances * - instances running on nodes with the least copies * - active instances on nodes that can't support them or are to be fenced * - failed instances * - inactive instances */ current_node1 = pe__find_active_on(resource1, &nnodes1, NULL); current_node2 = pe__find_active_on(resource2, &nnodes2, NULL); if (nnodes1 && nnodes2) { if (nnodes1 < nnodes2) { crm_trace("%s < %s: running_on", resource1->id, resource2->id); return -1; } else if (nnodes1 > nnodes2) { crm_trace("%s > %s: running_on", resource1->id, resource2->id); return 1; } } node1 = current_node1; node2 = current_node2; if (node1) { pe_node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id); if (match == NULL || match->weight < 0) { crm_trace("%s: current location is unavailable", resource1->id); node1 = NULL; can1 = FALSE; } } if (node2) { pe_node_t *match = pe_hash_table_lookup(resource2->allowed_nodes, node2->details->id); if (match == NULL || match->weight < 0) { crm_trace("%s: current location is unavailable", resource2->id); node2 = NULL; can2 = FALSE; } } if (can1 != can2) { if (can1) { crm_trace("%s < %s: availability of current location", resource1->id, resource2->id); return -1; } crm_trace("%s > %s: availability of current location", resource1->id, resource2->id); return 1; } if (resource1->priority < resource2->priority) { crm_trace("%s < %s: priority", resource1->id, resource2->id); return 1; } else if (resource1->priority > resource2->priority) { crm_trace("%s > %s: priority", resource1->id, resource2->id); return -1; } if (node1 == NULL && node2 == NULL) { crm_trace("%s == %s: not active", resource1->id, resource2->id); return 0; } if (node1 != node2) { if (node1 == NULL) { crm_trace("%s > %s: active", resource1->id, resource2->id); return 1; } else if (node2 == NULL) { crm_trace("%s < %s: active", resource1->id, resource2->id); return -1; } } can1 = can_run_resources(node1); can2 = can_run_resources(node2); if (can1 != can2) { if (can1) { crm_trace("%s < %s: can", resource1->id, resource2->id); return -1; } crm_trace("%s > %s: can", resource1->id, resource2->id); return 1; } node1 = parent_node_instance(resource1, node1); node2 = parent_node_instance(resource2, node2); if (node1 != NULL && node2 == NULL) { crm_trace("%s < %s: not allowed", resource1->id, resource2->id); return -1; } else if (node1 == NULL && node2 != NULL) { crm_trace("%s > %s: not allowed", resource1->id, resource2->id); return 1; } if (node1 == NULL || node2 == NULL) { crm_trace("%s == %s: not allowed", resource1->id, resource2->id); return 0; } if (node1->count < node2->count) { crm_trace("%s < %s: count", resource1->id, resource2->id); return -1; } else if (node1->count > node2->count) { crm_trace("%s > %s: count", resource1->id, resource2->id); return 1; } can1 = did_fail(resource1); can2 = did_fail(resource2); if (can1 != can2) { if (can1) { crm_trace("%s > %s: failed", resource1->id, resource2->id); return 1; } crm_trace("%s < %s: failed", resource1->id, resource2->id); return -1; } if (node1 && node2) { int lpc = 0; int max = 0; pe_node_t *n = NULL; GListPtr gIter = NULL; GListPtr list1 = NULL; GListPtr list2 = NULL; GHashTable *hash1 = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); GHashTable *hash2 = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); n = pe__copy_node(current_node1); g_hash_table_insert(hash1, (gpointer) n->details->id, n); n = pe__copy_node(current_node2); g_hash_table_insert(hash2, (gpointer) n->details->id, n); if(resource1->parent) { for (gIter = resource1->parent->rsc_cons; gIter; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } crm_trace("Applying %s to %s", constraint->id, resource1->id); hash1 = pcmk__native_merge_weights(constraint->rsc_rh, resource1->id, hash1, constraint->node_attribute, constraint->score / (float) INFINITY, 0); } for (gIter = resource1->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } crm_trace("Applying %s to %s", constraint->id, resource1->id); hash1 = pcmk__native_merge_weights(constraint->rsc_lh, resource1->id, hash1, constraint->node_attribute, constraint->score / (float) INFINITY, pe_weights_positive); } } if(resource2->parent) { for (gIter = resource2->parent->rsc_cons; gIter; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; crm_trace("Applying %s to %s", constraint->id, resource2->id); hash2 = pcmk__native_merge_weights(constraint->rsc_rh, resource2->id, hash2, constraint->node_attribute, constraint->score / (float) INFINITY, 0); } for (gIter = resource2->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; crm_trace("Applying %s to %s", constraint->id, resource2->id); hash2 = pcmk__native_merge_weights(constraint->rsc_lh, resource2->id, hash2, constraint->node_attribute, constraint->score / (float) INFINITY, pe_weights_positive); } } /* Current location score */ node1 = g_hash_table_lookup(hash1, current_node1->details->id); node2 = g_hash_table_lookup(hash2, current_node2->details->id); if (node1->weight < node2->weight) { if (node1->weight < 0) { crm_trace("%s > %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight); rc = -1; goto out; } else { crm_trace("%s < %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight); rc = 1; goto out; } } else if (node1->weight > node2->weight) { crm_trace("%s > %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight); rc = -1; goto out; } /* All location scores */ list1 = g_hash_table_get_values(hash1); list2 = g_hash_table_get_values(hash2); list1 = sort_nodes_by_weight(list1, current_node1, data_set); list2 = sort_nodes_by_weight(list2, current_node2, data_set); max = g_list_length(list1); if (max < g_list_length(list2)) { max = g_list_length(list2); } for (; lpc < max; lpc++) { node1 = g_list_nth_data(list1, lpc); node2 = g_list_nth_data(list2, lpc); if (node1 == NULL) { crm_trace("%s < %s: colocated score NULL", resource1->id, resource2->id); rc = 1; break; } else if (node2 == NULL) { crm_trace("%s > %s: colocated score NULL", resource1->id, resource2->id); rc = -1; break; } if (node1->weight < node2->weight) { crm_trace("%s < %s: colocated score", resource1->id, resource2->id); rc = 1; break; } else if (node1->weight > node2->weight) { crm_trace("%s > %s: colocated score", resource1->id, resource2->id); rc = -1; break; } } /* Order by reverse uname - same as sort_node_weight() does? */ out: g_hash_table_destroy(hash1); /* Free mem */ g_hash_table_destroy(hash2); /* Free mem */ g_list_free(list1); g_list_free(list2); if (rc != 0) { return rc; } } rc = strcmp(resource1->id, resource2->id); crm_trace("%s %c %s: default", resource1->id, rc < 0 ? '<' : '>', resource2->id); return rc; } static pe_node_t * can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit) { pe_node_t *local_node = NULL; if (node == NULL && rsc->allowed_nodes) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) { can_run_instance(rsc, local_node, limit); } return NULL; } if (!node) { /* make clang analyzer happy */ goto bail; } else if (can_run_resources(node) == FALSE) { goto bail; } else if (is_set(rsc->flags, pe_rsc_orphan)) { goto bail; } local_node = parent_node_instance(rsc, node); if (local_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname); goto bail; } else if (local_node->weight < 0) { common_update_score(rsc, node->details->id, local_node->weight); pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.", rsc->id, node->details->uname); } else if (local_node->count < limit) { pe_rsc_trace(rsc, "%s can run on %s (already running %d)", rsc->id, node->details->uname, local_node->count); return local_node; } else { pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)", rsc->id, node->details->uname, local_node->count, limit); } bail: if (node) { common_update_score(rsc, node->details->id, -INFINITY); } return NULL; } static pe_node_t * allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc, int limit, pe_working_set_t *data_set) { pe_node_t *chosen = NULL; GHashTable *backup = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)", rsc->id, (prefer? prefer->details->uname: "none"), (all_coloc? "all" : "some")); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->fns->location(rsc, NULL, FALSE); } else if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } /* Only include positive colocation preferences of dependent resources * if not every node will get a copy of the clone */ append_parent_colocation(rsc->parent, rsc, all_coloc); if (prefer) { pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (local_prefer == NULL || local_prefer->weight < 0) { pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id, prefer->details->uname); return NULL; } } can_run_instance(rsc, NULL, limit); backup = pcmk__copy_node_table(rsc->allowed_nodes); pe_rsc_trace(rsc, "Allocating instance %s", rsc->id); chosen = rsc->cmds->allocate(rsc, prefer, data_set); if (chosen && prefer && (chosen->details != prefer->details)) { crm_info("Not pre-allocating %s to %s because %s is better", rsc->id, prefer->details->uname, chosen->details->uname); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = backup; native_deallocate(rsc); chosen = NULL; backup = NULL; } if (chosen) { pe_node_t *local_node = parent_node_instance(rsc, chosen); if (local_node) { local_node->count++; } else if (is_set(rsc->flags, pe_rsc_managed)) { /* what to do? we can't enforce per-node limits in this case */ pcmk__config_err("%s not found in %s (list of %d)", chosen->details->id, rsc->parent->id, g_hash_table_size(rsc->parent->allowed_nodes)); } } if(backup) { g_hash_table_destroy(backup); } return chosen; } static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all) { GListPtr gIter = NULL; gIter = rsc->rsc_cons; for (; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *cons = (rsc_colocation_t *) gIter->data; if (cons->score == 0) { continue; } if (all || cons->score < 0 || cons->score == INFINITY) { child->rsc_cons = g_list_prepend(child->rsc_cons, cons); } } gIter = rsc->rsc_cons_lhs; for (; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *cons = (rsc_colocation_t *) gIter->data; if (cons->score == 0) { continue; } if (all || cons->score < 0) { child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons); } } } void distribute_children(pe_resource_t *rsc, GListPtr children, GListPtr nodes, int max, int per_host_max, pe_working_set_t * data_set); void distribute_children(pe_resource_t *rsc, GListPtr children, GListPtr nodes, int max, int per_host_max, pe_working_set_t * data_set) { int loop_max = 0; int allocated = 0; int available_nodes = 0; /* count now tracks the number of clones currently allocated */ for(GListPtr nIter = nodes; nIter != NULL; nIter = nIter->next) { pe_node_t *node = nIter->data; node->count = 0; if (can_run_resources(node)) { available_nodes++; } } if(available_nodes) { loop_max = max / available_nodes; } if (loop_max < 1) { loop_max = 1; } pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)", max, rsc->id, available_nodes, per_host_max, loop_max); /* Pre-allocate as many instances as we can to their current location */ for (GListPtr gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (child->running_on && is_set(child->flags, pe_rsc_provisional) && is_not_set(child->flags, pe_rsc_failed)) { pe_node_t *child_node = pe__current_node(child); pe_node_t *local_node = parent_node_instance(child, child_node); pe_rsc_trace(rsc, "Checking pre-allocation of %s to %s (%d remaining of %d)", child->id, child_node->details->uname, max - allocated, max); if (can_run_resources(child_node) == FALSE || child_node->weight < 0) { pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s", child_node->details->uname, child->id); } else if(local_node && local_node->count >= loop_max) { pe_rsc_trace(rsc, "Not pre-allocating because %s already allocated optimal instances", child_node->details->uname); } else if (allocate_instance(child, child_node, max < available_nodes, per_host_max, data_set)) { pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id, child_node->details->uname); allocated++; } } } pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max); for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (child->running_on != NULL) { pe_node_t *child_node = pe__current_node(child); pe_node_t *local_node = parent_node_instance(child, child_node); if (local_node == NULL) { crm_err("%s is running on %s which isn't allowed", child->id, child_node->details->uname); } } if (is_not_set(child->flags, pe_rsc_provisional)) { } else if (allocated >= max) { pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max); resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set); } else { if (allocate_instance(child, NULL, max < available_nodes, per_host_max, data_set)) { allocated++; } } } pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d", allocated, rsc->id, max); } pe_node_t * pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { GListPtr nodes = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return NULL; } else if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } if (is_set(rsc->flags, pe_rsc_promotable)) { apply_master_prefs(rsc); } set_bit(rsc->flags, pe_rsc_allocating); /* this information is used by sort_clone_instance() when deciding in which * order to allocate clone instances */ for (GListPtr gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } pe_rsc_trace(rsc, "%s: Allocating %s first", rsc->id, constraint->rsc_rh->id); constraint->rsc_rh->cmds->allocate(constraint->rsc_rh, prefer, data_set); } for (GListPtr gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY, (pe_weights_rollback | pe_weights_positive)); } pe__show_node_weights(!show_scores, rsc, __FUNCTION__, rsc->allowed_nodes); nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set); distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, data_set); g_list_free(nodes); if (is_set(rsc->flags, pe_rsc_promotable)) { pcmk__set_instance_roles(rsc, data_set); } clear_bit(rsc->flags, pe_rsc_provisional); clear_bit(rsc->flags, pe_rsc_allocating); pe_rsc_trace(rsc, "Done allocating %s", rsc->id); return NULL; } static void clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting, gboolean * active) { GListPtr gIter = NULL; if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; clone_update_pseudo_status(child, stopping, starting, active); } return; } CRM_ASSERT(active != NULL); CRM_ASSERT(starting != NULL); CRM_ASSERT(stopping != NULL); if (rsc->running_on) { *active = TRUE; } gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (*starting && *stopping) { return; } else if (is_set(action->flags, pe_action_optional)) { pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid); continue; } else if (is_set(action->flags, pe_action_pseudo) == FALSE && is_set(action->flags, pe_action_runnable) == FALSE) { pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid); continue; } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) { pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid); *stopping = TRUE; } else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) { if (is_set(action->flags, pe_action_runnable) == FALSE) { pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d", action->uuid, is_set(action->flags, pe_action_runnable), is_set(action->flags, pe_action_pseudo)); } else { pe_rsc_trace(rsc, "Starting due to: %s", action->uuid); pe_rsc_trace(rsc, "%s run=%d, pseudo=%d", action->uuid, is_set(action->flags, pe_action_runnable), is_set(action->flags, pe_action_pseudo)); *starting = TRUE; } } } } static pe_action_t * find_rsc_action(pe_resource_t *rsc, const char *task, gboolean active_only, GList **list) { pe_action_t *match = NULL; GListPtr possible = NULL; GListPtr active = NULL; possible = pe__resource_actions(rsc, NULL, task, FALSE); if (active_only) { GListPtr gIter = possible; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE) { active = g_list_prepend(active, op); } } if (active && pcmk__list_of_1(active)) { match = g_list_nth_data(active, 0); } if (list) { *list = active; active = NULL; } } else if (possible && pcmk__list_of_1(possible)) { match = g_list_nth_data(possible, 0); } if (list) { *list = possible; possible = NULL; } if (possible) { g_list_free(possible); } if (active) { g_list_free(active); } return match; } static void child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *stop = NULL; pe_action_t *start = NULL; pe_action_t *last_stop = NULL; pe_action_t *last_start = NULL; GListPtr gIter = NULL; gboolean active_only = TRUE; /* change to false to get the old behavior */ clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (clone_data->ordered == FALSE) { return; } /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, sort_rsc_id); for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; stop = find_rsc_action(child, RSC_STOP, active_only, NULL); if (stop) { if (last_stop) { /* child/child relative stop */ order_actions(stop, last_stop, pe_order_optional); } last_stop = stop; } start = find_rsc_action(child, RSC_START, active_only, NULL); if (start) { if (last_start) { /* child/child relative start */ order_actions(last_start, start, pe_order_optional); } last_start = start; } } } void clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify,data_set); child_ordering_constraints(rsc, data_set); if (is_set(rsc->flags, pe_rsc_promotable)) { create_promotable_actions(rsc, data_set); } } void clone_create_pseudo_actions( pe_resource_t * rsc, GListPtr children, notify_data_t **start_notify, notify_data_t **stop_notify, pe_working_set_t * data_set) { gboolean child_active = FALSE; gboolean child_starting = FALSE; gboolean child_stopping = FALSE; gboolean allow_dependent_migrations = TRUE; pe_action_t *stop = NULL; pe_action_t *stopped = NULL; pe_action_t *start = NULL; pe_action_t *started = NULL; pe_rsc_trace(rsc, "Creating actions for %s", rsc->id); for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean starting = FALSE; gboolean stopping = FALSE; child_rsc->cmds->create_actions(child_rsc, data_set); clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active); if (stopping && starting) { allow_dependent_migrations = FALSE; } child_stopping |= stopping; child_starting |= starting; } /* start */ start = create_pseudo_resource_op(rsc, RSC_START, !child_starting, TRUE, data_set); started = create_pseudo_resource_op(rsc, RSC_STARTED, !child_starting, FALSE, data_set); started->priority = INFINITY; if (child_active || child_starting) { update_action_flags(started, pe_action_runnable, __FUNCTION__, __LINE__); } if (start_notify != NULL && *start_notify == NULL) { *start_notify = create_notification_boundaries(rsc, RSC_START, start, started, data_set); } /* stop */ stop = create_pseudo_resource_op(rsc, RSC_STOP, !child_stopping, TRUE, data_set); stopped = create_pseudo_resource_op(rsc, RSC_STOPPED, !child_stopping, TRUE, data_set); stopped->priority = INFINITY; if (allow_dependent_migrations) { update_action_flags(stop, pe_action_migrate_runnable, __FUNCTION__, __LINE__); } if (stop_notify != NULL && *stop_notify == NULL) { *stop_notify = create_notification_boundaries(rsc, RSC_STOP, stop, stopped, data_set); if (start_notify && *start_notify && *stop_notify) { order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional); } } } void clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set) { pe_resource_t *last_rsc = NULL; GListPtr gIter; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id); new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); if (is_set(rsc->flags, pe_rsc_promotable)) { new_rsc_order(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional, data_set); new_rsc_order(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left, data_set); } if (clone_data->ordered) { /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, sort_rsc_id); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->internal_constraints(child_rsc, data_set); order_start_start(rsc, child_rsc, pe_order_runnable_left | pe_order_implies_first_printed); new_rsc_order(child_rsc, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { order_start_start(last_rsc, child_rsc, pe_order_optional); } order_stop_stop(rsc, child_rsc, pe_order_implies_first_printed); new_rsc_order(child_rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { order_stop_stop(child_rsc, last_rsc, pe_order_optional); } last_rsc = child_rsc; } if (is_set(rsc->flags, pe_rsc_promotable)) { promotable_constraints(rsc, data_set); } } bool assign_node(pe_resource_t * rsc, pe_node_t * node, gboolean force) { bool changed = FALSE; if (rsc->children) { for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; changed |= assign_node(child_rsc, node, force); } return changed; } if (rsc->allocated_to != NULL) { changed = true; } native_assign_node(rsc, NULL, node, force); return changed; } gboolean is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current) { pe_node_t *node = NULL; enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); CRM_CHECK(child_rsc && local_node, return FALSE); if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { /* We only want instances that haven't failed */ node = child_rsc->fns->location(child_rsc, NULL, current); } if (filter != RSC_ROLE_UNKNOWN && next_role != filter) { crm_trace("Filtered %s", child_rsc->id); return FALSE; } if (node && (node->details == local_node->details)) { return TRUE; } else if (node) { crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname, local_node->details->uname); } else { crm_trace("%s - not allocated %d", child_rsc->id, current); } return FALSE; } pe_resource_t * find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { pe_resource_t *pair = NULL; GListPtr gIter = NULL; GListPtr scratch = NULL; pe_node_t *local_node = NULL; local_node = local_child->fns->location(local_child, NULL, current); if (local_node) { return find_compatible_child_by_node(local_child, local_node, rsc, filter, current); } scratch = g_hash_table_get_values(local_child->allowed_nodes); scratch = sort_nodes_by_weight(scratch, NULL, data_set); gIter = scratch; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = find_compatible_child_by_node(local_child, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id); done: g_list_free(scratch); return pair; } void clone_rsc_colocation_lh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, rsc_colocation_t *constraint, pe_working_set_t *data_set) { /* -- Never called -- * * Instead we add the colocation constraints to the child and call from there */ CRM_ASSERT(FALSE); } void clone_rsc_colocation_rh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, rsc_colocation_t *constraint, pe_working_set_t *data_set) { GListPtr gIter = NULL; gboolean do_interleave = FALSE; const char *interleave_s = NULL; CRM_CHECK(constraint != NULL, return); CRM_CHECK(rsc_lh != NULL, pe_err("rsc_lh was NULL for %s", constraint->id); return); CRM_CHECK(rsc_rh != NULL, pe_err("rsc_rh was NULL for %s", constraint->id); return); CRM_CHECK(rsc_lh->variant == pe_native, return); if (constraint->score == 0) { return; } pe_rsc_trace(rsc_rh, "Processing constraint %s: %s -> %s %d", constraint->id, rsc_lh->id, rsc_rh->id, constraint->score); if (is_set(rsc_rh->flags, pe_rsc_promotable)) { if (is_set(rsc_rh->flags, pe_rsc_provisional)) { pe_rsc_trace(rsc_rh, "%s is still provisional", rsc_rh->id); return; } else if (constraint->role_rh == RSC_ROLE_UNKNOWN) { pe_rsc_trace(rsc_rh, "Handling %s as a clone colocation", constraint->id); } else { promotable_colocation_rh(rsc_lh, rsc_rh, constraint, data_set); return; } } /* only the LHS side needs to be labeled as interleave */ interleave_s = g_hash_table_lookup(constraint->rsc_lh->meta, XML_RSC_ATTR_INTERLEAVE); if(crm_is_true(interleave_s) && constraint->rsc_lh->variant > pe_group) { // TODO: Do we actually care about multiple RH copies sharing a LH copy anymore? if (copies_per_node(constraint->rsc_lh) != copies_per_node(constraint->rsc_rh)) { pcmk__config_err("Cannot interleave %s and %s because they do not " "support the same number of instances per node", constraint->rsc_lh->id, constraint->rsc_rh->id); } else { do_interleave = TRUE; } } if (is_set(rsc_rh->flags, pe_rsc_provisional)) { pe_rsc_trace(rsc_rh, "%s is still provisional", rsc_rh->id); return; } else if (do_interleave) { pe_resource_t *rh_child = NULL; rh_child = find_compatible_child(rsc_lh, rsc_rh, RSC_ROLE_UNKNOWN, FALSE, data_set); if (rh_child) { pe_rsc_debug(rsc_rh, "Pairing %s with %s", rsc_lh->id, rh_child->id); rsc_lh->cmds->rsc_colocation_lh(rsc_lh, rh_child, constraint, data_set); } else if (constraint->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id); assign_node(rsc_lh, NULL, TRUE); } else { pe_rsc_debug(rsc_rh, "Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id); } return; } else if (constraint->score >= INFINITY) { GListPtr rhs = NULL; gIter = rsc_rh->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { pe_rsc_trace(rsc_rh, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); rhs = g_list_prepend(rhs, chosen); } } node_list_exclude(rsc_lh->allowed_nodes, rhs, FALSE); g_list_free(rhs); return; } gIter = rsc_rh->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint, data_set); } } enum action_tasks clone_child_action(pe_action_t * action) { enum action_tasks result = no_action; pe_resource_t *child = (pe_resource_t *) action->rsc->children->data; if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) { /* Find the action we're notifying about instead */ int stop = 0; char *key = action->uuid; int lpc = strlen(key); for (; lpc > 0; lpc--) { if (key[lpc] == '_' && stop == 0) { stop = lpc; } else if (key[lpc] == '_') { char *task_mutable = NULL; lpc++; task_mutable = strdup(key + lpc); task_mutable[stop - lpc] = 0; crm_trace("Extracted action '%s' from '%s'", task_mutable, key); result = get_complex_task(child, task_mutable, TRUE); free(task_mutable); break; } } } else { result = get_complex_task(child, action->task, TRUE); } return result; } enum pe_action_flags summary_action_flags(pe_action_t * action, GListPtr children, pe_node_t * node) { GListPtr gIter = NULL; gboolean any_runnable = FALSE; gboolean check_runnable = TRUE; enum action_tasks task = clone_child_action(action); enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); const char *task_s = task2text(task); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_action_t *child_action = NULL; pe_resource_t *child = (pe_resource_t *) gIter->data; child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node); pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id, node ? node->details->uname : "none", child_action?child_action->uuid:"NA"); if (child_action) { enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); if (is_set(flags, pe_action_optional) && is_set(child_flags, pe_action_optional) == FALSE) { pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid, child_action->uuid); - flags = crm_clear_bit(__FUNCTION__, __LINE__, action->rsc->id, flags, pe_action_optional); + flags = pcmk__clear_flags_as(__FUNCTION__, __LINE__, LOG_TRACE, + "Action summary", action->rsc->id, + flags, pe_action_optional, NULL); pe_clear_action_bit(action, pe_action_optional); } if (is_set(child_flags, pe_action_runnable)) { any_runnable = TRUE; } } } if (check_runnable && any_runnable == FALSE) { pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid); - flags = crm_clear_bit(__FUNCTION__, __LINE__, action->rsc->id, flags, pe_action_runnable); + flags = pcmk__clear_flags_as(__FUNCTION__, __LINE__, LOG_TRACE, + "Action summary", action->rsc->id, + flags, pe_action_runnable, NULL); if (node == NULL) { pe_clear_action_bit(action, pe_action_runnable); } } return flags; } enum pe_action_flags clone_action_flags(pe_action_t * action, pe_node_t * node) { return summary_action_flags(action, action->rsc->children, node); } void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { GListPtr gIter = rsc->children; pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_location(child_rsc, constraint); } } void clone_expand(pe_resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; rsc->cmds->action_flags(op, NULL); } if (clone_data->start_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->start_notify); expand_notification_data(rsc, clone_data->start_notify, data_set); create_notifications(rsc, clone_data->start_notify, data_set); } if (clone_data->stop_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->stop_notify); expand_notification_data(rsc, clone_data->stop_notify, data_set); create_notifications(rsc, clone_data->stop_notify, data_set); } if (clone_data->promote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->promote_notify); expand_notification_data(rsc, clone_data->promote_notify, data_set); create_notifications(rsc, clone_data->promote_notify, data_set); } if (clone_data->demote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->demote_notify); expand_notification_data(rsc, clone_data->demote_notify, data_set); create_notifications(rsc, clone_data->demote_notify, data_set); } /* Now that the notifcations have been created we can expand the children */ gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } native_expand(rsc, data_set); /* The notifications are in the graph now, we can destroy the notify_data */ free_notification_data(clone_data->demote_notify); clone_data->demote_notify = NULL; free_notification_data(clone_data->stop_notify); clone_data->stop_notify = NULL; free_notification_data(clone_data->start_notify); clone_data->start_notify = NULL; free_notification_data(clone_data->promote_notify); clone_data->promote_notify = NULL; } // Check whether a resource or any of its children is known on node static bool rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node) { if (rsc->children) { for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; if (rsc_known_on(child, node)) { return TRUE; } } } else if (rsc->known_on) { GHashTableIter iter; pe_node_t *known_node = NULL; g_hash_table_iter_init(&iter, rsc->known_on); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) { if (node->details == known_node->details) { return TRUE; } } } return FALSE; } // Look for an instance of clone that is known on node static pe_resource_t * find_instance_on(const pe_resource_t *clone, const pe_node_t *node) { for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (rsc_known_on(child, node)) { return child; } } return NULL; } // For unique clones, probe each instance separately static gboolean probe_unique_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set) { gboolean any_created = FALSE; for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; any_created |= child->cmds->create_probe(child, node, complete, force, data_set); } return any_created; } // For anonymous clones, only a single instance needs to be probed static gboolean probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set) { // First, check if we probed an instance on this node last time pe_resource_t *child = find_instance_on(rsc, node); // Otherwise, check if we plan to start an instance on this node if (child == NULL) { for (GList *child_iter = rsc->children; child_iter && !child; child_iter = child_iter->next) { pe_node_t *local_node = NULL; pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data; if (child_rsc) { /* make clang analyzer happy */ local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); if (local_node && (local_node->details == node->details)) { child = child_rsc; } } } } // Otherwise, use the first clone instance if (child == NULL) { child = rsc->children->data; } CRM_ASSERT(child); return child->cmds->create_probe(child, node, complete, force, data_set); } gboolean clone_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete, gboolean force, pe_working_set_t * data_set) { gboolean any_created = FALSE; CRM_ASSERT(rsc); rsc->children = g_list_sort(rsc->children, sort_rsc_id); if (rsc->children == NULL) { pe_warn("Clone %s has no children", rsc->id); return FALSE; } if (rsc->exclusive_discover) { pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on * * remove the node from allowed_nodes so that the * notification contains only nodes that we might ever run * on */ g_hash_table_remove(rsc->allowed_nodes, node->details->id); /* Bit of a shortcut - might as well take it */ return FALSE; } } if (is_set(rsc->flags, pe_rsc_unique)) { any_created = probe_unique_clone(rsc, node, complete, force, data_set); } else { any_created = probe_anonymous_clone(rsc, node, complete, force, data_set); } return any_created; } void clone_append_meta(pe_resource_t * rsc, xmlNode * xml) { char *name = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); name = crm_meta_name(XML_RSC_ATTR_UNIQUE); crm_xml_add(xml, name, is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); free(name); name = crm_meta_name(XML_RSC_ATTR_NOTIFY); crm_xml_add(xml, name, is_set(rsc->flags, pe_rsc_notify) ? "true" : "false"); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX); crm_xml_add_int(xml, name, clone_data->clone_max); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX); crm_xml_add_int(xml, name, clone_data->clone_node_max); free(name); if (is_set(rsc->flags, pe_rsc_promotable)) { name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX); crm_xml_add_int(xml, name, clone_data->promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX); crm_xml_add_int(xml, name, clone_data->promoted_node_max); free(name); /* @COMPAT Maintain backward compatibility with resource agents that * expect the old names (deprecated since 2.0.0). */ name = crm_meta_name(XML_RSC_ATTR_MASTER_MAX); crm_xml_add_int(xml, name, clone_data->promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_MASTER_NODEMAX); crm_xml_add_int(xml, name, clone_data->promoted_node_max); free(name); } } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index f85150037d..1c88e77ed9 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,2844 +1,2848 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); static void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, pe_working_set_t * data_set, guint interval_ms); static xmlNode *find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key, gboolean include_disabled); #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t * pe_rsc_action_details(pe_action_t *action) { pe_rsc_action_details_t *details; CRM_CHECK(action != NULL, return NULL); if (action->action_details == NULL) { action->action_details = calloc(1, sizeof(pe_rsc_action_details_t)); CRM_CHECK(action->action_details != NULL, return NULL); } details = (pe_rsc_action_details_t *) action->action_details; if (details->versioned_parameters == NULL) { details->versioned_parameters = create_xml_node(NULL, XML_TAG_OP_VER_ATTRS); } if (details->versioned_meta == NULL) { details->versioned_meta = create_xml_node(NULL, XML_TAG_OP_VER_META); } return details; } static void pe_free_rsc_action_details(pe_action_t *action) { pe_rsc_action_details_t *details; if ((action == NULL) || (action->action_details == NULL)) { return; } details = (pe_rsc_action_details_t *) action->action_details; if (details->versioned_parameters) { free_xml(details->versioned_parameters); } if (details->versioned_meta) { free_xml(details->versioned_meta); } action->action_details = NULL; } #endif /*! * \internal * \brief Check whether we can fence a particular node * * \param[in] data_set Working set for cluster * \param[in] node Name of node to check * * \return true if node can be fenced, false otherwise */ bool pe_can_fence(pe_working_set_t *data_set, pe_node_t *node) { if (pe__is_guest_node(node)) { /* Guest nodes are fenced by stopping their container resource. We can * do that if the container's host is either online or fenceable. */ pe_resource_t *rsc = node->details->remote_rsc->container; for (GList *n = rsc->running_on; n != NULL; n = n->next) { pe_node_t *container_node = n->data; if (!container_node->details->online && !pe_can_fence(data_set, container_node)) { return false; } } return true; } else if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) { return false; /* Turned off */ } else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) { return false; /* No devices */ } else if (is_set(data_set->flags, pe_flag_have_quorum)) { return true; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return true; } else if(node == NULL) { return false; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname); return true; } crm_trace("Cannot fence %s", node->details->uname); return false; } /*! * \internal * \brief Copy a node object * * \param[in] this_node Node object to copy * * \return Newly allocated shallow copy of this_node * \note This function asserts on errors and is guaranteed to return non-NULL. */ pe_node_t * pe__copy_node(const pe_node_t *this_node) { pe_node_t *new_node = NULL; CRM_ASSERT(this_node != NULL); new_node = calloc(1, sizeof(pe_node_t)); CRM_ASSERT(new_node != NULL); new_node->rsc_discover_mode = this_node->rsc_discover_mode; new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores) { GHashTable *result = hash; pe_node_t *other_node = NULL; GListPtr gIter = list; GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = pe__add_scores(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { pe_node_t *new_node = pe__copy_node(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } /*! * \internal * \brief Create a node hash table from a node list * * \param[in] list Node list * * \return Hash table equivalent of node list */ GHashTable * pe__node_list2table(GList *list) { GHashTable *result = NULL; result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); for (GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *new_node = pe__copy_node((pe_node_t *) gIter->data); g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } return result; } gint sort_node_uname(gconstpointer a, gconstpointer b) { return pcmk_numeric_strcasecmp(((const pe_node_t *) a)->details->uname, ((const pe_node_t *) b)->details->uname); } /*! * \internal * \brief Output node weights to stdout * * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes If rsc is not specified, use these nodes */ static void pe__output_node_weights(pe_resource_t *rsc, const char *comment, GHashTable *nodes) { char score[128]; // Stack-allocated since this is called frequently // Sort the nodes so the output is consistent for regression tests GList *list = g_list_sort(g_hash_table_get_values(nodes), sort_node_uname); for (GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; score2char_stack(node->weight, score, sizeof(score)); if (rsc) { printf("%s: %s allocation score on %s: %s\n", comment, rsc->id, node->details->uname, score); } else { printf("%s: %s = %s\n", comment, node->details->uname, score); } } g_list_free(list); } /*! * \internal * \brief Log node weights at trace level * * \param[in] file Caller's filename * \param[in] function Caller's function name * \param[in] line Caller's line number * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes If rsc is not specified, use these nodes */ static void pe__log_node_weights(const char *file, const char *function, int line, pe_resource_t *rsc, const char *comment, GHashTable *nodes) { GHashTableIter iter; pe_node_t *node = NULL; char score[128]; // Stack-allocated since this is called frequently // Don't waste time if we're not tracing at this point pcmk__log_else(LOG_TRACE, return); g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { score2char_stack(node->weight, score, sizeof(score)); if (rsc) { qb_log_from_external_source(function, file, "%s: %s allocation score on %s: %s", LOG_TRACE, line, 0, comment, rsc->id, node->details->uname, score); } else { qb_log_from_external_source(function, file, "%s: %s = %s", LOG_TRACE, line, 0, comment, node->details->uname, score); } } } /*! * \internal * \brief Log or output node weights * * \param[in] file Caller's filename * \param[in] function Caller's function name * \param[in] line Caller's line number * \param[in] to_log Log if true, otherwise output * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes If rsc is not specified, use these nodes */ void pe__show_node_weights_as(const char *file, const char *function, int line, bool to_log, pe_resource_t *rsc, const char *comment, GHashTable *nodes) { if (rsc != NULL) { if (is_set(rsc->flags, pe_rsc_orphan)) { // Don't show allocation scores for orphans return; } nodes = rsc->allowed_nodes; } if (nodes == NULL) { // Nothing to show return; } if (to_log) { pe__log_node_weights(file, function, line, rsc, comment, nodes); } else { pe__output_node_weights(rsc, comment, nodes); } // If this resource has children, repeat recursively for each if (rsc && rsc->children) { for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; pe__show_node_weights_as(file, function, line, to_log, child, comment, nodes); } } } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; char *new_text = crm_strdup_printf("%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } void dump_node_capacity(int level, const char *comment, pe_node_t * node) { char *dump_text = crm_strdup_printf("%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); if (level == LOG_STDOUT) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } void dump_rsc_utilization(int level, const char *comment, pe_resource_t * rsc, pe_node_t * node) { char *dump_text = crm_strdup_printf("%s: %s utilization on %s:", comment, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); switch (level) { case LOG_STDOUT: fprintf(stdout, "%s\n", dump_text); break; case LOG_NEVER: break; default: crm_trace("%s", dump_text); } free(dump_text); } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->sort_index > resource2->sort_index) { return -1; } if (resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } static enum pe_quorum_policy effective_quorum_policy(pe_resource_t *rsc, pe_working_set_t *data_set) { enum pe_quorum_policy policy = data_set->no_quorum_policy; if (is_set(data_set->flags, pe_flag_have_quorum)) { policy = no_quorum_ignore; } else if (data_set->no_quorum_policy == no_quorum_demote) { switch (rsc->role) { case RSC_ROLE_MASTER: case RSC_ROLE_SLAVE: if (rsc->next_role > RSC_ROLE_SLAVE) { rsc->next_role = RSC_ROLE_SLAVE; } policy = no_quorum_ignore; break; default: policy = no_quorum_stop; break; } } return policy; } pe_action_t * custom_action(pe_resource_t * rsc, char *key, const char *task, pe_node_t * on_node, gboolean optional, gboolean save_action, pe_working_set_t * data_set) { pe_action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, free(key); return NULL); if (save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } else if(save_action) { #if 0 action = g_hash_table_lookup(data_set->singletons, key); #else /* More expensive but takes 'node' into account */ possible_matches = find_actions(data_set->actions, key, on_node); #endif } if(data_set->singletons == NULL) { data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); } if (possible_matches != NULL) { if (pcmk__list_of_multiple(possible_matches)) { pe_warn("Action %s for %s on %s exists %d times", task, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); pe_rsc_trace(rsc, "Found existing action %d (%s) for %s (%s) on %s", action->id, action->uuid, (rsc? rsc->id : "no resource"), task, (on_node? on_node->details->uname : "no node")); g_list_free(possible_matches); } if (action == NULL) { if (save_action) { pe_rsc_trace(rsc, "Creating %s action %d: %s for %s (%s) on %s", (optional? "optional" : "mandatory"), data_set->action_id, key, (rsc? rsc->id : "no resource"), task, (on_node? on_node->details->uname : "no node")); } action = calloc(1, sizeof(pe_action_t)); if (save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = strdup(task); if (on_node) { action->node = pe__copy_node(on_node); } action->uuid = strdup(key); if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { // Resource history deletion for a node can be done on the DC pe_set_action_bit(action, pe_action_dc); } pe_set_action_bit(action, pe_action_runnable); if (optional) { pe_set_action_bit(action, pe_action_optional); } else { pe_clear_action_bit(action, pe_action_optional); } action->extra = crm_str_table_new(); action->meta = crm_str_table_new(); if (save_action) { data_set->actions = g_list_prepend(data_set->actions, action); if(rsc == NULL) { g_hash_table_insert(data_set->singletons, action->uuid, action); } } if (rsc != NULL) { guint interval_ms = 0; action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); parse_op_key(key, NULL, NULL, &interval_ms); unpack_operation(action, action->op_entry, rsc->container, data_set, interval_ms); if (save_action) { rsc->actions = g_list_prepend(rsc->actions, action); } } if (save_action) { pe_rsc_trace(rsc, "Action %d created", action->id); } } if (!optional && is_set(action->flags, pe_action_optional)) { pe_rsc_trace(rsc, "Unset optional on action %d", action->id); pe_clear_action_bit(action, pe_action_optional); } if (rsc != NULL) { enum action_tasks a_task = text2task(action->task); enum pe_quorum_policy quorum_policy = effective_quorum_policy(rsc, data_set); int warn_level = LOG_TRACE; if (save_action) { warn_level = LOG_WARNING; } if (is_set(action->flags, pe_action_have_node_attrs) == FALSE && action->node != NULL && action->op_entry != NULL) { pe_rule_eval_data_t rule_data = { .node_hash = action->node->details->attrs, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; pe_set_action_bit(action, pe_action_have_node_attrs); pe__unpack_dataset_nvpairs(action->op_entry, XML_TAG_ATTR_SETS, &rule_data, action->extra, NULL, FALSE, data_set); } if (is_set(action->flags, pe_action_pseudo)) { /* leave untouched */ } else if (action->node == NULL) { pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid); pe_clear_action_bit(action, pe_action_runnable); } else if (is_not_set(rsc->flags, pe_rsc_managed) && g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS) == NULL) { crm_debug("Action %s (unmanaged)", action->uuid); pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); /* action->runnable = FALSE; */ } else if (is_not_set(action->flags, pe_action_dc) && !(action->node->details->online) && (!pe__is_guest_node(action->node) || action->node->details->remote_requires_reset)) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if (is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc && action->node->details->unclean == FALSE) { pe_fence_node(data_set, action->node, "resource actions are unrunnable", FALSE); } } else if (is_not_set(action->flags, pe_action_dc) && action->node->details->pending) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", action->uuid, action->node->details->uname); } else if (action->needs == rsc_req_nothing) { pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid); pe_action_set_reason(action, NULL, TRUE); if (pe__is_guest_node(action->node) && !pe_can_fence(data_set, action->node)) { /* An action that requires nothing usually does not require any * fencing in order to be runnable. However, there is an * exception: an action cannot be completed if it is on a guest * node whose host is unclean and cannot be fenced. */ pe_clear_action_bit(action, pe_action_runnable); crm_debug("%s\t%s (cancelled : host cannot be fenced)", action->node->details->uname, action->uuid); } else { pe_set_action_bit(action, pe_action_runnable); } #if 0 /* * No point checking this * - if we don't have quorum we can't stonith anyway */ } else if (action->needs == rsc_req_stonith) { crm_trace("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if (quorum_policy == no_quorum_stop) { pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "no quorum", pe_action_runnable, TRUE); crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if (quorum_policy == no_quorum_freeze) { pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role)); if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) { pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "quorum freeze", pe_action_runnable, TRUE); pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else if(is_not_set(action->flags, pe_action_runnable)) { pe_rsc_trace(rsc, "Action %s is runnable", action->uuid); //pe_action_set_reason(action, NULL, TRUE); pe_set_action_bit(action, pe_action_runnable); } if (save_action) { switch (a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if (is_set(action->flags, pe_action_runnable)) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } free(key); return action; } static bool valid_stop_on_fail(const char *value) { return !pcmk__strcase_any_of(value, "standby", "demote", "stop", NULL); } static const char * unpack_operation_on_fail(pe_action_t * action) { const char *name = NULL; const char *role = NULL; const char *on_fail = NULL; const char *interval_spec = NULL; const char *enabled = NULL; const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei) && !valid_stop_on_fail(value)) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s stop " "action to default value because '%s' is not " "allowed for stop", action->rsc->id, value); return NULL; } else if (pcmk__str_eq(action->task, CRMD_ACTION_DEMOTE, pcmk__str_casei) && !value) { /* demote on_fail defaults to master monitor value if present */ xmlNode *operation = NULL; CRM_CHECK(action->rsc != NULL, return NULL); for (operation = __xml_first_child_element(action->rsc->ops_xml); operation && !value; operation = __xml_next_element(operation)) { if (!pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { continue; } name = crm_element_value(operation, "name"); role = crm_element_value(operation, "role"); on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL); enabled = crm_element_value(operation, "enabled"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!on_fail) { continue; } else if (enabled && !crm_is_true(enabled)) { continue; } else if (!pcmk__str_eq(name, "monitor", pcmk__str_casei) || !pcmk__str_eq(role, "Master", pcmk__str_casei)) { continue; } else if (crm_parse_interval_spec(interval_spec) == 0) { continue; } else if (pcmk__str_eq(on_fail, "demote", pcmk__str_casei)) { continue; } value = on_fail; } } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { value = "ignore"; } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { name = crm_element_value(action->op_entry, "name"); role = crm_element_value(action->op_entry, "role"); on_fail = crm_element_value(action->op_entry, XML_OP_ATTR_ON_FAIL); interval_spec = crm_element_value(action->op_entry, XML_LRM_ATTR_INTERVAL); if (!pcmk__str_eq(name, CRMD_ACTION_PROMOTE, pcmk__str_casei) && (!pcmk__str_eq(name, CRMD_ACTION_STATUS, pcmk__str_casei) || !pcmk__str_eq(role, "Master", pcmk__str_casei) || (crm_parse_interval_spec(interval_spec) == 0))) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s %s " "action to default value because 'demote' is not " "allowed for it", action->rsc->id, name); return NULL; } } return value; } static xmlNode * find_min_interval_mon(pe_resource_t * rsc, gboolean include_disabled) { guint interval_ms = 0; guint min_interval_ms = G_MAXUINT; const char *name = NULL; const char *value = NULL; const char *interval_spec = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { name = crm_element_value(operation, "name"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } if (!pcmk__str_eq(name, RSC_STATUS, pcmk__str_casei)) { continue; } interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms && (interval_ms < min_interval_ms)) { min_interval_ms = interval_ms; op = operation; } } } return op; } static int unpack_start_delay(const char *value, GHashTable *meta) { int start_delay = 0; if (value != NULL) { start_delay = crm_get_msec(value); if (start_delay < 0) { start_delay = 0; } if (meta) { g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay)); } } return start_delay; } // true if value contains valid, non-NULL interval origin for recurring op static bool unpack_interval_origin(const char *value, xmlNode *xml_obj, guint interval_ms, crm_time_t *now, long long *start_delay) { long long result = 0; guint interval_sec = interval_ms / 1000; crm_time_t *origin = NULL; // Ignore unspecified values and non-recurring operations if ((value == NULL) || (interval_ms == 0) || (now == NULL)) { return false; } // Parse interval origin from text origin = crm_time_new(value); if (origin == NULL) { pcmk__config_err("Ignoring '" XML_OP_ATTR_ORIGIN "' for operation " "'%s' because '%s' is not valid", (ID(xml_obj)? ID(xml_obj) : "(missing ID)"), value); return false; } // Get seconds since origin (negative if origin is in the future) result = crm_time_get_seconds(now) - crm_time_get_seconds(origin); crm_time_free(origin); // Calculate seconds from closest interval to now result = result % interval_sec; // Calculate seconds remaining until next interval result = ((result <= 0)? 0 : interval_sec) - result; crm_info("Calculated a start delay of %llds for operation '%s'", result, (ID(xml_obj)? ID(xml_obj) : "(unspecified)")); if (start_delay != NULL) { *start_delay = result * 1000; // milliseconds } return true; } static int unpack_timeout(const char *value) { int timeout_ms = crm_get_msec(value); if (timeout_ms < 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } return timeout_ms; } int pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set) { xmlNode *child = NULL; GHashTable *action_meta = NULL; const char *timeout_spec = NULL; int timeout_ms = 0; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { if (pcmk__str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME), pcmk__str_casei)) { timeout_spec = crm_element_value(child, XML_ATTR_TIMEOUT); break; } } if (timeout_spec == NULL && data_set->op_defaults) { action_meta = crm_str_table_new(); pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data, action_meta, NULL, FALSE, data_set); timeout_spec = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT); } // @TODO check meta-attributes (including versioned meta-attributes) // @TODO maybe use min-interval monitor timeout as default for monitors timeout_ms = crm_get_msec(timeout_spec); if (timeout_ms < 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } if (action_meta != NULL) { g_hash_table_destroy(action_meta); } return timeout_ms; } #if ENABLE_VERSIONED_ATTRS static void unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj, guint interval_ms, crm_time_t *now) { xmlNode *attrs = NULL; xmlNode *attr = NULL; for (attrs = __xml_first_child_element(versioned_meta); attrs != NULL; attrs = __xml_next_element(attrs)) { for (attr = __xml_first_child_element(attrs); attr != NULL; attr = __xml_next_element(attr)) { const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); if (pcmk__str_eq(name, XML_OP_ATTR_START_DELAY, pcmk__str_casei)) { int start_delay = unpack_start_delay(value, NULL); crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay); } else if (pcmk__str_eq(name, XML_OP_ATTR_ORIGIN, pcmk__str_casei)) { long long start_delay = 0; if (unpack_interval_origin(value, xml_obj, interval_ms, now, &start_delay)) { crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_START_DELAY); crm_xml_add_ll(attr, XML_NVPAIR_ATTR_VALUE, start_delay); } } else if (pcmk__str_eq(name, XML_ATTR_TIMEOUT, pcmk__str_casei)) { int timeout_ms = unpack_timeout(value); crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, timeout_ms); } } } } #endif /*! * \brief Unpack operation XML into an action structure * * Unpack an operation's meta-attributes (normalizing the interval, timeout, * and start delay values as integer milliseconds), requirements, and * failure policy. * * \param[in,out] action Action to unpack into * \param[in] xml_obj Operation XML (or NULL if all defaults) * \param[in] container Resource that contains affected resource, if any * \param[in] data_set Cluster state * \param[in] interval_ms How frequently to perform the operation */ static void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, pe_working_set_t * data_set, guint interval_ms) { int timeout_ms = 0; const char *value = NULL; #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t *rsc_details = NULL; #endif pe_rsc_eval_data_t rsc_rule_data = { .standard = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_CLASS), .provider = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_PROVIDER), .agent = crm_element_value(action->rsc->xml, XML_EXPR_ATTR_TYPE) }; pe_op_eval_data_t op_rule_data = { .op_name = action->task, .interval = interval_ms }; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = &rsc_rule_data, .op_data = &op_rule_data }; CRM_CHECK(action && action->rsc, return); // Cluster-wide pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data, action->meta, NULL, FALSE, data_set); // Determine probe default timeout differently if (pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_casei) && (interval_ms == 0)) { xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); if (min_interval_mon) { value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); if (value) { crm_trace("\t%s: Setting default timeout to minimum-interval " "monitor's timeout '%s'", action->uuid, value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), strdup(value)); } } } if (xml_obj) { xmlAttrPtr xIter = NULL; // take precedence over defaults pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, &rule_data, action->meta, NULL, TRUE, data_set); #if ENABLE_VERSIONED_ATTRS rsc_details = pe_rsc_action_details(action); pe_eval_versioned_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, &rule_data, rsc_details->versioned_parameters, NULL); pe_eval_versioned_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, &rule_data, rsc_details->versioned_meta, NULL); #endif /* Anything set as an XML property has highest precedence. * This ensures we use the name and interval from the tag. */ for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } g_hash_table_remove(action->meta, "id"); // Normalize interval to milliseconds if (interval_ms > 0) { g_hash_table_replace(action->meta, strdup(XML_LRM_ATTR_INTERVAL), crm_strdup_printf("%u", interval_ms)); } else { g_hash_table_remove(action->meta, XML_LRM_ATTR_INTERVAL); } /* * Timeout order of precedence: * 1. pcmk_monitor_timeout (if rsc has pcmk_ra_cap_fence_params * and task is start or a probe; pcmk_monitor_timeout works * by default for a recurring monitor) * 2. explicit op timeout on the primitive * 3. default op timeout * a. if probe, then min-interval monitor's timeout * b. else, in XML_CIB_TAG_OPCONFIG * 4. CRM_DEFAULT_OP_TIMEOUT_S * * #1 overrides general rule of XML property having highest * precedence. */ if (is_set(pcmk_get_ra_caps(rsc_rule_data.standard), pcmk_ra_cap_fence_params) && (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei) || (pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_casei) && (interval_ms == 0))) && action->rsc->parameters) { value = g_hash_table_lookup(action->rsc->parameters, "pcmk_monitor_timeout"); if (value) { crm_trace("\t%s: Setting timeout to pcmk_monitor_timeout '%s', " "overriding default", action->uuid, value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), strdup(value)); } } // Normalize timeout to positive milliseconds value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT); timeout_ms = unpack_timeout(value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), crm_itoa(timeout_ms)); if (!pcmk__strcase_any_of(action->task, RSC_START, RSC_PROMOTE, NULL)) { action->needs = rsc_req_nothing; value = "nothing (not start/promote)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) { action->needs = rsc_req_stonith; value = "fencing (resource)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) { action->needs = rsc_req_quorum; value = "quorum (resource)"; } else { action->needs = rsc_req_nothing; value = "nothing (resource)"; } pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->uuid, value); value = unpack_operation_on_fail(action); if (value == NULL) { } else if (pcmk__str_eq(value, "block", pcmk__str_casei)) { action->on_fail = action_fail_block; g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block")); value = "block"; // The above could destroy the original string } else if (pcmk__str_eq(value, "fence", pcmk__str_casei)) { action->on_fail = action_fail_fence; value = "node fencing"; if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for " "operation '%s' to 'stop' because 'fence' is not " "valid when fencing is disabled", action->uuid); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (pcmk__str_eq(value, "standby", pcmk__str_casei)) { action->on_fail = action_fail_standby; value = "node standby"; } else if (pcmk__strcase_any_of(value, "ignore", "nothing", NULL)) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (pcmk__str_eq(value, "migrate", pcmk__str_casei)) { action->on_fail = action_fail_migrate; value = "force migration"; } else if (pcmk__str_eq(value, "stop", pcmk__str_casei)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (pcmk__str_eq(value, "restart", pcmk__str_casei)) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else if (pcmk__str_eq(value, "restart-container", pcmk__str_casei)) { if (container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate)"; } else { value = NULL; } } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { action->on_fail = action_fail_demote; value = "demote instance"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate) (default)"; /* For remote nodes, ensure that any failure that results in dropping an * active connection to the node results in fencing of the node. * * There are only two action failures that don't result in fencing. * 1. probes - probe failures are expected. * 2. start - a start failure indicates that an active connection does not already * exist. The user can set op on-fail=fence if they really want to fence start * failures. */ } else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) && (pe__resource_is_remote_conn(action->rsc, data_set) && !(pcmk__str_eq(action->task, CRMD_ACTION_STATUS, pcmk__str_casei) && (interval_ms == 0)) && (!pcmk__str_eq(action->task, CRMD_ACTION_START, pcmk__str_casei)))) { if (!is_set(action->rsc->flags, pe_rsc_managed)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop unmanaged remote node (enforcing default)"; } else { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { value = "fence remote node (default)"; } else { value = "recover remote node connection (default)"; } if (action->rsc->remote_reconnect_ms) { action->fail_role = RSC_ROLE_STOPPED; } action->on_fail = action_fail_reset_remote; } } else if (value == NULL && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); if (value) { pe_warn_once(pe_wo_role_after, "Support for role_after_failure is deprecated and will be removed in a future release"); } } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (pcmk__str_eq(action->task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task, role2text(action->fail_role)); value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY); if (value) { unpack_start_delay(value, action->meta); } else { long long start_delay = 0; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); if (unpack_interval_origin(value, xml_obj, interval_ms, data_set->now, &start_delay)) { g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY), crm_strdup_printf("%lld", start_delay)); } } #if ENABLE_VERSIONED_ATTRS unpack_versioned_meta(rsc_details->versioned_meta, xml_obj, interval_ms, data_set->now); #endif } static xmlNode * find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key, gboolean include_disabled) { guint interval_ms = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; const char *value = NULL; const char *interval_spec = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { name = crm_element_value(operation, "name"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } interval_ms = crm_parse_interval_spec(interval_spec); match_key = pcmk__op_key(rsc->id, name, interval_ms); if (pcmk__str_eq(key, match_key, pcmk__str_casei)) { op = operation; } free(match_key); if (rsc->clone_name) { match_key = pcmk__op_key(rsc->clone_name, name, interval_ms); if (pcmk__str_eq(key, match_key, pcmk__str_casei)) { op = operation; } free(match_key); } if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) { local_key = pcmk__op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = pcmk__op_key(rsc->id, "notify", 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(pe_resource_t * rsc, const char *key) { return find_rsc_op_entry_helper(rsc, key, FALSE); } void print_node(const char *pre_text, pe_node_t * node, gboolean details) { if (node == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } CRM_ASSERT(node->details); crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details) { int log_level = LOG_TRACE; char *pe_mutable = strdup("\t\t"); GListPtr gIter = node->details->running_rsc; crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; rsc->fns->print(rsc, "\t\t", pe_print_log|pe_print_pending, &log_level); } } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_trace("%s%s %s ==> %s", user_data == NULL ? "" : (char *)user_data, user_data == NULL ? "" : ": ", (char *)key, (char *)value); } void pe_free_action(pe_action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* pe_action_wrapper_t* */ g_list_free_full(action->actions_after, free); /* pe_action_wrapper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } #if ENABLE_VERSIONED_ATTRS if (action->rsc) { pe_free_rsc_action_details(action); } #endif free(action->cancel_task); free(action->reason); free(action->task); free(action->uuid); free(action->node); free(action); } GListPtr find_recurring_actions(GListPtr input, pe_node_t * not_on_node) { const char *value = NULL; GListPtr result = NULL; GListPtr gIter = input; CRM_CHECK(input != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS); if (value == NULL) { /* skip */ } else if (pcmk__str_eq(value, "0", pcmk__str_casei)) { /* skip */ } else if (pcmk__str_eq(CRMD_ACTION_CANCEL, action->task, pcmk__str_casei)) { /* skip */ } else if (not_on_node == NULL) { crm_trace("(null) Found: %s", action->uuid); result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ } else if (action->node->details != not_on_node->details) { crm_trace("Found: %s", action->uuid); result = g_list_prepend(result, action); } } return result; } enum action_tasks get_complex_task(pe_resource_t * rsc, const char *name, gboolean allow_non_atomic) { enum action_tasks task = text2task(name); if (rsc == NULL) { return task; } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); return task - 1; default: break; } } return task; } pe_action_t * find_first_action(GListPtr input, const char *uuid, const char *task, pe_node_t * on_node) { GListPtr gIter = NULL; CRM_CHECK(uuid || task, return NULL); for (gIter = input; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (uuid != NULL && !pcmk__str_eq(uuid, action->uuid, pcmk__str_casei)) { continue; } else if (task != NULL && !pcmk__str_eq(task, action->task, pcmk__str_casei)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == action->node->details) { return action; } } return NULL; } GListPtr find_actions(GListPtr input, const char *key, const pe_node_t *on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) { crm_trace("%s does not match action %s", key, action->uuid); continue; } else if (on_node == NULL) { crm_trace("Action %s matches (ignoring node)", key); result = g_list_prepend(result, action); } else if (action->node == NULL) { crm_trace("Action %s matches (unallocated, assigning to %s)", key, on_node->details->uname); action->node = pe__copy_node(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { crm_trace("Action %s on %s matches", key, on_node->details->uname); result = g_list_prepend(result, action); } else { crm_trace("Action %s on node %s does not match requested node %s", key, action->node->details->uname, on_node->details->uname); } } return result; } GList * find_actions_exact(GList *input, const char *key, const pe_node_t *on_node) { GList *result = NULL; CRM_CHECK(key != NULL, return NULL); if (on_node == NULL) { crm_trace("Not searching for action %s because node not specified", key); return NULL; } for (GList *gIter = input; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (action->node == NULL) { crm_trace("Skipping comparison of %s vs action %s without node", key, action->uuid); } else if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) { crm_trace("Desired action %s doesn't match %s", key, action->uuid); } else if (!pcmk__str_eq(on_node->details->id, action->node->details->id, pcmk__str_casei)) { crm_trace("Action %s desired node ID %s doesn't match %s", key, on_node->details->id, action->node->details->id); } else { crm_trace("Action %s matches", key); result = g_list_prepend(result, action); } } return result; } /*! * \brief Find all actions of given type for a resource * * \param[in] rsc Resource to search * \param[in] node Find only actions scheduled on this node * \param[in] task Action name to search for * \param[in] require_node If TRUE, NULL node or action node will not match * * \return List of actions found (or NULL if none) * \note If node is not NULL and require_node is FALSE, matching actions * without a node will be assigned to node. */ GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node) { GList *result = NULL; char *key = pcmk__op_key(rsc->id, task, 0); if (require_node) { result = find_actions_exact(rsc->actions, key, node); } else { result = find_actions(rsc->actions, key, node); } free(key); return result; } static void resource_node_score(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag) { pe_node_t *match = NULL; if ((rsc->exclusive_discover || (node->rsc_discover_mode == pe_discover_never)) && pcmk__str_eq(tag, "symmetric_default", pcmk__str_casei)) { /* This string comparision may be fragile, but exclusive resources and * exclusive nodes should not have the symmetric_default constraint * applied to them. */ return; } else if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = pe__copy_node(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = pe__add_scores(match->weight, score); } void resource_location(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag, pe_working_set_t * data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GListPtr gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node_iter = (pe_node_t *) gIter->data; resource_node_score(rsc, node_iter, score, tag); } } else { GHashTableIter iter; pe_node_t *node_iter = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) { resource_node_score(rsc, node_iter, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) gint sort_op_by_callid(gconstpointer a, gconstpointer b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID); if (pcmk__str_eq(a_xml_id, b_xml_id, pcmk__str_casei)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unlikely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why it's happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id); crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesn't matter since * stops are never pending */ sort_return(0, "pending"); } else if (a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (b_call_id >= 0 && a_call_id == b_call_id) { /* * The op and last_failed_op are the same * Order on last-rc-change */ time_t last_a = -1; time_t last_b = -1; crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); crm_trace("rc-change: %lld vs %lld", (long long) last_a, (long long) last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */ int a_id = -1; int b_id = -1; const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL, NULL)) { sort_return(0, "bad magic a"); } if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL, NULL)) { sort_return(0, "bad magic b"); } /* try to determine the relative age of the operation... * some pending operations (e.g. a start) may have been superseded * by a subsequent stop * * [a|b]_id == -1 means it's a shutdown operation and _always_ comes last */ if (!pcmk__str_eq(a_uuid, b_uuid, pcmk__str_casei) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesn't match then one better * be a pending operation. * pending operations don't survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } time_t get_effective_time(pe_working_set_t * data_set) { if(data_set) { if (data_set->now == NULL) { crm_trace("Recording a new 'now'"); data_set->now = crm_time_new(NULL); } return crm_time_get_seconds_since_epoch(data_set->now); } crm_trace("Defaulting to 'now'"); return time(NULL); } gboolean get_target_role(pe_resource_t * rsc, enum rsc_role_e * role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (pcmk__str_eq(value, "started", pcmk__str_null_matches | pcmk__str_casei) || pcmk__str_eq("default", value, pcmk__str_casei)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s " "because '%s' is not valid", rsc->id, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable)) { if (local_role > RSC_ROLE_SLAVE) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s " "because '%s' only makes sense for promotable " "clones", rsc->id, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order) { GListPtr gIter = NULL; pe_action_wrapper_t *wrapper = NULL; GListPtr list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); /* Ensure we never create a dependency on ourselves... it's happened */ CRM_ASSERT(lh_action != rh_action); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *after = (pe_action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(pe_action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = NULL; /* order |= pe_order_implies_then; */ /* order ^= pe_order_implies_then; */ wrapper = calloc(1, sizeof(pe_action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } pe_action_t * get_pseudo_op(const char *name, pe_working_set_t * data_set) { pe_action_t *op = NULL; if(data_set->singletons) { op = g_hash_table_lookup(data_set->singletons, name); } if (op == NULL) { op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set); set_bit(op->flags, pe_action_pseudo); set_bit(op->flags, pe_action_runnable); } return op; } void destroy_ticket(gpointer data) { pe_ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } pe_ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { pe_ticket_t *ticket = NULL; if (pcmk__str_empty(ticket_id)) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(pe_ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = crm_str_table_new(); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } static void filter_parameters(xmlNode * param_set, const char *param_string, bool need_present) { if (param_set && param_string) { xmlAttrPtr xIter = param_set->properties; while (xIter) { const char *prop_name = (const char *)xIter->name; char *name = crm_strdup_printf(" %s ", prop_name); char *match = strstr(param_string, name); free(name); // Do now, because current entry might get removed below xIter = xIter->next; if (need_present && match == NULL) { crm_trace("%s not found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } else if (need_present == FALSE && match) { crm_trace("%s found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } } } } #if ENABLE_VERSIONED_ATTRS static void append_versioned_params(xmlNode *versioned_params, const char *ra_version, xmlNode *params) { GHashTable *hash = pe_unpack_versioned_parameters(versioned_params, ra_version); char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { crm_xml_add(params, key, value); } g_hash_table_destroy(hash); } #endif /*! * \internal * \brief Calculate action digests and store in node's digest cache * * \param[in] rsc Resource that action was for * \param[in] task Name of action performed * \param[in] key Action's task key * \param[in] node Node action was performed on * \param[in] xml_op XML of operation in CIB status (if available) * \param[in] calc_secure Whether to calculate secure digest * \param[in] data_set Cluster working set * * \return Pointer to node's digest cache entry */ static op_digest_cache_t * rsc_action_digest(pe_resource_t *rsc, const char *task, const char *key, pe_node_t *node, xmlNode *xml_op, bool calc_secure, pe_working_set_t *data_set) { op_digest_cache_t *data = NULL; data = g_hash_table_lookup(node->details->digest_cache, key); if (data == NULL) { GHashTable *local_rsc_params = crm_str_table_new(); pe_action_t *action = custom_action(rsc, strdup(key), task, node, TRUE, FALSE, data_set); #if ENABLE_VERSIONED_ATTRS xmlNode *local_versioned_params = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS); const char *ra_version = NULL; #endif const char *op_version; const char *restart_list = NULL; const char *secure_list = " passwd password "; data = calloc(1, sizeof(op_digest_cache_t)); CRM_ASSERT(data != NULL); get_rsc_attributes(local_rsc_params, rsc, node, data_set); #if ENABLE_VERSIONED_ATTRS pe_get_versioned_attributes(local_versioned_params, rsc, node, data_set); #endif data->params_all = create_xml_node(NULL, XML_TAG_PARAMS); // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside if (pe__add_bundle_remote_name(rsc, data->params_all, XML_RSC_ATTR_REMOTE_RA_ADDR)) { crm_trace("Set address for bundle connection %s (on %s)", rsc->id, node->details->uname); } g_hash_table_foreach(local_rsc_params, hash2field, data->params_all); g_hash_table_foreach(action->extra, hash2field, data->params_all); g_hash_table_foreach(rsc->parameters, hash2field, data->params_all); g_hash_table_foreach(action->meta, hash2metafield, data->params_all); if(xml_op) { secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); #if ENABLE_VERSIONED_ATTRS ra_version = crm_element_value(xml_op, XML_ATTR_RA_VERSION); #endif } else { op_version = CRM_FEATURE_SET; } #if ENABLE_VERSIONED_ATTRS append_versioned_params(local_versioned_params, ra_version, data->params_all); append_versioned_params(rsc->versioned_parameters, ra_version, data->params_all); { pe_rsc_action_details_t *details = pe_rsc_action_details(action); append_versioned_params(details->versioned_parameters, ra_version, data->params_all); } #endif pcmk__filter_op_for_digest(data->params_all); g_hash_table_destroy(local_rsc_params); pe_free_action(action); data->digest_all_calc = calculate_operation_digest(data->params_all, op_version); if (calc_secure) { data->params_secure = copy_xml(data->params_all); if(secure_list) { filter_parameters(data->params_secure, secure_list, FALSE); } data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version); } if(xml_op && crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST) != NULL) { data->params_restart = copy_xml(data->params_all); if (restart_list) { filter_parameters(data->params_restart, restart_list, TRUE); } data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version); } g_hash_table_insert(node->details->digest_cache, strdup(key), data); } return data; } op_digest_cache_t * rsc_action_digest_cmp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * node, pe_working_set_t * data_set) { op_digest_cache_t *data = NULL; char *key = NULL; guint interval_ms = 0; const char *op_version; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_all; const char *digest_restart; CRM_ASSERT(node != NULL); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); key = pcmk__op_key(rsc->id, task, interval_ms); data = rsc_action_digest(rsc, task, key, node, xml_op, is_set(data_set->flags, pe_flag_sanitized), data_set); data->rc = RSC_DIGEST_MATCH; if (digest_restart && data->digest_restart_calc && strcmp(data->digest_restart_calc, digest_restart) != 0) { pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", key, node->details->uname, crm_str(digest_restart), data->digest_restart_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); data->rc = RSC_DIGEST_RESTART; } else if (digest_all == NULL) { /* it is unknown what the previous op digest was */ data->rc = RSC_DIGEST_UNKNOWN; } else if (strcmp(digest_all, data->digest_all_calc) != 0) { pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (%s:%s) %s", key, node->details->uname, crm_str(digest_all), data->digest_all_calc, (interval_ms > 0)? "reschedule" : "reload", op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); data->rc = RSC_DIGEST_ALL; } free(key); return data; } /*! * \internal * \brief Create an unfencing summary for use in special node attribute * * Create a string combining a fence device's resource ID, agent type, and * parameter digest (whether for all parameters or just non-private parameters). * This can be stored in a special node attribute, allowing us to detect changes * in either the agent type or parameters, to know whether unfencing must be * redone or can be safely skipped when the device's history is cleaned. * * \param[in] rsc_id Fence device resource ID * \param[in] agent_type Fence device agent * \param[in] param_digest Fence device parameter digest * * \return Newly allocated string with unfencing digest * \note The caller is responsible for freeing the result. */ static inline char * create_unfencing_summary(const char *rsc_id, const char *agent_type, const char *param_digest) { return crm_strdup_printf("%s:%s:%s", rsc_id, agent_type, param_digest); } /*! * \internal * \brief Check whether a node can skip unfencing * * Check whether a fence device's current definition matches a node's * stored summary of when it was last unfenced by the device. * * \param[in] rsc_id Fence device's resource ID * \param[in] agent Fence device's agent type * \param[in] digest_calc Fence device's current parameter digest * \param[in] node_summary Value of node's special unfencing node attribute * (a comma-separated list of unfencing summaries for * all devices that have unfenced this node) * * \return TRUE if digest matches, FALSE otherwise */ static bool unfencing_digest_matches(const char *rsc_id, const char *agent, const char *digest_calc, const char *node_summary) { bool matches = FALSE; if (rsc_id && agent && digest_calc && node_summary) { char *search_secure = create_unfencing_summary(rsc_id, agent, digest_calc); /* The digest was calculated including the device ID and agent, * so there is no risk of collision using strstr(). */ matches = (strstr(node_summary, search_secure) != NULL); crm_trace("Calculated unfencing digest '%s' %sfound in '%s'", search_secure, matches? "" : "not ", node_summary); free(search_secure); } return matches; } /* Magic string to use as action name for digest cache entries used for * unfencing checks. This is not a real action name (i.e. "on"), so * check_action_definition() won't confuse these entries with real actions. */ #define STONITH_DIGEST_TASK "stonith-on" /*! * \internal * \brief Calculate fence device digests and digest comparison result * * \param[in] rsc Fence device resource * \param[in] agent Fence device's agent type * \param[in] node Node with digest cache to use * \param[in] data_set Cluster working set * * \return Node's digest cache entry */ static op_digest_cache_t * fencing_action_digest_cmp(pe_resource_t *rsc, const char *agent, pe_node_t *node, pe_working_set_t *data_set) { const char *node_summary = NULL; // Calculate device's current parameter digests char *key = pcmk__op_key(rsc->id, STONITH_DIGEST_TASK, 0); op_digest_cache_t *data = rsc_action_digest(rsc, STONITH_DIGEST_TASK, key, node, NULL, TRUE, data_set); free(key); // Check whether node has special unfencing summary node attribute node_summary = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_ALL); if (node_summary == NULL) { data->rc = RSC_DIGEST_UNKNOWN; return data; } // Check whether full parameter digest matches if (unfencing_digest_matches(rsc->id, agent, data->digest_all_calc, node_summary)) { data->rc = RSC_DIGEST_MATCH; return data; } // Check whether secure parameter digest matches node_summary = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_SECURE); if (unfencing_digest_matches(rsc->id, agent, data->digest_secure_calc, node_summary)) { data->rc = RSC_DIGEST_MATCH; if (is_set(data_set->flags, pe_flag_stdout)) { printf("Only 'private' parameters to %s for unfencing %s changed\n", rsc->id, node->details->uname); } return data; } // Parameters don't match data->rc = RSC_DIGEST_ALL; if (is_set(data_set->flags, (pe_flag_sanitized|pe_flag_stdout)) && data->digest_secure_calc) { char *digest = create_unfencing_summary(rsc->id, agent, data->digest_secure_calc); printf("Parameters to %s for unfencing %s changed, try '%s'\n", rsc->id, node->details->uname, digest); free(digest); } return data; } const char *rsc_printable_id(pe_resource_t *rsc) { if (is_not_set(rsc->flags, pe_rsc_unique)) { return ID(rsc->xml); } return rsc->id; } void clear_bit_recursive(pe_resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; clear_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; clear_bit_recursive(child_rsc, flag); } } void set_bit_recursive(pe_resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; set_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; set_bit_recursive(child_rsc, flag); } } static GListPtr find_unfencing_devices(GListPtr candidates, GListPtr matches) { for (GListPtr gIter = candidates; gIter != NULL; gIter = gIter->next) { pe_resource_t *candidate = gIter->data; const char *provides = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_PROVIDES); const char *requires = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_REQUIRES); if(candidate->children) { matches = find_unfencing_devices(candidate->children, matches); } else if (is_not_set(candidate->flags, pe_rsc_fence_device)) { continue; } else if (pcmk__str_eq(provides, "unfencing", pcmk__str_casei) || pcmk__str_eq(requires, "unfencing", pcmk__str_casei)) { matches = g_list_prepend(matches, candidate); } } return matches; } static int node_priority_fencing_delay(pe_node_t * node, pe_working_set_t * data_set) { int member_count = 0; int online_count = 0; int top_priority = 0; int lowest_priority = 0; GListPtr gIter = NULL; // `priority-fencing-delay` is disabled if (data_set->priority_fencing_delay <= 0) { return 0; } /* No need to request a delay if the fencing target is not a normal cluster * member, for example if it's a remote node or a guest node. */ if (node->details->type != node_member) { return 0; } // No need to request a delay if the fencing target is in our partition if (node->details->online) { return 0; } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *n = gIter->data; if (n->details->type != node_member) { continue; } member_count ++; if (n->details->online) { online_count++; } if (member_count == 1 || n->details->priority > top_priority) { top_priority = n->details->priority; } if (member_count == 1 || n->details->priority < lowest_priority) { lowest_priority = n->details->priority; } } // No need to delay if we have more than half of the cluster members if (online_count > member_count / 2) { return 0; } /* All the nodes have equal priority. * Any configured corresponding `pcmk_delay_base/max` will be applied. */ if (lowest_priority == top_priority) { return 0; } if (node->details->priority < top_priority) { return 0; } return data_set->priority_fencing_delay; } pe_action_t * pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t * data_set) { char *op_key = NULL; pe_action_t *stonith_op = NULL; if(op == NULL) { op = data_set->stonith_action; } op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op); if(data_set->singletons) { stonith_op = g_hash_table_lookup(data_set->singletons, op_key); } if(stonith_op == NULL) { stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", op); if (pe__is_guest_or_remote_node(node) && is_set(data_set->flags, pe_flag_enable_unfencing)) { /* Extra work to detect device changes on remotes * * We may do this for all nodes in the future, but for now * the check_action_definition() based stuff works fine. */ long max = 1024; long digests_all_offset = 0; long digests_secure_offset = 0; char *digests_all = calloc(max, sizeof(char)); char *digests_secure = calloc(max, sizeof(char)); GListPtr matches = find_unfencing_devices(data_set->resources, NULL); for (GListPtr gIter = matches; gIter != NULL; gIter = gIter->next) { pe_resource_t *match = gIter->data; const char *agent = g_hash_table_lookup(match->meta, XML_ATTR_TYPE); op_digest_cache_t *data = NULL; data = fencing_action_digest_cmp(match, agent, node, data_set); if(data->rc == RSC_DIGEST_ALL) { optional = FALSE; crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id); if (is_set(data_set->flags, pe_flag_stdout)) { fprintf(stdout, " notice: Unfencing %s (remote): because the definition of %s changed\n", node->details->uname, match->id); } } digests_all_offset += snprintf( digests_all+digests_all_offset, max-digests_all_offset, "%s:%s:%s,", match->id, agent, data->digest_all_calc); digests_secure_offset += snprintf( digests_secure+digests_secure_offset, max-digests_secure_offset, "%s:%s:%s,", match->id, agent, data->digest_secure_calc); } g_hash_table_insert(stonith_op->meta, strdup(XML_OP_ATTR_DIGESTS_ALL), digests_all); g_hash_table_insert(stonith_op->meta, strdup(XML_OP_ATTR_DIGESTS_SECURE), digests_secure); } } else { free(op_key); } if (data_set->priority_fencing_delay > 0 /* It's a suitable case where `priority-fencing-delay` applies. * At least add `priority-fencing-delay` field as an indicator. */ && (priority_delay /* Re-calculate priority delay for the suitable case when * pe_fence_op() is called again by stage6() after node priority has * been actually calculated with native_add_running() */ || g_hash_table_lookup(stonith_op->meta, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY) != NULL)) { /* Add `priority-fencing-delay` to the fencing op even if it's 0 for * the targeting node. So that it takes precedence over any possible * `pcmk_delay_base/max`. */ char *delay_s = crm_itoa(node_priority_fencing_delay(node, data_set)); g_hash_table_insert(stonith_op->meta, strdup(XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY), delay_s); } if(optional == FALSE && pe_can_fence(data_set, node)) { pe_action_required(stonith_op, NULL, reason); } else if(reason && stonith_op->reason == NULL) { stonith_op->reason = strdup(reason); } return stonith_op; } void trigger_unfencing( pe_resource_t * rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t * data_set) { if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { /* No resources require it */ return; } else if (rsc != NULL && is_not_set(rsc->flags, pe_rsc_fence_device)) { /* Wasn't a stonith device */ return; } else if(node && node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { pe_action_t *unfence = pe_fence_op(node, "on", FALSE, reason, FALSE, data_set); if(dependency) { order_actions(unfence, dependency, pe_order_optional); } } else if(rsc) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { trigger_unfencing(rsc, node, reason, dependency, data_set); } } } } gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref) { pe_tag_t *tag = NULL; GListPtr gIter = NULL; gboolean is_existing = FALSE; CRM_CHECK(tags && tag_name && obj_ref, return FALSE); tag = g_hash_table_lookup(tags, tag_name); if (tag == NULL) { tag = calloc(1, sizeof(pe_tag_t)); if (tag == NULL) { return FALSE; } tag->id = strdup(tag_name); tag->refs = NULL; g_hash_table_insert(tags, strdup(tag_name), tag); } for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *existing_ref = (const char *) gIter->data; if (pcmk__str_eq(existing_ref, obj_ref, pcmk__str_none)){ is_existing = TRUE; break; } } if (is_existing == FALSE) { tag->refs = g_list_append(tag->refs, strdup(obj_ref)); crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref); } return TRUE; } void pe_action_set_flag_reason(const char *function, long line, pe_action_t *action, pe_action_t *reason, const char *text, enum pe_action_flags flags, bool overwrite) { bool unset = FALSE; bool update = FALSE; const char *change = NULL; if(is_set(flags, pe_action_runnable)) { unset = TRUE; change = "unrunnable"; } else if(is_set(flags, pe_action_optional)) { unset = TRUE; change = "required"; } else if(is_set(flags, pe_action_migrate_runnable)) { unset = TRUE; overwrite = TRUE; change = "unrunnable"; } else if(is_set(flags, pe_action_dangle)) { change = "dangling"; } else if(is_set(flags, pe_action_requires_any)) { change = "required"; } else { crm_err("Unknown flag change to %x by %s: 0x%s", flags, action->uuid, (reason? reason->uuid : "0")); } if(unset) { if(is_set(action->flags, flags)) { - action->flags = crm_clear_bit(function, line, action->uuid, action->flags, flags); + action->flags = pcmk__clear_flags_as(function, line, LOG_TRACE, + "Action", action->uuid, + action->flags, flags, NULL); update = TRUE; } } else { if(is_not_set(action->flags, flags)) { - action->flags = crm_set_bit(function, line, action->uuid, action->flags, flags); + action->flags = pcmk__set_flags_as(function, line, LOG_TRACE, + "Action", action->uuid, + action->flags, flags, NULL); update = TRUE; } } if((change && update) || text) { char *reason_text = NULL; if(reason == NULL) { pe_action_set_reason(action, text, overwrite); } else if(reason->rsc == NULL) { reason_text = crm_strdup_printf("%s %s%c %s", change, reason->task, text?':':0, text?text:""); } else { reason_text = crm_strdup_printf("%s %s %s%c %s", change, reason->rsc->id, reason->task, text?':':0, text?text:"NA"); } if(reason_text && action->rsc != reason->rsc) { pe_action_set_reason(action, reason_text, overwrite); } free(reason_text); } } void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite) { if (action->reason != NULL && overwrite) { pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'", action->uuid, action->reason, crm_str(reason)); free(action->reason); } else if (action->reason == NULL) { pe_rsc_trace(action->rsc, "Set %s reason to '%s'", action->uuid, crm_str(reason)); } else { // crm_assert(action->reason != NULL && !overwrite); return; } if (reason != NULL) { action->reason = strdup(reason); } else { action->reason = NULL; } } /*! * \internal * \brief Check whether shutdown has been requested for a node * * \param[in] node Node to check * * \return TRUE if node has shutdown attribute set and nonzero, FALSE otherwise * \note This differs from simply using node->details->shutdown in that it can * be used before that has been determined (and in fact to determine it), * and it can also be used to distinguish requested shutdown from implicit * shutdown of remote nodes by virtue of their connection stopping. */ bool pe__shutdown_requested(pe_node_t *node) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); return !pcmk__str_eq(shutdown, "0", pcmk__str_null_matches); } /*! * \internal * \brief Update a data set's "recheck by" time * * \param[in] recheck Epoch time when recheck should happen * \param[in,out] data_set Current working set */ void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set) { if ((recheck > get_effective_time(data_set)) && ((data_set->recheck_by == 0) || (data_set->recheck_by > recheck))) { data_set->recheck_by = recheck; } } /*! * \internal * \brief Wrapper for pe_unpack_nvpairs() using a cluster working set */ void pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name, pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pe_working_set_t *data_set) { crm_time_t *next_change = crm_time_new_undefined(); pe_eval_nvpairs(data_set->input, xml_obj, set_name, rule_data, hash, always_first, overwrite, next_change); if (crm_time_is_defined(next_change)) { time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change); pe__update_recheck_time(recheck, data_set); } crm_time_free(next_change); } bool pe__resource_is_disabled(pe_resource_t *rsc) { const char *target_role = NULL; CRM_CHECK(rsc != NULL, return false); target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if (target_role) { enum rsc_role_e target_role_e = text2role(target_role); if ((target_role_e == RSC_ROLE_STOPPED) || ((target_role_e == RSC_ROLE_SLAVE) && is_set(uber_parent(rsc)->flags, pe_rsc_promotable))) { return true; } } return false; } /*! * \internal * \brief Create an action to clear a resource's history from CIB * * \param[in] rsc Resource to clear * \param[in] node Node to clear history on * * \return New action to clear resource history */ pe_action_t * pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { char *key = NULL; CRM_ASSERT(rsc && node); key = pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0); return custom_action(rsc, key, CRM_OP_LRM_DELETE, node, FALSE, TRUE, data_set); } bool pe__rsc_running_on_any_node_in_list(pe_resource_t *rsc, GListPtr node_list) { for (GListPtr ele = rsc->running_on; ele; ele = ele->next) { pe_node_t *node = (pe_node_t *) ele->data; if (pcmk__str_in_list(node_list, node->details->uname)) { return true; } } return false; } bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GListPtr only_node) { return (rsc->fns->active(rsc, FALSE) && !pe__rsc_running_on_any_node_in_list(rsc, only_node)); } GListPtr pe__filter_rsc_list(GListPtr rscs, GListPtr filter) { GListPtr retval = NULL; for (GListPtr gIter = rscs; gIter; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* I think the second condition is safe here for all callers of this * function. If not, it needs to move into pe__node_text. */ if (pcmk__str_in_list(filter, rsc_printable_id(rsc)) || (rsc->parent && pcmk__str_in_list(filter, rsc_printable_id(rsc->parent)))) { retval = g_list_prepend(retval, rsc); } } return retval; }