diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index d93e212616..b99aa03f43 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -1,520 +1,536 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PE_INTERNAL__H # define PE_INTERNAL__H # include # include # include # include # define pe_rsc_info(rsc, fmt, args...) crm_log_tag(LOG_INFO, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_debug(rsc, fmt, args...) crm_log_tag(LOG_DEBUG, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_trace(rsc, fmt, args...) crm_log_tag(LOG_TRACE, rsc ? rsc->id : "", fmt, ##args) # define pe_err(fmt...) { was_processing_error = TRUE; crm_config_error = TRUE; crm_err(fmt); } # define pe_warn(fmt...) { was_processing_warning = TRUE; crm_config_warning = TRUE; crm_warn(fmt); } # define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); } # define pe_proc_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); } #define pe__set_action_flags(action, flags_to_set) do { \ (action)->flags = pcmk__set_flags_as(__FUNCTION__, __LINE__, \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_action_flags(action, flags_to_clear) do { \ (action)->flags = pcmk__clear_flags_as(__FUNCTION__, __LINE__, \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) +#define pe__set_raw_action_flags(action_flags, action_name, flags_to_set) do { \ + action_flags = pcmk__set_flags_as(__FUNCTION__, __LINE__, \ + LOG_TRACE, "Action", action_name, \ + (action_flags), \ + (flags_to_set), #flags_to_set); \ + } while (0) + +#define pe__clear_raw_action_flags(action_flags, action_name, flags_to_clear) do { \ + action_flags = pcmk__clear_flags_as(__FUNCTION__, __LINE__, \ + LOG_TRACE, \ + "Action", action_name, \ + (action_flags), \ + (flags_to_clear), \ + #flags_to_clear); \ + } while (0) + #define pe__set_action_flags_as(function, line, action, flags_to_set) do { \ (action)->flags = pcmk__set_flags_as((function), (line), \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_action_flags_as(function, line, action, flags_to_clear) do { \ (action)->flags = pcmk__clear_flags_as((function), (line), \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) // Some warnings we don't want to print every transition enum pe_warn_once_e { pe_wo_blind = 0x0001, pe_wo_restart_type = 0x0002, pe_wo_role_after = 0x0004, pe_wo_poweroff = 0x0008, pe_wo_require_all = 0x0010, pe_wo_order_score = 0x0020, pe_wo_neg_threshold = 0x0040, }; extern uint32_t pe_wo; #define pe_warn_once(pe_wo_bit, fmt...) do { \ if (is_not_set(pe_wo, pe_wo_bit)) { \ if (pe_wo_bit == pe_wo_blind) { \ crm_warn(fmt); \ } else { \ pe_warn(fmt); \ } \ set_bit(pe_wo, pe_wo_bit); \ } \ } while (0); typedef struct pe__location_constraint_s { char *id; // Constraint XML ID pe_resource_t *rsc_lh; // Resource being located enum rsc_role_e role_filter; // Role to locate enum pe_discover_e discover_mode; // Resource discovery GListPtr node_list_rh; // List of pe_node_t* } pe__location_t; typedef struct pe__order_constraint_s { int id; enum pe_ordering type; void *lh_opaque; pe_resource_t *lh_rsc; pe_action_t *lh_action; char *lh_action_task; void *rh_opaque; pe_resource_t *rh_rsc; pe_action_t *rh_action; char *rh_action_task; } pe__ordering_t; typedef struct notify_data_s { GSList *keys; // Environment variable name/value pairs const char *action; pe_action_t *pre; pe_action_t *post; pe_action_t *pre_done; pe_action_t *post_done; GListPtr active; /* notify_entry_t* */ GListPtr inactive; /* notify_entry_t* */ GListPtr start; /* notify_entry_t* */ GListPtr stop; /* notify_entry_t* */ GListPtr demote; /* notify_entry_t* */ GListPtr promote; /* notify_entry_t* */ GListPtr master; /* notify_entry_t* */ GListPtr slave; /* notify_entry_t* */ GHashTable *allowed_nodes; } notify_data_t; bool pe_can_fence(pe_working_set_t *data_set, pe_node_t *node); int pe__add_scores(int score1, int score2); void add_hash_param(GHashTable * hash, const char *name, const char *value); char *native_parameter(pe_resource_t * rsc, pe_node_t * node, gboolean create, const char *name, pe_working_set_t * data_set); pe_node_t *native_location(const pe_resource_t *rsc, GList **list, int current); void pe_metadata(void); void verify_pe_options(GHashTable * options); void common_update_score(pe_resource_t * rsc, const char *id, int score); void native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set); gboolean native_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean group_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean clone_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set); pe_resource_t *native_find_rsc(pe_resource_t *rsc, const char *id, const pe_node_t *node, int flags); gboolean native_active(pe_resource_t * rsc, gboolean all); gboolean group_active(pe_resource_t * rsc, gboolean all); gboolean clone_active(pe_resource_t * rsc, gboolean all); gboolean pe__bundle_active(pe_resource_t *rsc, gboolean all); void native_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data); void group_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data); void clone_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data); void pe__print_bundle(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name , size_t pairs_count, ...); char *pe__node_display_name(pe_node_t *node, bool print_detail); int pe__ban_html(pcmk__output_t *out, va_list args); int pe__ban_text(pcmk__output_t *out, va_list args); int pe__ban_xml(pcmk__output_t *out, va_list args); int pe__clone_xml(pcmk__output_t *out, va_list args); int pe__clone_html(pcmk__output_t *out, va_list args); int pe__clone_text(pcmk__output_t *out, va_list args); int pe__cluster_counts_html(pcmk__output_t *out, va_list args); int pe__cluster_counts_text(pcmk__output_t *out, va_list args); int pe__cluster_counts_xml(pcmk__output_t *out, va_list args); int pe__cluster_dc_html(pcmk__output_t *out, va_list args); int pe__cluster_dc_text(pcmk__output_t *out, va_list args); int pe__cluster_dc_xml(pcmk__output_t *out, va_list args); int pe__cluster_maint_mode_html(pcmk__output_t *out, va_list args); int pe__cluster_maint_mode_text(pcmk__output_t *out, va_list args); int pe__cluster_options_html(pcmk__output_t *out, va_list args); int pe__cluster_options_log(pcmk__output_t *out, va_list args); int pe__cluster_options_text(pcmk__output_t *out, va_list args); int pe__cluster_options_xml(pcmk__output_t *out, va_list args); int pe__cluster_stack_html(pcmk__output_t *out, va_list args); int pe__cluster_stack_text(pcmk__output_t *out, va_list args); int pe__cluster_stack_xml(pcmk__output_t *out, va_list args); int pe__cluster_summary(pcmk__output_t *out, va_list args); int pe__cluster_summary_html(pcmk__output_t *out, va_list args); int pe__cluster_times_html(pcmk__output_t *out, va_list args); int pe__cluster_times_xml(pcmk__output_t *out, va_list args); int pe__cluster_times_text(pcmk__output_t *out, va_list args); int pe__failed_action_text(pcmk__output_t *out, va_list args); int pe__failed_action_xml(pcmk__output_t *out, va_list args); int pe__group_xml(pcmk__output_t *out, va_list args); int pe__group_html(pcmk__output_t *out, va_list args); int pe__group_text(pcmk__output_t *out, va_list args); int pe__bundle_xml(pcmk__output_t *out, va_list args); int pe__bundle_html(pcmk__output_t *out, va_list args); int pe__bundle_text(pcmk__output_t *out, va_list args); int pe__node_html(pcmk__output_t *out, va_list args); int pe__node_text(pcmk__output_t *out, va_list args); int pe__node_xml(pcmk__output_t *out, va_list args); int pe__node_attribute_html(pcmk__output_t *out, va_list args); int pe__node_attribute_text(pcmk__output_t *out, va_list args); int pe__node_attribute_xml(pcmk__output_t *out, va_list args); int pe__node_list_html(pcmk__output_t *out, va_list args); int pe__node_list_text(pcmk__output_t *out, va_list args); int pe__node_list_xml(pcmk__output_t *out, va_list args); int pe__op_history_text(pcmk__output_t *out, va_list args); int pe__op_history_xml(pcmk__output_t *out, va_list args); int pe__resource_history_text(pcmk__output_t *out, va_list args); int pe__resource_history_xml(pcmk__output_t *out, va_list args); int pe__resource_xml(pcmk__output_t *out, va_list args); int pe__resource_html(pcmk__output_t *out, va_list args); int pe__resource_text(pcmk__output_t *out, va_list args); int pe__ticket_html(pcmk__output_t *out, va_list args); int pe__ticket_text(pcmk__output_t *out, va_list args); int pe__ticket_xml(pcmk__output_t *out, va_list args); void native_free(pe_resource_t * rsc); void group_free(pe_resource_t * rsc); void clone_free(pe_resource_t * rsc); void pe__free_bundle(pe_resource_t *rsc); enum rsc_role_e native_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e group_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e clone_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e pe__bundle_resource_state(const pe_resource_t *rsc, gboolean current); void pe__count_common(pe_resource_t *rsc); void pe__count_bundle(pe_resource_t *rsc); gboolean common_unpack(xmlNode * xml_obj, pe_resource_t ** rsc, pe_resource_t * parent, pe_working_set_t * data_set); void common_free(pe_resource_t * rsc); pe_node_t *pe__copy_node(const pe_node_t *this_node); extern time_t get_effective_time(pe_working_set_t * data_set); /* Failure handling utilities (from failcounts.c) */ // bit flags for fail count handling options enum pe_fc_flags_e { pe_fc_default = 0x00, pe_fc_effective = 0x01, // don't count expired failures pe_fc_fillers = 0x02, // if container, include filler failures in count }; int pe_get_failcount(pe_node_t *node, pe_resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set); pe_action_t *pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_working_set_t *data_set); /* Functions for finding/counting a resource's active nodes */ pe_node_t *pe__find_active_on(const pe_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean); pe_node_t *pe__find_active_requires(const pe_resource_t *rsc, unsigned int *count); static inline pe_node_t * pe__current_node(const pe_resource_t *rsc) { return pe__find_active_on(rsc, NULL, NULL); } /* Binary like operators for lists of nodes */ extern void node_list_exclude(GHashTable * list, GListPtr list2, gboolean merge_scores); GHashTable *pe__node_list2table(GList *list); static inline gpointer pe_hash_table_lookup(GHashTable * hash, gconstpointer key) { if (hash) { return g_hash_table_lookup(hash, key); } return NULL; } extern pe_action_t *get_pseudo_op(const char *name, pe_working_set_t * data_set); extern gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order); /* Printing functions for debug */ extern void print_node(const char *pre_text, pe_node_t * node, gboolean details); extern void print_str_str(gpointer key, gpointer value, gpointer user_data); extern void pe__output_node(pe_node_t * node, gboolean details, pcmk__output_t *out); extern void dump_node_capacity(int level, const char *comment, pe_node_t * node); extern void dump_rsc_utilization(int level, const char *comment, pe_resource_t * rsc, pe_node_t * node); void pe__show_node_weights_as(const char *file, const char *function, int line, bool to_log, pe_resource_t *rsc, const char *comment, GHashTable *nodes); #define pe__show_node_weights(level, rsc, text, nodes) \ pe__show_node_weights_as(__FILE__, __FUNCTION__, __LINE__, \ (level), (rsc), (text), (nodes)) /* Sorting functions */ extern gint sort_rsc_priority(gconstpointer a, gconstpointer b); extern gint sort_rsc_index(gconstpointer a, gconstpointer b); extern xmlNode *find_rsc_op_entry(pe_resource_t * rsc, const char *key); extern pe_action_t *custom_action(pe_resource_t * rsc, char *key, const char *task, pe_node_t * on_node, gboolean optional, gboolean foo, pe_working_set_t * data_set); # define delete_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DELETE, 0) # define delete_action(rsc, node, optional) custom_action( \ rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \ optional, TRUE, data_set); # define stopped_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOPPED, 0) # define stopped_action(rsc, node, optional) custom_action( \ rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \ optional, TRUE, data_set); # define stop_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOP, 0) # define stop_action(rsc, node, optional) custom_action( \ rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \ optional, TRUE, data_set); # define reload_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_RELOAD, 0) # define start_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_START, 0) # define start_action(rsc, node, optional) custom_action( \ rsc, start_key(rsc), CRMD_ACTION_START, node, \ optional, TRUE, data_set) # define started_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STARTED, 0) # define started_action(rsc, node, optional) custom_action( \ rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \ optional, TRUE, data_set) # define promote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTE, 0) # define promote_action(rsc, node, optional) custom_action( \ rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \ optional, TRUE, data_set) # define promoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTED, 0) # define promoted_action(rsc, node, optional) custom_action( \ rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \ optional, TRUE, data_set) # define demote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTE, 0) # define demote_action(rsc, node, optional) custom_action( \ rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \ optional, TRUE, data_set) # define demoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTED, 0) # define demoted_action(rsc, node, optional) custom_action( \ rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \ optional, TRUE, data_set) extern int pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set); extern pe_action_t *find_first_action(GListPtr input, const char *uuid, const char *task, pe_node_t * on_node); extern enum action_tasks get_complex_task(pe_resource_t * rsc, const char *name, gboolean allow_non_atomic); extern GListPtr find_actions(GListPtr input, const char *key, const pe_node_t *on_node); GList *find_actions_exact(GList *input, const char *key, const pe_node_t *on_node); extern GListPtr find_recurring_actions(GListPtr input, pe_node_t * not_on_node); GList *pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node); extern void pe_free_action(pe_action_t * action); extern void resource_location(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag, pe_working_set_t * data_set); extern gint sort_op_by_callid(gconstpointer a, gconstpointer b); extern gboolean get_target_role(pe_resource_t * rsc, enum rsc_role_e *role); extern pe_resource_t *find_clone_instance(pe_resource_t * rsc, const char *sub_id, pe_working_set_t * data_set); extern void destroy_ticket(gpointer data); extern pe_ticket_t *ticket_new(const char *ticket_id, pe_working_set_t * data_set); // Resources for manipulating resource names const char *pe_base_name_end(const char *id); char *clone_strip(const char *last_rsc_id); char *clone_zero(const char *last_rsc_id); static inline bool pe_base_name_eq(pe_resource_t *rsc, const char *id) { if (id && rsc && rsc->id) { // Number of characters in rsc->id before any clone suffix size_t base_len = pe_base_name_end(rsc->id) - rsc->id + 1; return (strlen(id) == base_len) && !strncmp(id, rsc->id, base_len); } return FALSE; } int pe__target_rc_from_xml(xmlNode *xml_op); gint sort_node_uname(gconstpointer a, gconstpointer b); bool is_set_recursive(pe_resource_t * rsc, long long flag, bool any); enum rsc_digest_cmp_val { /*! Digests are the same */ RSC_DIGEST_MATCH = 0, /*! Params that require a restart changed */ RSC_DIGEST_RESTART, /*! Some parameter changed. */ RSC_DIGEST_ALL, /*! rsc op didn't have a digest associated with it, so * it is unknown if parameters changed or not. */ RSC_DIGEST_UNKNOWN, }; typedef struct op_digest_cache_s { enum rsc_digest_cmp_val rc; xmlNode *params_all; xmlNode *params_secure; xmlNode *params_restart; char *digest_all_calc; char *digest_secure_calc; char *digest_restart_calc; } op_digest_cache_t; op_digest_cache_t *rsc_action_digest_cmp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * node, pe_working_set_t * data_set); pe_action_t *pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t * data_set); void trigger_unfencing( pe_resource_t * rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t * data_set); void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite); void pe_action_set_flag_reason(const char *function, long line, pe_action_t *action, pe_action_t *reason, const char *text, enum pe_action_flags flags, bool overwrite); #define pe_action_required(action, reason, text) pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, reason, text, pe_action_optional, FALSE) #define pe_action_implies(action, reason, flag) pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, reason, NULL, flag, FALSE) void set_bit_recursive(pe_resource_t * rsc, unsigned long long flag); void clear_bit_recursive(pe_resource_t * rsc, unsigned long long flag); gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref); void print_rscs_brief(GListPtr rsc_list, const char * pre_text, long options, void * print_data, gboolean print_all); int pe__rscs_brief_output(pcmk__output_t *out, GListPtr rsc_list, long options, gboolean print_all); void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay); pe_node_t *pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); void common_print(pe_resource_t * rsc, const char *pre_text, const char *name, pe_node_t *node, long options, void *print_data); int pe__common_output_text(pcmk__output_t *out, pe_resource_t * rsc, const char *name, pe_node_t *node, long options); int pe__common_output_html(pcmk__output_t *out, pe_resource_t * rsc, const char *name, pe_node_t *node, long options); pe_resource_t *pe__find_bundle_replica(const pe_resource_t *bundle, const pe_node_t *node); bool pe__bundle_needs_remote_name(pe_resource_t *rsc); const char *pe__add_bundle_remote_name(pe_resource_t *rsc, xmlNode *xml, const char *field); const char *pe_node_attribute_calculated(const pe_node_t *node, const char *name, const pe_resource_t *rsc); const char *pe_node_attribute_raw(pe_node_t *node, const char *name); bool pe__is_universal_clone(pe_resource_t *rsc, pe_working_set_t *data_set); void pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set); void pe__foreach_param_check(pe_working_set_t *data_set, void (*cb)(pe_resource_t*, pe_node_t*, xmlNode*, enum pe_check_parameters, pe_working_set_t*)); void pe__free_param_checks(pe_working_set_t *data_set); bool pe__shutdown_requested(pe_node_t *node); void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set); #define BOOL2STR(x) ((x) ? "true" : "false") /*! * \internal * \brief Register xml formatting message functions. */ void pe__register_messages(pcmk__output_t *out); void pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name, pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pe_working_set_t *data_set); bool pe__resource_is_disabled(pe_resource_t *rsc); pe_action_t *pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set); GListPtr pe__rscs_with_tag(pe_working_set_t *data_set, const char *tag_name); GListPtr pe__unames_with_tag(pe_working_set_t *data_set, const char *tag_name); bool pe__rsc_has_tag(pe_working_set_t *data_set, const char *rsc, const char *tag); bool pe__uname_has_tag(pe_working_set_t *data_set, const char *node, const char *tag); bool pe__rsc_running_on_any_node_in_list(pe_resource_t *rsc, GListPtr node_list); GListPtr pe__filter_rsc_list(GListPtr rscs, GListPtr filter); bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GListPtr only_node); gboolean pe__bundle_is_filtered(pe_resource_t *rsc, GListPtr only_rsc, gboolean check_parent); gboolean pe__clone_is_filtered(pe_resource_t *rsc, GListPtr only_rsc, gboolean check_parent); gboolean pe__group_is_filtered(pe_resource_t *rsc, GListPtr only_rsc, gboolean check_parent); gboolean pe__native_is_filtered(pe_resource_t *rsc, GListPtr only_rsc, gboolean check_parent); #endif diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c index f4b571acaa..3ed0d0b32f 100644 --- a/lib/pacemaker/pcmk_sched_allocate.c +++ b/lib/pacemaker/pcmk_sched_allocate.c @@ -1,3053 +1,3053 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pacemaker); void set_alloc_actions(pe_working_set_t * data_set); extern void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set); extern gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set); static void apply_remote_node_ordering(pe_working_set_t *data_set); static enum remote_connection_state get_remote_node_state(pe_node_t *node); enum remote_connection_state { remote_state_unknown = 0, remote_state_alive = 1, remote_state_resting = 2, remote_state_failed = 3, remote_state_stopped = 4 }; static const char * state2text(enum remote_connection_state state) { switch (state) { case remote_state_unknown: return "unknown"; case remote_state_alive: return "alive"; case remote_state_resting: return "resting"; case remote_state_failed: return "failed"; case remote_state_stopped: return "stopped"; } return "impossible"; } resource_alloc_functions_t resource_class_alloc_functions[] = { { pcmk__native_merge_weights, pcmk__native_allocate, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, native_rsc_location, native_action_flags, native_update_actions, native_expand, native_append_meta, }, { pcmk__group_merge_weights, pcmk__group_allocate, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, group_rsc_location, group_action_flags, group_update_actions, group_expand, group_append_meta, }, { pcmk__native_merge_weights, pcmk__clone_allocate, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, clone_rsc_location, clone_action_flags, pcmk__multi_update_actions, clone_expand, clone_append_meta, }, { pcmk__native_merge_weights, pcmk__bundle_allocate, pcmk__bundle_create_actions, pcmk__bundle_create_probe, pcmk__bundle_internal_constraints, pcmk__bundle_rsc_colocation_lh, pcmk__bundle_rsc_colocation_rh, pcmk__bundle_rsc_location, pcmk__bundle_action_flags, pcmk__multi_update_actions, pcmk__bundle_expand, pcmk__bundle_append_meta, } }; gboolean update_action_flags(pe_action_t * action, enum pe_action_flags flags, const char *source, int line) { static unsigned long calls = 0; gboolean changed = FALSE; gboolean clear = is_set(flags, pe_action_clear); enum pe_action_flags last = action->flags; if (clear) { pe__clear_action_flags_as(source, line, action, flags); } else { pe__set_action_flags_as(source, line, action, flags); } if (last != action->flags) { calls++; changed = TRUE; /* Useful for tracking down _who_ changed a specific flag */ /* CRM_ASSERT(calls != 534); */ - clear_bit(flags, pe_action_clear); + pe__clear_raw_action_flags(flags, "action update", pe_action_clear); crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)", action->uuid, action->node ? action->node->details->uname : "[none]", clear ? "un-" : "", flags, last, action->flags, calls, source); } return changed; } static gboolean check_rsc_parameters(pe_resource_t * rsc, pe_node_t * node, xmlNode * rsc_entry, gboolean active_here, pe_working_set_t * data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; gboolean changed = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for (; attr_lpc < DIMOF(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if (value == old_value /* i.e. NULL */ || pcmk__str_eq(value, old_value, pcmk__str_none)) { continue; } changed = TRUE; trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set); if (active_here) { force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } } if (force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); set_bit(rsc->flags, pe_rsc_start_pending); delete_resource = TRUE; } else if (changed) { delete_resource = TRUE; } return delete_resource; } static void CancelXmlOp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * active_node, const char *reason, pe_working_set_t * data_set) { guint interval_ms = 0; pe_action_t *cancel = NULL; const char *task = NULL; const char *call_id = NULL; CRM_CHECK(xml_op != NULL, return); CRM_CHECK(active_node != NULL, return); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); crm_info("Action " PCMK__OP_FMT " on %s will be stopped: %s", rsc->id, task, interval_ms, active_node->details->uname, (reason? reason : "unknown")); cancel = pe_cancel_op(rsc, task, interval_ms, active_node, data_set); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); } static gboolean check_action_definition(pe_resource_t * rsc, pe_node_t * active_node, xmlNode * xml_op, pe_working_set_t * data_set) { char *key = NULL; guint interval_ms = 0; const op_digest_cache_t *digest_data = NULL; gboolean did_change = FALSE; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_secure = NULL; CRM_CHECK(active_node != NULL, return FALSE); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if (interval_ms > 0) { xmlNode *op_match = NULL; /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = pcmk__op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) { CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set); free(key); return TRUE; } else if (op_match == NULL) { pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname); free(key); return TRUE; } free(key); key = NULL; } crm_trace("Testing " PCMK__OP_FMT " on %s", rsc->id, task, interval_ms, active_node->details->uname); if ((interval_ms == 0) && pcmk__str_eq(task, RSC_STATUS, pcmk__str_casei)) { /* Reload based on the start action not a probe */ task = RSC_START; } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_MIGRATED, pcmk__str_casei)) { /* Reload based on the start action not a migrate */ task = RSC_START; } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_PROMOTE, pcmk__str_casei)) { /* Reload based on the start action not a promote */ task = RSC_START; } digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set); if(is_set(data_set->flags, pe_flag_sanitized)) { digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST); } if(digest_data->rc != RSC_DIGEST_MATCH && digest_secure && digest_data->digest_secure_calc && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) { if (is_set(data_set->flags, pe_flag_stdout)) { printf("Only 'private' parameters to " PCMK__OP_FMT " on %s changed: %s\n", rsc->id, task, interval_ms, active_node->details->uname, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); } } else if (digest_data->rc == RSC_DIGEST_RESTART) { /* Changes that force a restart */ pe_action_t *required = NULL; did_change = TRUE; key = pcmk__op_key(rsc->id, task, interval_ms); crm_log_xml_info(digest_data->params_restart, "params:restart"); required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set); pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL, "resource definition change", pe_action_optional, TRUE); trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set); } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) { /* Changes that can potentially be handled by a reload */ const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); did_change = TRUE; trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set); crm_log_xml_info(digest_data->params_all, "params:reload"); key = pcmk__op_key(rsc->id, task, interval_ms); if (interval_ms > 0) { pe_action_t *op = NULL; #if 0 /* Always reload/restart the entire resource */ ReloadRsc(rsc, active_node, data_set); #else /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */ op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set); - set_bit(op->flags, pe_action_reschedule); + pe__set_action_flags(op, pe_action_reschedule); #endif } else if (digest_restart) { pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id); /* Reload this resource */ ReloadRsc(rsc, active_node, data_set); free(key); } else { pe_action_t *required = NULL; pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id); /* Re-send the start/demote/promote op * Recurring ops will be detected independently */ required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set); pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL, "resource definition change", pe_action_optional, TRUE); } } return did_change; } /*! * \internal * \brief Do deferred action checks after allocation * * \param[in] data_set Working set for cluster */ static void check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op, enum pe_check_parameters check, pe_working_set_t *data_set) { const char *reason = NULL; op_digest_cache_t *digest_data = NULL; switch (check) { case pe_check_active: if (check_action_definition(rsc, node, rsc_op, data_set) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { reason = "action definition changed"; } break; case pe_check_last_failure: digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s has no digest to compare", rsc->id, ID(rsc_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: reason = "resource parameters have changed"; break; } break; } if (reason) { pe__clear_failcount(rsc, node, reason, data_set); } } static void check_actions_for(xmlNode * rsc_entry, pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set) { GListPtr gIter = NULL; int offset = -1; int stop_index = 0; int start_index = 0; const char *task = NULL; xmlNode *rsc_op = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; CRM_CHECK(node != NULL, return); if (is_set(rsc->flags, pe_rsc_orphan)) { pe_resource_t *parent = uber_parent(rsc); if(parent == NULL || pe_rsc_is_clone(parent) == FALSE || is_set(parent->flags, pe_rsc_unique)) { pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); DeleteRsc(rsc, node, FALSE, data_set); } else { pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id); } return; } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname); if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_prepend(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; offset++; if (start_index < stop_index) { /* stopped */ continue; } else if (offset < start_index) { /* action occurred prior to a start */ continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms > 0) && (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) { // Maintenance mode cancels recurring operations CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); } else if ((interval_ms > 0) || pcmk__strcase_any_of(task, RSC_STATUS, RSC_START, RSC_PROMOTE, RSC_MIGRATED, NULL)) { /* If a resource operation failed, and the operation's definition * has changed, clear any fail count so they can be retried fresh. */ if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources to nodes yet, so if the * REMOTE_CONTAINER_HACK is used, we may calculate the digest * based on the literal "#uname" value rather than the properly * substituted value. That would mistakenly make the action * definition appear to have been changed. Defer the check until * later in this case. */ pe__add_param_check(rsc_op, rsc, node, pe_check_active, data_set); } else if (check_action_definition(rsc, node, rsc_op, data_set) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { pe__clear_failcount(rsc, node, "action definition changed", data_set); } } } g_list_free(sorted_op_list); } static GListPtr find_rsc_list(GListPtr result, pe_resource_t * rsc, const char *id, gboolean renamed_clones, gboolean partial, pe_working_set_t * data_set) { GListPtr gIter = NULL; gboolean match = FALSE; if (id == NULL) { return NULL; } if (rsc == NULL) { if (data_set == NULL) { return NULL; } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } return result; } if (partial) { if (strstr(rsc->id, id)) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) { match = TRUE; } } else { if (strcmp(rsc->id, id) == 0) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } } if (match) { result = g_list_prepend(result, rsc); } if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } } return result; } static void check_actions(pe_working_set_t * data_set) { const char *id = NULL; pe_node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xmlNode *node_state = NULL; for (node_state = __xml_first_child_element(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if (node == NULL) { continue; /* Still need to check actions for a maintenance node to cancel existing monitor operations */ } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) { crm_trace("Skipping param check for %s: can't run resources", node->details->uname); continue; } crm_trace("Processing node %s", node->details->uname); if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child_element(lrm_rscs); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { if (xml_has_children(rsc_entry)) { GListPtr gIter = NULL; GListPtr result = NULL; const char *rsc_id = ID(rsc_entry); CRM_CHECK(rsc_id != NULL, return); result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set); for (gIter = result; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->variant != pe_native) { continue; } check_actions_for(rsc_entry, rsc, node, data_set); } g_list_free(result); } } } } } } } static void apply_placement_constraints(pe_working_set_t * data_set) { for (GList *gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { pe__location_t *cons = gIter->data; cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); } } static gboolean failcount_clear_action_exists(pe_node_t * node, pe_resource_t * rsc) { gboolean rc = FALSE; GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE); if (list) { rc = TRUE; } g_list_free(list); return rc; } /*! * \internal * \brief Force resource away if failures hit migration threshold * * \param[in,out] rsc Resource to check for failures * \param[in,out] node Node to check for failures * \param[in,out] data_set Cluster working set to update */ static void check_migration_threshold(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { int fail_count, countdown; pe_resource_t *failed; /* Migration threshold of 0 means never force away */ if (rsc->migration_threshold == 0) { return; } // If we're ignoring failures, also ignore the migration threshold if (is_set(rsc->flags, pe_rsc_failure_ignored)) { return; } /* If there are no failures, there's no need to force away */ fail_count = pe_get_failcount(node, rsc, NULL, pe_fc_effective|pe_fc_fillers, NULL, data_set); if (fail_count <= 0) { return; } /* How many more times recovery will be tried on this node */ countdown = QB_MAX(rsc->migration_threshold - fail_count, 0); /* If failed resource has a parent, we'll force the parent away */ failed = rsc; if (is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } if (countdown == 0) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_info("%s can fail %d more times on %s before being forced off", failed->id, countdown, node->details->uname); } } static void common_apply_stickiness(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set) { if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; common_apply_stickiness(child_rsc, node, data_set); } return; } if (is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0 && pcmk__list_of_1(rsc->running_on)) { pe_node_t *current = pe_find_node_id(rsc->running_on, node->details->id); pe_node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (current == NULL) { } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) { pe_resource_t *sticky_rsc = rsc; resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set); pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location" " (node=%s, weight=%d)", sticky_rsc->id, node->details->uname, rsc->stickiness); } else { GHashTableIter iter; pe_node_t *nIter = NULL; pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric" " and node %s is not explicitly allowed", rsc->id, node->details->uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) { crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight); } } } /* Check the migration threshold only if a failcount clear action * has not already been placed for this resource on the node. * There is no sense in potentially forcing the resource from this * node if the failcount is being reset anyway. * * @TODO A clear_failcount operation can be scheduled in stage4() via * check_actions_for(), or in stage5() via check_params(). This runs in * stage2(), so it cannot detect those, meaning we might check the migration * threshold when we shouldn't -- worst case, we stop or move the resource, * then move it back next transition. */ if (failcount_clear_action_exists(node, rsc) == FALSE) { check_migration_threshold(rsc, node, data_set); } } void complex_set_cmds(pe_resource_t * rsc) { GListPtr gIter = rsc->children; rsc->cmds = &resource_class_alloc_functions[rsc->variant]; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; complex_set_cmds(child_rsc); } } void set_alloc_actions(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; complex_set_cmds(rsc); } } static void calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data) { const char *key = (const char *)gKey; const char *value = (const char *)gValue; int *system_health = (int *)user_data; if (!gKey || !gValue || !user_data) { return; } if (pcmk__starts_with(key, "#health")) { int score; /* Convert the value into an integer */ score = char2score(value); /* Add it to the running total */ *system_health = pe__add_scores(score, *system_health); } } static gboolean apply_system_health(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy"); int base_health = 0; if (pcmk__str_eq(health_strategy, "none", pcmk__str_null_matches | pcmk__str_casei)) { /* Prevent any accidental health -> score translation */ pcmk__score_red = 0; pcmk__score_yellow = 0; pcmk__score_green = 0; return TRUE; } else if (pcmk__str_eq(health_strategy, "migrate-on-red", pcmk__str_casei)) { /* Resources on nodes which have health values of red are * weighted away from that node. */ pcmk__score_red = -INFINITY; pcmk__score_yellow = 0; pcmk__score_green = 0; } else if (pcmk__str_eq(health_strategy, "only-green", pcmk__str_casei)) { /* Resources on nodes which have health values of red or yellow * are forced away from that node. */ pcmk__score_red = -INFINITY; pcmk__score_yellow = -INFINITY; pcmk__score_green = 0; } else if (pcmk__str_eq(health_strategy, "progressive", pcmk__str_casei)) { /* Same as the above, but use the r/y/g scores provided by the user * Defaults are provided by the pe_prefs table * Also, custom health "base score" can be used */ base_health = crm_parse_int(pe_pref(data_set->config_hash, "node-health-base"), "0"); } else if (pcmk__str_eq(health_strategy, "custom", pcmk__str_casei)) { /* Requires the admin to configure the rsc_location constaints for * processing the stored health scores */ /* TODO: Check for the existence of appropriate node health constraints */ return TRUE; } else { crm_err("Unknown node health strategy: %s", health_strategy); return FALSE; } crm_info("Applying automated node health strategy: %s", health_strategy); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { int system_health = base_health; pe_node_t *node = (pe_node_t *) gIter->data; /* Search through the node hash table for system health entries. */ g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health); crm_info(" Node %s has an combined system health of %d", node->details->uname, system_health); /* If the health is non-zero, then create a new rsc2node so that the * weight will be added later on. */ if (system_health != 0) { GListPtr gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set); } } } return TRUE; } gboolean stage0(pe_working_set_t * data_set) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); if (data_set->input == NULL) { return FALSE; } if (is_set(data_set->flags, pe_flag_have_status) == FALSE) { crm_trace("Calculating status"); cluster_status(data_set); } set_alloc_actions(data_set); apply_system_health(data_set); unpack_constraints(cib_constraints, data_set); return TRUE; } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t * data_set) { pe_action_t *probe_node_complete = NULL; for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED); if (node->details->online == FALSE) { if (pe__is_remote_node(node) && node->details->remote_rsc && (get_remote_node_state(node) == remote_state_failed)) { pe_fence_node(data_set, node, "the connection is unrecoverable", FALSE); } continue; } else if (node->details->unclean) { continue; } else if (node->details->rsc_discovery_enabled == FALSE) { /* resource discovery is disabled for this node */ continue; } if (probed != NULL && crm_is_true(probed) == FALSE) { pe_action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname), CRM_OP_REPROBE, node, FALSE, TRUE, data_set); add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); continue; } for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set); } } return TRUE; } static void rsc_discover_filter(pe_resource_t *rsc, pe_node_t *node) { GListPtr gIter = rsc->children; pe_resource_t *top = uber_parent(rsc); pe_node_t *match; if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) { return; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; rsc_discover_filter(child_rsc, node); } match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match && match->rsc_discover_mode != pe_discover_exclusive) { match->weight = -INFINITY; } } static time_t shutdown_time(pe_node_t *node, pe_working_set_t *data_set) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); time_t result = 0; if (shutdown) { errno = 0; result = (time_t) crm_parse_ll(shutdown, NULL); if (errno != 0) { result = 0; } } return result? result : get_effective_time(data_set); } static void apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set) { const char *class; // Only primitives and (uncloned) groups may be locked if (rsc->variant == pe_group) { for (GList *item = rsc->children; item != NULL; item = item->next) { apply_shutdown_lock((pe_resource_t *) item->data, data_set); } } else if (rsc->variant != pe_native) { return; } // Fence devices and remote connections can't be locked class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches) || pe__resource_is_remote_conn(rsc, data_set)) { return; } if (rsc->lock_node != NULL) { // The lock was obtained from resource history if (rsc->running_on != NULL) { /* The resource was started elsewhere even though it is now * considered locked. This shouldn't be possible, but as a * failsafe, we don't want to disturb the resource now. */ pe_rsc_info(rsc, "Cancelling shutdown lock because %s is already active", rsc->id); pe__clear_resource_history(rsc, rsc->lock_node, data_set); rsc->lock_node = NULL; rsc->lock_time = 0; } // Only a resource active on exactly one node can be locked } else if (pcmk__list_of_1(rsc->running_on)) { pe_node_t *node = rsc->running_on->data; if (node->details->shutdown) { if (node->details->unclean) { pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown", rsc->id, node->details->uname); } else { rsc->lock_node = node; rsc->lock_time = shutdown_time(node, data_set); } } } if (rsc->lock_node == NULL) { // No lock needed return; } if (data_set->shutdown_lock > 0) { time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock; pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)", rsc->id, rsc->lock_node->details->uname, (long long) lock_expiration); pe__update_recheck_time(++lock_expiration, data_set); } else { pe_rsc_info(rsc, "Locking %s to %s due to shutdown", rsc->id, rsc->lock_node->details->uname); } // If resource is locked to one node, ban it from all other nodes for (GList *item = data_set->nodes; item != NULL; item = item->next) { pe_node_t *node = item->data; if (strcmp(node->details->uname, rsc->lock_node->details->uname)) { resource_location(rsc, node, -CRM_SCORE_INFINITY, XML_CONFIG_ATTR_SHUTDOWN_LOCK, data_set); } } } /* * \internal * \brief Stage 2 of cluster status: apply node-specific criteria * * Count known nodes, and apply location constraints, stickiness, and exclusive * resource discovery. */ gboolean stage2(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (is_set(data_set->flags, pe_flag_shutdown_lock)) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { apply_shutdown_lock((pe_resource_t *) gIter->data, data_set); } } if (is_not_set(data_set->flags, pe_flag_no_compat)) { // @COMPAT API backward compatibility for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node && (node->weight >= 0) && node->details->online && (node->details->type != node_ping)) { data_set->max_valid_nodes++; } } } crm_trace("Applying placement constraints"); apply_placement_constraints(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { GListPtr gIter2 = NULL; pe_node_t *node = (pe_node_t *) gIter->data; gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; common_apply_stickiness(rsc, node, data_set); rsc_discover_filter(rsc, node); } } return TRUE; } /* * Create internal resource constraints before allocation */ gboolean stage3(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; rsc->cmds->internal_constraints(rsc, data_set); } return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t * data_set) { check_actions(data_set); return TRUE; } static void * convert_const_pointer(const void *ptr) { /* Worst function ever */ return (void *)ptr; } static gint sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) { int rc = 0; int r1_weight = -INFINITY; int r2_weight = -INFINITY; const char *reason = "existence"; const GListPtr nodes = (GListPtr) data; const pe_resource_t *resource1 = a; const pe_resource_t *resource2 = b; pe_node_t *r1_node = NULL; pe_node_t *r2_node = NULL; GListPtr gIter = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; reason = "priority"; r1_weight = resource1->priority; r2_weight = resource2->priority; if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "no node list"; if (nodes == NULL) { goto done; } r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1), resource1->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); pe__show_node_weights(true, NULL, resource1->id, r1_nodes); r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2), resource2->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); pe__show_node_weights(true, NULL, resource2->id, r2_nodes); /* Current location score */ reason = "current location"; r1_weight = -INFINITY; r2_weight = -INFINITY; if (resource1->running_on) { r1_node = pe__current_node(resource1); r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id); if (r1_node != NULL) { r1_weight = r1_node->weight; } } if (resource2->running_on) { r2_node = pe__current_node(resource2); r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id); if (r2_node != NULL) { r2_weight = r2_node->weight; } } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "score"; for (gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; r1_node = NULL; r2_node = NULL; r1_weight = -INFINITY; if (r1_nodes) { r1_node = g_hash_table_lookup(r1_nodes, node->details->id); } if (r1_node) { r1_weight = r1_node->weight; } r2_weight = -INFINITY; if (r2_nodes) { r2_node = g_hash_table_lookup(r2_nodes, node->details->id); } if (r2_node) { r2_weight = r2_node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } } done: crm_trace("%s (%d) on %s %c %s (%d) on %s: %s", resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a", rc < 0 ? '>' : rc > 0 ? '<' : '=', resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason); if (r1_nodes) { g_hash_table_destroy(r1_nodes); } if (r2_nodes) { g_hash_table_destroy(r2_nodes); } return rc; } static void allocate_resources(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (is_set(data_set->flags, pe_flag_have_remote_nodes)) { /* Allocate remote connection resources first (which will also allocate * any colocation dependencies). If the connection is migrating, always * prefer the partial migration target. */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->is_remote_node == FALSE) { continue; } pe_rsc_trace(rsc, "Allocating remote connection resource '%s'", rsc->id); rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set); } } /* now do the rest of the resources */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (rsc->is_remote_node == TRUE) { continue; } pe_rsc_trace(rsc, "Allocating %s resource '%s'", crm_element_name(rsc->xml), rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } } /* We always use pe_order_preserve with these convenience functions to exempt * internally generated constraints from the prohibition of user constraints * involving remote connection resources. * * The start ordering additionally uses pe_order_runnable_left so that the * specified action is not runnable if the start is not runnable. */ static inline void order_start_then_action(pe_resource_t *lh_rsc, pe_action_t *rh_action, enum pe_ordering extra, pe_working_set_t *data_set) { if (lh_rsc && rh_action && data_set) { custom_action_order(lh_rsc, start_key(lh_rsc), NULL, rh_action->rsc, NULL, rh_action, pe_order_preserve | pe_order_runnable_left | extra, data_set); } } static inline void order_action_then_stop(pe_action_t *lh_action, pe_resource_t *rh_rsc, enum pe_ordering extra, pe_working_set_t *data_set) { if (lh_action && rh_rsc && data_set) { custom_action_order(lh_action->rsc, NULL, lh_action, rh_rsc, stop_key(rh_rsc), NULL, pe_order_preserve | extra, data_set); } } // Clear fail counts for orphaned rsc on all online nodes static void cleanup_orphans(pe_resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node->details->online && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, data_set)) { pe_action_t *clear_op = NULL; clear_op = pe__clear_failcount(rsc, node, "it is orphaned", data_set); /* We can't use order_action_then_stop() here because its * pe_order_preserve breaks things */ custom_action_order(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), NULL, pe_order_optional, data_set); } } } gboolean stage5(pe_working_set_t * data_set) { GListPtr gIter = NULL; int log_prio = show_utilization? LOG_STDOUT : LOG_TRACE; if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) { GListPtr nodes = g_list_copy(data_set->nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); data_set->resources = g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes); g_list_free(nodes); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; dump_node_capacity(log_prio, "Original", node); } crm_trace("Allocating services"); /* Take (next) highest resource, assign it and create its actions */ allocate_resources(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; dump_node_capacity(log_prio, "Remaining", node); } // Process deferred action checks pe__foreach_param_check(data_set, check_params); pe__free_param_checks(data_set); if (is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Calculating needed probes"); /* This code probably needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=100: With probes: ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints 36s ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph Without probes: ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph */ probe_resources(data_set); } crm_trace("Handle orphans"); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans)) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* There's no need to recurse into rsc->children because those * should just be unallocated clone instances. */ if (is_set(rsc->flags, pe_rsc_orphan)) { cleanup_orphans(rsc, data_set); } } } crm_trace("Creating actions"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; rsc->cmds->create_actions(rsc, data_set); } crm_trace("Creating done"); return TRUE; } static gboolean is_managed(const pe_resource_t * rsc) { GListPtr gIter = rsc->children; if (is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (is_managed(child_rsc)) { return TRUE; } } return FALSE; } static gboolean any_managed_resources(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (is_managed(rsc)) { return TRUE; } } return FALSE; } /*! * \internal * \brief Create pseudo-op for guest node fence, and order relative to it * * \param[in] node Guest node to fence * \param[in] data_set Working set of CIB state */ static void fence_guest(pe_node_t *node, pe_working_set_t *data_set) { pe_resource_t *container = node->details->remote_rsc->container; pe_action_t *stop = NULL; pe_action_t *stonith_op = NULL; /* The fence action is just a label; we don't do anything differently for * off vs. reboot. We specify it explicitly, rather than let it default to * cluster's default action, because we are not _initiating_ fencing -- we * are creating a pseudo-event to describe fencing that is already occurring * by other means (container recovery). */ const char *fence_action = "off"; /* Check whether guest's container resource has any explicit stop or * start (the stop may be implied by fencing of the guest's host). */ if (container) { stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL); if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) { fence_action = "reboot"; } } /* Create a fence pseudo-event, so we have an event to order actions * against, and the controller can always detect it. */ stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, data_set); update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable, __FUNCTION__, __LINE__); /* We want to imply stops/demotes after the guest is stopped, not wait until * it is restarted, so we always order pseudo-fencing after stop, not start * (even though start might be closer to what is done for a real reboot). */ if(stop && is_set(stop->flags, pe_action_pseudo)) { pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, data_set); crm_info("Implying guest node %s is down (action %d) after %s fencing", node->details->uname, stonith_op->id, stop->node->details->uname); order_actions(parent_stonith_op, stonith_op, pe_order_runnable_left|pe_order_implies_then); } else if (stop) { order_actions(stop, stonith_op, pe_order_runnable_left|pe_order_implies_then); crm_info("Implying guest node %s is down (action %d) " "after container %s is stopped (action %d)", node->details->uname, stonith_op->id, container->id, stop->id); } else { /* If we're fencing the guest node but there's no stop for the guest * resource, we must think the guest is already stopped. However, we may * think so because its resource history was just cleaned. To avoid * unnecessarily considering the guest node down if it's really up, * order the pseudo-fencing after any stop of the connection resource, * which will be ordered after any container (re-)probe. */ stop = find_first_action(node->details->remote_rsc->actions, NULL, RSC_STOP, NULL); if (stop) { order_actions(stop, stonith_op, pe_order_optional); crm_info("Implying guest node %s is down (action %d) " "after connection is stopped (action %d)", node->details->uname, stonith_op->id, stop->id); } else { /* Not sure why we're fencing, but everything must already be * cleanly stopped. */ crm_info("Implying guest node %s is down (action %d) ", node->details->uname, stonith_op->id); } } /* Order/imply other actions relative to pseudo-fence as with real fence */ pcmk__order_vs_fence(stonith_op, data_set); } /* * Create dependencies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t * data_set) { pe_action_t *dc_down = NULL; pe_action_t *stonith_op = NULL; gboolean integrity_lost = FALSE; gboolean need_stonith = TRUE; GListPtr gIter; GListPtr stonith_ops = NULL; GList *shutdown_ops = NULL; /* Remote ordering constraints need to happen prior to calculating fencing * because it is one more place we will mark the node as dirty. * * A nice side effect of doing them early is that apply_*_ordering() can be * simpler because pe_fence_node() has already done some of the work. */ crm_trace("Creating remote ordering constraints"); apply_remote_node_ordering(data_set); crm_trace("Processing fencing and shutdown cases"); if (any_managed_resources(data_set) == FALSE) { crm_notice("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } /* Check each node for stonith/shutdown */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; /* Guest nodes are "fenced" by recovering their container resource, * so handle them separately. */ if (pe__is_guest_node(node)) { if (node->details->remote_requires_reset && need_stonith && pe_can_fence(data_set, node)) { fence_guest(node, data_set); } continue; } stonith_op = NULL; if (node->details->unclean && need_stonith && pe_can_fence(data_set, node)) { stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, data_set); pe_warn("Scheduling Node %s for STONITH", node->details->uname); pcmk__order_vs_fence(stonith_op, data_set); if (node->details->is_dc) { // Remember if the DC is being fenced dc_down = stonith_op; } else { if (is_not_set(data_set->flags, pe_flag_concurrent_fencing) && (stonith_ops != NULL)) { /* Concurrent fencing is disabled, so order each non-DC * fencing in a chain. If there is any DC fencing or * shutdown, it will be ordered after the last action in the * chain later. */ order_actions((pe_action_t *) stonith_ops->data, stonith_op, pe_order_optional); } // Remember all non-DC fencing actions in a separate list stonith_ops = g_list_prepend(stonith_ops, stonith_op); } } else if (node->details->online && node->details->shutdown && /* TODO define what a shutdown op means for a remote node. * For now we do not send shutdown operations for remote nodes, but * if we can come up with a good use for this in the future, we will. */ pe__is_guest_or_remote_node(node) == FALSE) { pe_action_t *down_op = sched_shutdown_op(node, data_set); if (node->details->is_dc) { // Remember if the DC is being shut down dc_down = down_op; } else { // Remember non-DC shutdowns for later ordering shutdown_ops = g_list_prepend(shutdown_ops, down_op); } } if (node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } } if (integrity_lost) { if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no-quorum-policy is set to ignore)"); } } if (dc_down != NULL) { /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated * DC elections. However, we don't want to order non-DC shutdowns before * a DC *fencing*, because even though we don't want a node that's * shutting down to become DC, the DC fencing could be ordered before a * clone stop that's also ordered before the shutdowns, thus leading to * a graph loop. */ if (pcmk__str_eq(dc_down->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) { pe_action_t *node_stop = (pe_action_t *) gIter->data; crm_debug("Ordering shutdown on %s before %s on DC %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_optional); } } // Order any non-DC fencing before any DC fencing or shutdown if (is_set(data_set->flags, pe_flag_concurrent_fencing)) { /* With concurrent fencing, order each non-DC fencing action * separately before any DC fencing or shutdown. */ for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) { order_actions((pe_action_t *) gIter->data, dc_down, pe_order_optional); } } else if (stonith_ops) { /* Without concurrent fencing, the non-DC fencing actions are * already ordered relative to each other, so we just need to order * the DC fencing after the last action in the chain (which is the * first item in the list). */ order_actions((pe_action_t *) stonith_ops->data, dc_down, pe_order_optional); } } g_list_free(stonith_ops); g_list_free(shutdown_ops); return TRUE; } /* * Determine the sets of independent actions and the correct order for the * actions in each set. * * Mark dependencies of un-runnable actions un-runnable * */ static GListPtr find_actions_by_task(GListPtr actions, pe_resource_t * rsc, const char *original_key) { GListPtr list = NULL; list = find_actions(actions, original_key, NULL); if (list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *task = NULL; guint interval_ms = 0; if (parse_op_key(original_key, NULL, &task, &interval_ms)) { key = pcmk__op_key(rsc->id, task, interval_ms); list = find_actions(actions, key, NULL); } else { crm_err("search key: %s", original_key); } free(key); free(task); } return list; } static void rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc, pe__ordering_t *order) { GListPtr gIter = NULL; GListPtr rh_actions = NULL; pe_action_t *rh_action = NULL; enum pe_ordering type; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); type = order->type; rh_action = order->rh_action; crm_trace("Processing RH of ordering constraint %d", order->id); if (rh_action != NULL) { rh_actions = g_list_prepend(NULL, rh_action); } else if (rsc != NULL) { rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task); } if (rh_actions == NULL) { pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..." " ignoring", rsc->id, order->rh_action_task); if (lh_action) { pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid); } return; } if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) { pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid, order->rh_action_task); clear_bit(type, pe_order_implies_then); } gIter = rh_actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *rh_action_iter = (pe_action_t *) gIter->data; if (lh_action) { order_actions(lh_action, rh_action_iter, type); } else if (type & pe_order_implies_then) { update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type); } } g_list_free(rh_actions); } static void rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order, pe_working_set_t *data_set) { GListPtr gIter = NULL; GListPtr lh_actions = NULL; pe_action_t *lh_action = order->lh_action; pe_resource_t *rh_rsc = order->rh_rsc; crm_trace("Processing LH of ordering constraint %d", order->id); CRM_ASSERT(lh_rsc != NULL); if (lh_action != NULL) { lh_actions = g_list_prepend(NULL, lh_action); } else { lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task); } if (lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; char *op_type = NULL; guint interval_ms = 0; parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms); key = pcmk__op_key(lh_rsc->id, op_type, interval_ms); if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && pcmk__str_eq(op_type, RSC_STOP, pcmk__str_casei)) { free(key); pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && pcmk__str_eq(op_type, RSC_DEMOTE, pcmk__str_casei)) { free(key); pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } else { pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); lh_actions = g_list_prepend(NULL, lh_action); } free(op_type); } gIter = lh_actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *lh_action_iter = (pe_action_t *) gIter->data; if (rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if (rh_rsc) { rsc_order_then(lh_action_iter, rh_rsc, order); } else if (order->rh_action) { order_actions(lh_action_iter, order->rh_action, order->type); } } g_list_free(lh_actions); } extern void update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set); static int is_recurring_action(pe_action_t *action) { guint interval_ms; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { return 0; } return (interval_ms > 0); } static void apply_container_ordering(pe_action_t *action, pe_working_set_t *data_set) { /* VMs are also classified as containers for these purposes... in * that they both involve a 'thing' running on a real or remote * cluster node. * * This allows us to be smarter about the type and extent of * recovery actions required in various scenarios */ pe_resource_t *remote_rsc = NULL; pe_resource_t *container = NULL; enum action_tasks task = text2task(action->task); CRM_ASSERT(action->rsc); CRM_ASSERT(action->node); CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); container = remote_rsc->container; CRM_ASSERT(container); if(is_set(container->flags, pe_rsc_failed)) { pe_fence_node(data_set, action->node, "container failed", FALSE); } crm_trace("Order %s action %s relative to %s%s for %s%s", action->task, action->uuid, is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, is_set(container->flags, pe_rsc_failed)? "failed " : "", container->id); if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: /* Force resource recovery if the container is recovered */ order_start_then_action(container, action, pe_order_implies_then, data_set); /* Wait for the connection resource to be up too */ order_start_then_action(remote_rsc, action, pe_order_none, data_set); break; case stop_rsc: case action_demote: if (is_set(container->flags, pe_rsc_failed)) { /* When the container representing a guest node fails, any stop * or demote actions for resources running on the guest node * are implied by the container stopping. This is similar to * how fencing operations work for cluster nodes and remote * nodes. */ } else { /* Ensure the operation happens before the connection is brought * down. * * If we really wanted to, we could order these after the * connection start, IFF the container's current role was * stopped (otherwise we re-introduce an ordering loop when the * connection is restarting). */ order_action_then_stop(action, remote_rsc, pe_order_none, data_set); } break; default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ if(task != no_action) { order_start_then_action(remote_rsc, action, pe_order_implies_then, data_set); } } else { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } break; } } static enum remote_connection_state get_remote_node_state(pe_node_t *node) { pe_resource_t *remote_rsc = NULL; pe_node_t *cluster_node = NULL; CRM_ASSERT(node); remote_rsc = node->details->remote_rsc; CRM_ASSERT(remote_rsc); cluster_node = pe__current_node(remote_rsc); /* If the cluster node the remote connection resource resides on * is unclean or went offline, we can't process any operations * on that remote node until after it starts elsewhere. */ if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) { /* The connection resource is not going to run anywhere */ if (cluster_node && cluster_node->details->unclean) { /* The remote connection is failed because its resource is on a * failed node and can't be recovered elsewhere, so we must fence. */ return remote_state_failed; } if (is_not_set(remote_rsc->flags, pe_rsc_failed)) { /* Connection resource is cleanly stopped */ return remote_state_stopped; } /* Connection resource is failed */ if ((remote_rsc->next_role == RSC_ROLE_STOPPED) && remote_rsc->remote_reconnect_ms && node->details->remote_was_fenced && !pe__shutdown_requested(node)) { /* We won't know whether the connection is recoverable until the * reconnect interval expires and we reattempt connection. */ return remote_state_unknown; } /* The remote connection is in a failed state. If there are any * resources known to be active on it (stop) or in an unknown state * (probe), we must assume the worst and fence it. */ return remote_state_failed; } else if (cluster_node == NULL) { /* Connection is recoverable but not currently running anywhere, see if we can recover it first */ return remote_state_unknown; } else if(cluster_node->details->unclean == TRUE || cluster_node->details->online == FALSE) { /* Connection is running on a dead node, see if we can recover it first */ return remote_state_resting; } else if (pcmk__list_of_multiple(remote_rsc->running_on) && remote_rsc->partial_migration_source && remote_rsc->partial_migration_target) { /* We're in the middle of migrating a connection resource, * wait until after the resource migrates before performing * any actions. */ return remote_state_resting; } return remote_state_alive; } /*! * \internal * \brief Order actions on remote node relative to actions for the connection */ static void apply_remote_ordering(pe_action_t *action, pe_working_set_t *data_set) { pe_resource_t *remote_rsc = NULL; enum action_tasks task = text2task(action->task); enum remote_connection_state state = get_remote_node_state(action->node); enum pe_ordering order_opts = pe_order_none; if (action->rsc == NULL) { return; } CRM_ASSERT(action->node); CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); crm_trace("Order %s action %s relative to %s%s (state: %s)", action->task, action->uuid, is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, state2text(state)); if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: order_opts = pe_order_none; if (state == remote_state_failed) { /* Force recovery, by making this action required */ order_opts |= pe_order_implies_then; } /* Ensure connection is up before running this action */ order_start_then_action(remote_rsc, action, order_opts, data_set); break; case stop_rsc: if(state == remote_state_alive) { order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else if(state == remote_state_failed) { /* The resource is active on the node, but since we don't have a * valid connection, the only way to stop the resource is by * fencing the node. There is no need to order the stop relative * to the remote connection, since the stop will become implied * by the fencing. */ pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable", FALSE); } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) { /* State must be remote_state_unknown or remote_state_stopped. * Since the connection is not coming back up in this * transition, stop this resource first. */ order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else { /* The connection is going to be started somewhere else, so * stop this resource after that completes. */ order_start_then_action(remote_rsc, action, pe_order_none, data_set); } break; case action_demote: /* Only order this demote relative to the connection start if the * connection isn't being torn down. Otherwise, the demote would be * blocked because the connection start would not be allowed. */ if(state == remote_state_resting || state == remote_state_unknown) { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } /* Otherwise we can rely on the stop ordering */ break; default: /* Wait for the connection resource to be up */ if (is_recurring_action(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ order_start_then_action(remote_rsc, action, pe_order_implies_then, data_set); } else { pe_node_t *cluster_node = pe__current_node(remote_rsc); if(task == monitor_rsc && state == remote_state_failed) { /* We would only be here if we do not know the * state of the resource on the remote node. * Since we have no way to find out, it is * necessary to fence the node. */ pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable", FALSE); } if(cluster_node && state == remote_state_stopped) { /* The connection is currently up, but is going * down permanently. * * Make sure we check services are actually * stopped _before_ we let the connection get * closed */ order_action_then_stop(action, remote_rsc, pe_order_runnable_left, data_set); } else { order_start_then_action(remote_rsc, action, pe_order_none, data_set); } } break; } } static void apply_remote_node_ordering(pe_working_set_t *data_set) { if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { return; } for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; pe_resource_t *remote = NULL; // We are only interested in resource actions if (action->rsc == NULL) { continue; } /* Special case: If we are clearing the failcount of an actual * remote connection resource, then make sure this happens before * any start of the resource in this transition. */ if (action->rsc->is_remote_node && pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, pcmk__str_casei)) { custom_action_order(action->rsc, NULL, action, action->rsc, pcmk__op_key(action->rsc->id, RSC_START, 0), NULL, pe_order_optional, data_set); continue; } // We are only interested in actions allocated to a node if (action->node == NULL) { continue; } if (!pe__is_guest_or_remote_node(action->node)) { continue; } /* We are only interested in real actions. * * @TODO This is probably wrong; pseudo-actions might be converted to * real actions and vice versa later in update_actions() at the end of * stage7(). */ if (is_set(action->flags, pe_action_pseudo)) { continue; } remote = action->node->details->remote_rsc; if (remote == NULL) { // Orphaned continue; } /* Another special case: if a resource is moving to a Pacemaker Remote * node, order the stop on the original node after any start of the * remote connection. This ensures that if the connection fails to * start, we leave the resource running on the original node. */ if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) { for (GList *item = action->rsc->actions; item != NULL; item = item->next) { pe_action_t *rsc_action = item->data; if ((rsc_action->node->details != action->node->details) && pcmk__str_eq(rsc_action->task, RSC_STOP, pcmk__str_casei)) { custom_action_order(remote, start_key(remote), NULL, action->rsc, NULL, rsc_action, pe_order_optional, data_set); } } } /* The action occurs across a remote connection, so create * ordering constraints that guarantee the action occurs while the node * is active (after start, before stop ... things like that). * * This is somewhat brittle in that we need to make sure the results of * this ordering are compatible with the result of get_router_node(). * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part * of this logic rather than action2xml(). */ if (remote->container) { crm_trace("Container ordering for %s", action->uuid); apply_container_ordering(action, data_set); } else { crm_trace("Remote ordering for %s", action->uuid); apply_remote_ordering(action, data_set); } } } static gboolean order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action) { /* No need to probe the resource on the node that is being * unfenced. Otherwise it might introduce transition loop * since probe will be performed after the node is * unfenced. */ if (pcmk__str_eq(rh_action->task, CRM_OP_FENCE, pcmk__str_casei) && probe->node && rh_action->node && probe->node->details == rh_action->node->details) { const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action"); if (pcmk__str_eq(op, "on", pcmk__str_casei)) { return TRUE; } } // Shutdown waits for probe to complete only if it's on the same node if ((pcmk__str_eq(rh_action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) && probe->node && rh_action->node && probe->node->details != rh_action->node->details) { return TRUE; } return FALSE; } static void order_first_probes_imply_stops(pe_working_set_t * data_set) { GListPtr gIter = NULL; for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) { pe__ordering_t *order = gIter->data; enum pe_ordering order_type = pe_order_optional; pe_resource_t *lh_rsc = order->lh_rsc; pe_resource_t *rh_rsc = order->rh_rsc; pe_action_t *lh_action = order->lh_action; pe_action_t *rh_action = order->rh_action; const char *lh_action_task = order->lh_action_task; const char *rh_action_task = order->rh_action_task; GListPtr probes = NULL; GListPtr rh_actions = NULL; GListPtr pIter = NULL; if (lh_rsc == NULL) { continue; } else if (rh_rsc && lh_rsc == rh_rsc) { continue; } if (lh_action == NULL && lh_action_task == NULL) { continue; } if (rh_action == NULL && rh_action_task == NULL) { continue; } /* Technically probe is expected to return "not running", which could be * the alternative of stop action if the status of the resource is * unknown yet. */ if (lh_action && !pcmk__str_eq(lh_action->task, RSC_STOP, pcmk__str_casei)) { continue; } else if (lh_action == NULL && lh_action_task && !pcmk__ends_with(lh_action_task, "_" RSC_STOP "_0")) { continue; } /* Do not probe the resource inside of a stopping container. Otherwise * it might introduce transition loop since probe will be performed * after the container starts again. */ if (rh_rsc && lh_rsc->container == rh_rsc) { if (rh_action && pcmk__str_eq(rh_action->task, RSC_STOP, pcmk__str_casei)) { continue; } else if (rh_action == NULL && rh_action_task && pcmk__ends_with(rh_action_task,"_" RSC_STOP "_0")) { continue; } } if (order->type == pe_order_none) { continue; } // Preserve the order options for future filtering if (is_set(order->type, pe_order_apply_first_non_migratable)) { set_bit(order_type, pe_order_apply_first_non_migratable); } if (is_set(order->type, pe_order_same_node)) { set_bit(order_type, pe_order_same_node); } // Keep the order types for future filtering if (order->type == pe_order_anti_colocation || order->type == pe_order_load) { order_type = order->type; } probes = pe__resource_actions(lh_rsc, NULL, RSC_STATUS, FALSE); if (probes == NULL) { continue; } if (rh_action) { rh_actions = g_list_prepend(rh_actions, rh_action); } else if (rh_rsc && rh_action_task) { rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL); } if (rh_actions == NULL) { g_list_free(probes); continue; } crm_trace("Processing for LH probe based on ordering constraint %s -> %s" " (id=%d, type=%.6x)", lh_action ? lh_action->uuid : lh_action_task, rh_action ? rh_action->uuid : rh_action_task, order->id, order->type); for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; GListPtr rIter = NULL; for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) { pe_action_t *rh_action_iter = (pe_action_t *) rIter->data; if (order_first_probe_unneeded(probe, rh_action_iter)) { continue; } order_actions(probe, rh_action_iter, order_type); } } g_list_free(rh_actions); g_list_free(probes); } } static void order_first_probe_then_restart_repromote(pe_action_t * probe, pe_action_t * after, pe_working_set_t * data_set) { GListPtr gIter = NULL; bool interleave = FALSE; pe_resource_t *compatible_rsc = NULL; if (probe == NULL || probe->rsc == NULL || probe->rsc->variant != pe_native) { return; } if (after == NULL // Avoid running into any possible loop || is_set(after->flags, pe_action_tracking)) { return; } if (!pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) { return; } pe__set_action_flags(after, pe_action_tracking); crm_trace("Processing based on %s %s -> %s %s", probe->uuid, probe->node ? probe->node->details->uname: "", after->uuid, after->node ? after->node->details->uname : ""); if (after->rsc /* Better not build a dependency directly with a clone/group. * We are going to proceed through the ordering chain and build * dependencies with its children. */ && after->rsc->variant == pe_native && probe->rsc != after->rsc) { GListPtr then_actions = NULL; enum pe_ordering probe_order_type = pe_order_optional; if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) { then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP, FALSE); } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) { then_actions = pe__resource_actions(after->rsc, NULL, RSC_DEMOTE, FALSE); } for (gIter = then_actions; gIter != NULL; gIter = gIter->next) { pe_action_t *then = (pe_action_t *) gIter->data; // Skip any pseudo action which for example is implied by fencing if (is_set(then->flags, pe_action_pseudo)) { continue; } order_actions(probe, then, probe_order_type); } g_list_free(then_actions); } if (after->rsc && after->rsc->variant > pe_group) { const char *interleave_s = g_hash_table_lookup(after->rsc->meta, XML_RSC_ATTR_INTERLEAVE); interleave = crm_is_true(interleave_s); if (interleave) { /* For an interleaved clone, we should build a dependency only * with the relevant clone child. */ compatible_rsc = find_compatible_child(probe->rsc, after->rsc, RSC_ROLE_UNKNOWN, FALSE, data_set); } } for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) gIter->data; /* pe_order_implies_then is the reason why a required A.start * implies/enforces B.start to be required too, which is the cause of * B.restart/re-promote. * * Not sure about pe_order_implies_then_on_node though. It's now only * used for unfencing case, which tends to introduce transition * loops... */ if (is_not_set(after_wrapper->type, pe_order_implies_then)) { /* The order type between a group/clone and its child such as * B.start-> B_child.start is: * pe_order_implies_first_printed | pe_order_runnable_left * * Proceed through the ordering chain and build dependencies with * its children. */ if (after->rsc == NULL || after->rsc->variant < pe_group || probe->rsc->parent == after->rsc || after_wrapper->action->rsc == NULL || after_wrapper->action->rsc->variant > pe_group || after->rsc != after_wrapper->action->rsc->parent) { continue; } /* Proceed to the children of a group or a non-interleaved clone. * For an interleaved clone, proceed only to the relevant child. */ if (after->rsc->variant > pe_group && interleave == TRUE && (compatible_rsc == NULL || compatible_rsc != after_wrapper->action->rsc)) { continue; } } crm_trace("Proceeding through %s %s -> %s %s (type=0x%.6x)", after->uuid, after->node ? after->node->details->uname: "", after_wrapper->action->uuid, after_wrapper->action->node ? after_wrapper->action->node->details->uname : "", after_wrapper->type); order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set); } } static void clear_actions_tracking_flag(pe_working_set_t * data_set) { GListPtr gIter = NULL; for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (is_set(action->flags, pe_action_tracking)) { pe__clear_action_flags(action, pe_action_tracking); } } } static void order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; GListPtr probes = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t * child = (pe_resource_t *) gIter->data; order_first_rsc_probes(child, data_set); } if (rsc->variant != pe_native) { return; } probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE); for (gIter = probes; gIter != NULL; gIter= gIter->next) { pe_action_t *probe = (pe_action_t *) gIter->data; GListPtr aIter = NULL; for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) { pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) aIter->data; order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set); clear_actions_tracking_flag(data_set); } } g_list_free(probes); } static void order_first_probes(pe_working_set_t * data_set) { GListPtr gIter = NULL; for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; order_first_rsc_probes(rsc, data_set); } order_first_probes_imply_stops(data_set); } static void order_then_probes(pe_working_set_t * data_set) { #if 0 GListPtr gIter = NULL; for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* Given "A then B", we would prefer to wait for A to be * started before probing B. * * If A was a filesystem on which the binaries and data for B * lived, it would have been useful if the author of B's agent * could assume that A is running before B.monitor will be * called. * * However we can't _only_ probe once A is running, otherwise * we'd not detect the state of B if A could not be started * for some reason. * * In practice however, we cannot even do an opportunistic * version of this because B may be moving: * * B.probe -> B.start * B.probe -> B.stop * B.stop -> B.start * A.stop -> A.start * A.start -> B.probe * * So far so good, but if we add the result of this code: * * B.stop -> A.stop * * Then we get a loop: * * B.probe -> B.stop -> A.stop -> A.start -> B.probe * * We could kill the 'B.probe -> B.stop' dependency, but that * could mean stopping B "too" soon, because B.start must wait * for the probes to complete. * * Another option is to allow it only if A is a non-unique * clone with clone-max == node-max (since we'll never be * moving it). However, we could still be stopping one * instance at the same time as starting another. * The complexity of checking for allowed conditions combined * with the ever narrowing usecase suggests that this code * should remain disabled until someone gets smarter. */ pe_action_t *start = NULL; GListPtr actions = NULL; GListPtr probes = NULL; actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE); if (actions) { start = actions->data; g_list_free(actions); } if(start == NULL) { crm_err("No start action for %s", rsc->id); continue; } probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE); for (actions = start->actions_before; actions != NULL; actions = actions->next) { pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data; GListPtr pIter = NULL; pe_action_t *first = before->action; pe_resource_t *first_rsc = first->rsc; if(first->required_runnable_before) { GListPtr clone_actions = NULL; for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) { before = (pe_action_wrapper_t *) clone_actions->data; crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid); CRM_ASSERT(before->action->rsc); first_rsc = before->action->rsc; break; } } else if(!pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) { crm_trace("Not a start op %s for %s", first->uuid, start->uuid); } if(first_rsc == NULL) { continue; } else if(uber_parent(first_rsc) == uber_parent(start->rsc)) { crm_trace("Same parent %s for %s", first_rsc->id, start->uuid); continue; } else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) { crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid); continue; } crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant); for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; crm_err("Ordering %s before %s", first->uuid, probe->uuid); order_actions(first, probe, pe_order_optional); } } } #endif } static void order_probes(pe_working_set_t * data_set) { order_first_probes(data_set); order_then_probes(data_set); } gboolean stage7(pe_working_set_t * data_set) { GList *gIter = NULL; crm_trace("Applying ordering constraints"); /* Don't ask me why, but apparently they need to be processed in * the order they were created in... go figure * * Also g_list_append() has horrendous performance characteristics * So we need to use g_list_prepend() and then reverse the list here */ data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints); for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) { pe__ordering_t *order = gIter->data; pe_resource_t *rsc = order->lh_rsc; crm_trace("Applying ordering constraint: %d", order->id); if (rsc != NULL) { crm_trace("rsc_action-to-*"); rsc_order_first(rsc, order, data_set); continue; } rsc = order->rh_rsc; if (rsc != NULL) { crm_trace("action-to-rsc_action"); rsc_order_then(order->lh_action, rsc, order); } else { crm_trace("action-to-action"); order_actions(order->lh_action, order->rh_action, order->type); } } for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; update_colo_start_chain(action, data_set); } crm_trace("Ordering probes"); order_probes(data_set); crm_trace("Updating %d actions", g_list_length(data_set->actions)); for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; update_action(action, data_set); } // Check for invalid orderings for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; pe_action_wrapper_t *input = NULL; for (GList *input_iter = action->actions_before; input_iter != NULL; input_iter = input_iter->next) { input = (pe_action_wrapper_t *) input_iter->data; if (pcmk__ordering_is_invalid(action, input)) { input->type = pe_order_none; } } } LogNodeActions(data_set, FALSE); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; LogActions(rsc, data_set, FALSE); } return TRUE; } static int transition_id = -1; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const char *filename) { if (was_processing_error) { crm_err("Calculated transition %d (with errors), saving inputs in %s", transition_id, filename); } else if (was_processing_warning) { crm_warn("Calculated transition %d (with warnings), saving inputs in %s", transition_id, filename); } else { crm_notice("Calculated transition %d, saving inputs in %s", transition_id, filename); } if (crm_config_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } /* * Create a dependency graph to send to the transitioner (via the controller) */ gboolean stage8(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *value = NULL; transition_id++; crm_trace("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if (crm_parse_ll(value, NULL) > 0) { crm_xml_add(data_set->graph, "migration-limit", value); } if (data_set->recheck_by > 0) { char *recheck_epoch = NULL; recheck_epoch = crm_strdup_printf("%llu", (long long) data_set->recheck_by); crm_xml_add(data_set->graph, "recheck-by", recheck_epoch); free(recheck_epoch); } /* errors... slist_iter(action, pe_action_t, action_list, lpc, if(action->optional == FALSE && action->runnable == FALSE) { print_action("Ignoring", action, TRUE); } ); */ /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. */ gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id); rsc->cmds->expand(rsc, data_set); } crm_log_xml_trace(data_set->graph, "created resource-driven action list"); /* pseudo action to distribute list of nodes with maintenance state update */ add_maintenance_update(data_set); /* catch any non-resource specific actions */ crm_trace("processing non-resource actions"); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (action->rsc && action->node && action->node->details->shutdown && is_not_set(action->rsc->flags, pe_rsc_maintenance) && is_not_set(action->flags, pe_action_optional) && is_not_set(action->flags, pe_action_runnable) && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none) ) { /* Eventually we should just ignore the 'fence' case * But for now it's the best way to detect (in CTS) when * CIB resource updates are being lost */ if (is_set(data_set->flags, pe_flag_have_quorum) || data_set->no_quorum_policy == no_quorum_ignore) { crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)", action->node->details->unclean ? "fence" : "shut down", action->node->details->uname, action->rsc->id, is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked", is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "", action->uuid); } } graph_element_from_action(action, data_set); } crm_log_xml_trace(data_set->graph, "created generic action list"); crm_trace("Created transition graph %d.", transition_id); return TRUE; } void LogNodeActions(pe_working_set_t * data_set, gboolean terminal) { GListPtr gIter = NULL; for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { char *node_name = NULL; char *task = NULL; pe_action_t *action = (pe_action_t *) gIter->data; if (action->rsc != NULL) { continue; } else if (is_set(action->flags, pe_action_optional)) { continue; } if (pe__is_guest_node(action->node)) { node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id); } else if(action->node) { node_name = crm_strdup_printf("%s", action->node->details->uname); } if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { task = strdup("Shutdown"); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { const char *op = g_hash_table_lookup(action->meta, "stonith_action"); task = crm_strdup_printf("Fence (%s)", op); } if(task == NULL) { /* Nothing to report */ } else if(terminal && action->reason) { printf(" * %s %s '%s'\n", task, node_name, action->reason); } else if(terminal) { printf(" * %s %s\n", task, node_name); } else if(action->reason) { crm_notice(" * %s %s '%s'\n", task, node_name, action->reason); } else { crm_notice(" * %s %s\n", task, node_name); } free(node_name); free(task); } } diff --git a/lib/pacemaker/pcmk_sched_graph.c b/lib/pacemaker/pcmk_sched_graph.c index dd4fdbf2e3..43caa78536 100644 --- a/lib/pacemaker/pcmk_sched_graph.c +++ b/lib/pacemaker/pcmk_sched_graph.c @@ -1,1848 +1,1849 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include void update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set); gboolean rsc_update_action(pe_action_t * first, pe_action_t * then, enum pe_ordering type); static enum pe_action_flags get_action_flags(pe_action_t * action, pe_node_t * node) { enum pe_action_flags flags = action->flags; if (action->rsc) { flags = action->rsc->cmds->action_flags(action, NULL); if (pe_rsc_is_clone(action->rsc) && node) { /* We only care about activity on $node */ enum pe_action_flags clone_flags = action->rsc->cmds->action_flags(action, node); /* Go to great lengths to ensure the correct value for pe_action_runnable... * * If we are a clone, then for _ordering_ constraints, it's only relevant * if we are runnable _anywhere_. * * This only applies to _runnable_ though, and only for ordering constraints. * If this function is ever used during colocation, then we'll need additional logic * * Not very satisfying, but it's logical and appears to work well. */ if (is_not_set(clone_flags, pe_action_runnable) && is_set(flags, pe_action_runnable)) { - pe_rsc_trace(action->rsc, "Fixing up runnable flag for %s", action->uuid); - set_bit(clone_flags, pe_action_runnable); + pe__set_raw_action_flags(clone_flags, action->rsc->id, + pe_action_runnable); } flags = clone_flags; } } return flags; } static char * convert_non_atomic_uuid(char *old_uuid, pe_resource_t * rsc, gboolean allow_notify, gboolean free_original) { guint interval_ms = 0; char *uuid = NULL; char *rid = NULL; char *raw_task = NULL; int task = no_action; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Processing %s", old_uuid); if (old_uuid == NULL) { return NULL; } else if (strstr(old_uuid, "notify") != NULL) { goto done; /* no conversion */ } else if (rsc->variant < pe_group) { goto done; /* no conversion */ } CRM_ASSERT(parse_op_key(old_uuid, &rid, &raw_task, &interval_ms)); if (interval_ms > 0) { goto done; /* no conversion */ } task = text2task(raw_task); switch (task) { case stop_rsc: case start_rsc: case action_notify: case action_promote: case action_demote: break; case stopped_rsc: case started_rsc: case action_notified: case action_promoted: case action_demoted: task--; break; case monitor_rsc: case shutdown_crm: case stonith_node: task = no_action; break; default: crm_err("Unknown action: %s", raw_task); task = no_action; break; } if (task != no_action) { if (is_set(rsc->flags, pe_rsc_notify) && allow_notify) { uuid = pcmk__notify_key(rid, "confirmed-post", task2text(task + 1)); } else { uuid = pcmk__op_key(rid, task2text(task + 1), 0); } pe_rsc_trace(rsc, "Converted %s -> %s", old_uuid, uuid); } done: if (uuid == NULL) { uuid = strdup(old_uuid); } if (free_original) { free(old_uuid); } free(raw_task); free(rid); return uuid; } static pe_action_t * rsc_expand_action(pe_action_t * action) { gboolean notify = FALSE; pe_action_t *result = action; pe_resource_t *rsc = action->rsc; if (rsc == NULL) { return action; } if ((rsc->parent == NULL) || (pe_rsc_is_clone(rsc) && (rsc->parent->variant == pe_container))) { /* Only outermost resources have notification actions. * The exception is those in bundles. */ notify = is_set(rsc->flags, pe_rsc_notify); } if (rsc->variant >= pe_group) { /* Expand 'start' -> 'started' */ char *uuid = NULL; uuid = convert_non_atomic_uuid(action->uuid, rsc, notify, FALSE); if (uuid) { pe_rsc_trace(rsc, "Converting %s to %s %d", action->uuid, uuid, is_set(rsc->flags, pe_rsc_notify)); result = find_first_action(rsc->actions, uuid, NULL, NULL); if (result == NULL) { crm_err("Couldn't expand %s to %s in %s", action->uuid, uuid, rsc->id); result = action; } free(uuid); } } return result; } static enum pe_graph_flags graph_update_action(pe_action_t * first, pe_action_t * then, pe_node_t * node, enum pe_action_flags first_flags, enum pe_action_flags then_flags, pe_action_wrapper_t *order, pe_working_set_t *data_set) { enum pe_graph_flags changed = pe_graph_none; enum pe_ordering type = order->type; gboolean processed = FALSE; /* TODO: Do as many of these in parallel as possible */ if(is_set(type, pe_order_implies_then_on_node)) { /* Normally we want the _whole_ 'then' clone to * restart if 'first' is restarted, so then->node is * needed. * * However for unfencing, we want to limit this to * instances on the same node as 'first' (the * unfencing operation), so first->node is supplied. * * Swap the node, from then on we can can treat it * like any other 'pe_order_implies_then' */ clear_bit(type, pe_order_implies_then_on_node); set_bit(type, pe_order_implies_then); node = first->node; } - clear_bit(first_flags, pe_action_pseudo); + pe__clear_raw_action_flags(first_flags, "first action update", + pe_action_pseudo); if (type & pe_order_implies_then) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, first_flags & pe_action_optional, pe_action_optional, pe_order_implies_then, data_set); } else if (is_set(first_flags, pe_action_optional) == FALSE) { if (update_action_flags(then, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__)) { changed |= pe_graph_updated_then; } } if (changed) { pe_rsc_trace(then->rsc, "implies right: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("implies right: %s then %s %p", first->uuid, then->uuid, then->rsc); } } if ((type & pe_order_restart) && then->rsc) { enum pe_action_flags restart = (pe_action_optional | pe_action_runnable); processed = TRUE; changed |= then->rsc->cmds->update_actions(first, then, node, first_flags, restart, pe_order_restart, data_set); if (changed) { pe_rsc_trace(then->rsc, "restart: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("restart: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_implies_first) { processed = TRUE; if (first->rsc) { changed |= first->rsc->cmds->update_actions(first, then, node, first_flags, pe_action_optional, pe_order_implies_first, data_set); } else if (is_set(first_flags, pe_action_optional) == FALSE) { pe_rsc_trace(first->rsc, "first unrunnable: %s (%d) then %s (%d)", first->uuid, is_set(first_flags, pe_action_optional), then->uuid, is_set(then_flags, pe_action_optional)); if (update_action_flags(first, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__)) { changed |= pe_graph_updated_first; } } if (changed) { pe_rsc_trace(then->rsc, "implies left: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("implies left: %s (%d) then %s (%d)", first->uuid, is_set(first_flags, pe_action_optional), then->uuid, is_set(then_flags, pe_action_optional)); } } if (type & pe_order_implies_first_master) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, first_flags & pe_action_optional, pe_action_optional, pe_order_implies_first_master, data_set); } if (changed) { pe_rsc_trace(then->rsc, "implies left when right rsc is Master role: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("implies left when right rsc is Master role: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_one_or_more) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, first_flags, pe_action_runnable, pe_order_one_or_more, data_set); } else if (is_set(first_flags, pe_action_runnable)) { /* alright. a "first" action is considered runnable, incremente * the 'runnable_before' counter */ then->runnable_before++; /* if the runnable before count for then exceeds the required number * of "before" runnable actions... mark then as runnable */ if (then->runnable_before >= then->required_runnable_before) { if (update_action_flags(then, pe_action_runnable, __FUNCTION__, __LINE__)) { changed |= pe_graph_updated_then; } } } if (changed) { pe_rsc_trace(then->rsc, "runnable_one_or_more: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("runnable_one_or_more: %s then %s", first->uuid, then->uuid); } } if (then->rsc && is_set(type, pe_order_probe)) { processed = TRUE; if (is_not_set(first_flags, pe_action_runnable) && first->rsc->running_on != NULL) { pe_rsc_trace(then->rsc, "Ignoring %s then %s - %s is about to be stopped", first->uuid, then->uuid, first->rsc->id); type = pe_order_none; order->type = pe_order_none; } else { pe_rsc_trace(then->rsc, "Enforcing %s then %s", first->uuid, then->uuid); changed |= then->rsc->cmds->update_actions(first, then, node, first_flags, pe_action_runnable, pe_order_runnable_left, data_set); } if (changed) { pe_rsc_trace(then->rsc, "runnable: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("runnable: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_runnable_left) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, first_flags, pe_action_runnable, pe_order_runnable_left, data_set); } else if (is_set(first_flags, pe_action_runnable) == FALSE) { pe_rsc_trace(then->rsc, "then unrunnable: %s then %s", first->uuid, then->uuid); if (update_action_flags(then, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__)) { changed |= pe_graph_updated_then; } } if (changed) { pe_rsc_trace(then->rsc, "runnable: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("runnable: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_implies_first_migratable) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, first_flags, pe_action_optional, pe_order_implies_first_migratable, data_set); } if (changed) { pe_rsc_trace(then->rsc, "optional: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("optional: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_pseudo_left) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, first_flags, pe_action_optional, pe_order_pseudo_left, data_set); } if (changed) { pe_rsc_trace(then->rsc, "optional: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("optional: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_optional) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, first_flags, pe_action_runnable, pe_order_optional, data_set); } if (changed) { pe_rsc_trace(then->rsc, "optional: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("optional: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_asymmetrical) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, first_flags, pe_action_runnable, pe_order_asymmetrical, data_set); } if (changed) { pe_rsc_trace(then->rsc, "asymmetrical: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("asymmetrical: %s then %s", first->uuid, then->uuid); } } if ((first->flags & pe_action_runnable) && (type & pe_order_implies_then_printed) && (first_flags & pe_action_optional) == 0) { processed = TRUE; crm_trace("%s implies %s printed", first->uuid, then->uuid); update_action_flags(then, pe_action_print_always, __FUNCTION__, __LINE__); /* don't care about changed */ } if (is_set(type, pe_order_implies_first_printed) && is_set(then_flags, pe_action_optional) == FALSE) { processed = TRUE; crm_trace("%s implies %s printed", then->uuid, first->uuid); update_action_flags(first, pe_action_print_always, __FUNCTION__, __LINE__); /* don't care about changed */ } if ((type & pe_order_implies_then || type & pe_order_implies_first || type & pe_order_restart) && first->rsc && pcmk__str_eq(first->task, RSC_STOP, pcmk__str_casei) && is_not_set(first->rsc->flags, pe_rsc_managed) && is_set(first->rsc->flags, pe_rsc_block) && is_not_set(first->flags, pe_action_runnable)) { if (update_action_flags(then, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__)) { changed |= pe_graph_updated_then; } if (changed) { pe_rsc_trace(then->rsc, "unmanaged left: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("unmanaged left: %s then %s", first->uuid, then->uuid); } } if (processed == FALSE) { crm_trace("Constraint 0x%.6x not applicable", type); } return changed; } static void mark_start_blocked(pe_resource_t *rsc, pe_resource_t *reason, pe_working_set_t *data_set) { GListPtr gIter = rsc->actions; char *reason_text = crm_strdup_printf("colocation with %s", reason->id); for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (!pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) { continue; } if (is_set(action->flags, pe_action_runnable)) { pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, reason_text, pe_action_runnable, FALSE); update_colo_start_chain(action, data_set); update_action(action, data_set); } } free(reason_text); } void update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set) { GListPtr gIter = NULL; pe_resource_t *rsc = NULL; if (is_not_set(action->flags, pe_action_runnable) && pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) { rsc = uber_parent(action->rsc); if (rsc->parent) { /* For bundles, uber_parent() returns the clone/master, not the * bundle, so the existence of rsc->parent implies this is a bundle. * In this case, we need the bundle resource, so that we can check * if all containers are stopped/stopping. */ rsc = rsc->parent; } } if (rsc == NULL || rsc->rsc_cons_lhs == NULL) { return; } /* if rsc has children, all the children need to have start set to * unrunnable before we follow the colo chain for the parent. */ for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *)gIter->data; pe_action_t *start = find_first_action(child->actions, NULL, RSC_START, NULL); if (start == NULL || is_set(start->flags, pe_action_runnable)) { return; } } for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *colocate_with = (rsc_colocation_t *)gIter->data; if (colocate_with->score == INFINITY) { mark_start_blocked(colocate_with->rsc_lh, action->rsc, data_set); } } } gboolean update_action(pe_action_t *then, pe_working_set_t *data_set) { GListPtr lpc = NULL; enum pe_graph_flags changed = pe_graph_none; int last_flags = then->flags; crm_trace("Processing %s (%s %s %s)", then->uuid, is_set(then->flags, pe_action_optional) ? "optional" : "required", is_set(then->flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(then->flags, pe_action_pseudo) ? "pseudo" : then->node ? then->node->details->uname : ""); if (is_set(then->flags, pe_action_requires_any)) { /* initialize current known runnable before actions to 0 * from here as graph_update_action is called for each of * then's before actions, this number will increment as * runnable 'first' actions are encountered */ then->runnable_before = 0; /* for backwards compatibility with previous options that use * the 'requires_any' flag, initialize required to 1 if it is * not set. */ if (then->required_runnable_before == 0) { then->required_runnable_before = 1; } pe__clear_action_flags(then, pe_action_runnable); /* We are relying on the pe_order_one_or_more clause of * graph_update_action(), called as part of the: * * 'if (first == other->action)' * * block below, to set this back if appropriate */ } for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data; pe_action_t *first = other->action; pe_node_t *then_node = then->node; pe_node_t *first_node = first->node; enum pe_action_flags then_flags = 0; enum pe_action_flags first_flags = 0; if (first->rsc && first->rsc->variant == pe_group && pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) { first_node = first->rsc->fns->location(first->rsc, NULL, FALSE); if (first_node) { crm_trace("First: Found node %s for %s", first_node->details->uname, first->uuid); } } if (then->rsc && then->rsc->variant == pe_group && pcmk__str_eq(then->task, RSC_START, pcmk__str_casei)) { then_node = then->rsc->fns->location(then->rsc, NULL, FALSE); if (then_node) { crm_trace("Then: Found node %s for %s", then_node->details->uname, then->uuid); } } /* Disable constraint if it only applies when on same node, but isn't */ if (is_set(other->type, pe_order_same_node) && first_node && then_node && (first_node->details != then_node->details)) { crm_trace("Disabled constraint %s on %s -> %s on %s", other->action->uuid, first_node->details->uname, then->uuid, then_node->details->uname); other->type = pe_order_none; continue; } clear_bit(changed, pe_graph_updated_first); if (first->rsc && is_set(other->type, pe_order_then_cancels_first) && is_not_set(then->flags, pe_action_optional)) { /* 'then' is required, so we must abandon 'first' * (e.g. a required stop cancels any reload). */ - set_bit(other->action->flags, pe_action_optional); + pe__set_action_flags(other->action, pe_action_optional); if (!strcmp(first->task, CRMD_ACTION_RELOAD)) { clear_bit(first->rsc->flags, pe_rsc_reload); } } if (first->rsc && then->rsc && (first->rsc != then->rsc) && (is_parent(then->rsc, first->rsc) == FALSE)) { first = rsc_expand_action(first); } if (first != other->action) { crm_trace("Ordering %s after %s instead of %s", then->uuid, first->uuid, other->action->uuid); } first_flags = get_action_flags(first, then_node); then_flags = get_action_flags(then, first_node); crm_trace("Checking %s (%s %s %s) against %s (%s %s %s) filter=0x%.6x type=0x%.6x", then->uuid, is_set(then_flags, pe_action_optional) ? "optional" : "required", is_set(then_flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(then_flags, pe_action_pseudo) ? "pseudo" : then->node ? then->node->details-> uname : "", first->uuid, is_set(first_flags, pe_action_optional) ? "optional" : "required", is_set(first_flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(first_flags, pe_action_pseudo) ? "pseudo" : first->node ? first->node->details-> uname : "", first_flags, other->type); if (first == other->action) { /* * 'first' was not expanded (e.g. from 'start' to 'running'), which could mean it: * - has no associated resource, * - was a primitive, * - was pre-expanded (e.g. 'running' instead of 'start') * * The third argument here to graph_update_action() is a node which is used under two conditions: * - Interleaving, in which case first->node and * then->node are equal (and NULL) * - If 'then' is a clone, to limit the scope of the * constraint to instances on the supplied node * */ pe_node_t *node = then->node; changed |= graph_update_action(first, then, node, first_flags, then_flags, other, data_set); /* 'first' was for a complex resource (clone, group, etc), * create a new dependency if necessary */ } else if (order_actions(first, then, other->type)) { /* This was the first time 'first' and 'then' were associated, * start again to get the new actions_before list */ changed |= (pe_graph_updated_then | pe_graph_disable); } if (changed & pe_graph_disable) { crm_trace("Disabled constraint %s -> %s in favor of %s -> %s", other->action->uuid, then->uuid, first->uuid, then->uuid); clear_bit(changed, pe_graph_disable); other->type = pe_order_none; } if (changed & pe_graph_updated_first) { GListPtr lpc2 = NULL; crm_trace("Updated %s (first %s %s %s), processing dependents ", first->uuid, is_set(first->flags, pe_action_optional) ? "optional" : "required", is_set(first->flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(first->flags, pe_action_pseudo) ? "pseudo" : first->node ? first->node->details-> uname : ""); for (lpc2 = first->actions_after; lpc2 != NULL; lpc2 = lpc2->next) { pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc2->data; update_action(other->action, data_set); } update_action(first, data_set); } } if (is_set(then->flags, pe_action_requires_any)) { if (last_flags != then->flags) { changed |= pe_graph_updated_then; } else { clear_bit(changed, pe_graph_updated_then); } } if (changed & pe_graph_updated_then) { crm_trace("Updated %s (then %s %s %s), processing dependents ", then->uuid, is_set(then->flags, pe_action_optional) ? "optional" : "required", is_set(then->flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(then->flags, pe_action_pseudo) ? "pseudo" : then->node ? then->node->details-> uname : ""); if (is_set(last_flags, pe_action_runnable) && is_not_set(then->flags, pe_action_runnable)) { update_colo_start_chain(then, data_set); } update_action(then, data_set); for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data; update_action(other->action, data_set); } } return FALSE; } gboolean shutdown_constraints(pe_node_t * node, pe_action_t * shutdown_op, pe_working_set_t * data_set) { /* add the stop to the before lists so it counts as a pre-req * for the shutdown */ GListPtr lpc = NULL; for (lpc = data_set->actions; lpc != NULL; lpc = lpc->next) { pe_action_t *action = (pe_action_t *) lpc->data; if (action->rsc == NULL || action->node == NULL) { continue; } else if (action->node->details != node->details) { continue; } else if (is_set(action->rsc->flags, pe_rsc_maintenance)) { pe_rsc_trace(action->rsc, "Skipping %s: maintenance mode", action->uuid); continue; } else if (node->details->maintenance) { pe_rsc_trace(action->rsc, "Skipping %s: node %s is in maintenance mode", action->uuid, node->details->uname); continue; } else if (!pcmk__str_eq(action->task, RSC_STOP, pcmk__str_casei)) { continue; } else if (is_not_set(action->rsc->flags, pe_rsc_managed) && is_not_set(action->rsc->flags, pe_rsc_block)) { /* * If another action depends on this one, we may still end up blocking */ pe_rsc_trace(action->rsc, "Skipping %s: unmanaged", action->uuid); continue; } pe_rsc_trace(action->rsc, "Ordering %s before shutdown on %s", action->uuid, node->details->uname); pe__clear_action_flags(action, pe_action_optional); custom_action_order(action->rsc, NULL, action, NULL, strdup(CRM_OP_SHUTDOWN), shutdown_op, pe_order_optional | pe_order_runnable_left, data_set); } return TRUE; } /*! * \internal * \brief Order all actions appropriately relative to a fencing operation * * Ensure start operations of affected resources are ordered after fencing, * imply stop and demote operations of affected resources by marking them as * pseudo-actions, etc. * * \param[in] stonith_op Fencing operation * \param[in,out] data_set Working set of cluster */ void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set) { CRM_CHECK(stonith_op && data_set, return); for (GList *r = data_set->resources; r != NULL; r = r->next) { rsc_stonith_ordering((pe_resource_t *) r->data, stonith_op, data_set); } } static pe_node_t * get_router_node(pe_action_t *action) { pe_node_t *began_on = NULL; pe_node_t *ended_on = NULL; pe_node_t *router_node = NULL; bool partial_migration = FALSE; const char *task = action->task; if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei) || !pe__is_guest_or_remote_node(action->node)) { return NULL; } CRM_ASSERT(action->node->details->remote_rsc != NULL); began_on = pe__current_node(action->node->details->remote_rsc); ended_on = action->node->details->remote_rsc->allocated_to; if (action->node->details->remote_rsc && (action->node->details->remote_rsc->container == NULL) && action->node->details->remote_rsc->partial_migration_target) { partial_migration = TRUE; } /* if there is only one location to choose from, * this is easy. Check for those conditions first */ if (!began_on || !ended_on) { /* remote rsc is either shutting down or starting up */ return began_on ? began_on : ended_on; } else if (began_on->details == ended_on->details) { /* remote rsc didn't move nodes. */ return began_on; } /* If we have get here, we know the remote resource * began on one node and is moving to another node. * * This means some actions will get routed through the cluster * node the connection rsc began on, and others are routed through * the cluster node the connection rsc ends up on. * * 1. stop, demote, migrate actions of resources living in the remote * node _MUST_ occur _BEFORE_ the connection can move (these actions * are all required before the remote rsc stop action can occur.) In * this case, we know these actions have to be routed through the initial * cluster node the connection resource lived on before the move takes place. * The exception is a partial migration of a (non-guest) remote * connection resource; in that case, all actions (even these) will be * ordered after the connection's pseudo-start on the migration target, * so the target is the router node. * * 2. Everything else (start, promote, monitor, probe, refresh, clear failcount * delete ....) must occur after the resource starts on the node it is * moving to. */ if (pcmk__str_eq(task, "notify", pcmk__str_casei)) { task = g_hash_table_lookup(action->meta, "notify_operation"); } /* 1. before connection rsc moves. */ if (pcmk__strcase_any_of(task, "stop", "demote", "migrate_from", "migrate_to", NULL) && !partial_migration) { router_node = began_on; /* 2. after connection rsc moves. */ } else { router_node = ended_on; } return router_node; } /*! * \internal * \brief Add an XML node tag for a specified ID * * \param[in] id Node UUID to add * \param[in,out] xml Parent XML tag to add to */ static xmlNode* add_node_to_xml_by_id(const char *id, xmlNode *xml) { xmlNode *node_xml; node_xml = create_xml_node(xml, XML_CIB_TAG_NODE); crm_xml_add(node_xml, XML_ATTR_UUID, id); return node_xml; } /*! * \internal * \brief Add an XML node tag for a specified node * * \param[in] node Node to add * \param[in,out] xml XML to add node to */ static void add_node_to_xml(const pe_node_t *node, void *xml) { add_node_to_xml_by_id(node->details->id, (xmlNode *) xml); } /*! * \internal * \brief Add XML with nodes that need an update of their maintenance state * * \param[in,out] xml Parent XML tag to add to * \param[in] data_set Working set for cluster */ static int add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set) { GListPtr gIter = NULL; xmlNode *maintenance = xml?create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE):NULL; int count = 0; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; struct pe_node_shared_s *details = node->details; if (!pe__is_guest_or_remote_node(node)) { continue; /* just remote nodes need to know atm */ } if (details->maintenance != details->remote_maintenance) { if (maintenance) { crm_xml_add( add_node_to_xml_by_id(node->details->id, maintenance), XML_NODE_IS_MAINTENANCE, details->maintenance?"1":"0"); } count++; } } crm_trace("%s %d nodes to adjust maintenance-mode " "to transition", maintenance?"Added":"Counted", count); return count; } /*! * \internal * \brief Add pseudo action with nodes needing maintenance state update * * \param[in,out] data_set Working set for cluster */ void add_maintenance_update(pe_working_set_t *data_set) { pe_action_t *action = NULL; if (add_maintenance_nodes(NULL, data_set)) { crm_trace("adding maintenance state update pseudo action"); action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set); - set_bit(action->flags, pe_action_print_always); + pe__set_action_flags(action, pe_action_print_always); } } /*! * \internal * \brief Add XML with nodes that an action is expected to bring down * * If a specified action is expected to bring any nodes down, add an XML block * with their UUIDs. When a node is lost, this allows the controller to * determine whether it was expected. * * \param[in,out] xml Parent XML tag to add to * \param[in] action Action to check for downed nodes * \param[in] data_set Working set for cluster */ static void add_downed_nodes(xmlNode *xml, const pe_action_t *action, const pe_working_set_t *data_set) { CRM_CHECK(xml && action && action->node && data_set, return); if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { /* Shutdown makes the action's node down */ xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { /* Fencing makes the action's node and any hosted guest nodes down */ const char *fence = g_hash_table_lookup(action->meta, "stonith_action"); if (pcmk__strcase_any_of(fence, "off", "reboot", NULL)) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed); } } else if (action->rsc && action->rsc->is_remote_node && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) { /* Stopping a remote connection resource makes connected node down, * unless it's part of a migration */ GListPtr iter; pe_action_t *input; gboolean migrating = FALSE; for (iter = action->actions_before; iter != NULL; iter = iter->next) { input = ((pe_action_wrapper_t *) iter->data)->action; if (input->rsc && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_casei) && pcmk__str_eq(input->task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { migrating = TRUE; break; } } if (!migrating) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->rsc->id, downed); } } } static bool should_lock_action(pe_action_t *action) { // Only actions taking place on resource's lock node are locked if ((action->rsc->lock_node == NULL) || (action->node == NULL) || (action->node->details != action->rsc->lock_node->details)) { return false; } /* During shutdown, only stops are locked (otherwise, another action such as * a demote would cause the controller to clear the lock) */ if (action->node->details->shutdown && action->task && strcmp(action->task, RSC_STOP)) { return false; } return true; } static xmlNode * action2xml(pe_action_t * action, gboolean as_input, pe_working_set_t *data_set) { gboolean needs_node_info = TRUE; gboolean needs_maintenance_info = FALSE; xmlNode *action_xml = NULL; xmlNode *args_xml = NULL; #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t *rsc_details = NULL; #endif if (action == NULL) { return NULL; } if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { /* All fences need node info; guest node fences are pseudo-events */ action_xml = create_xml_node(NULL, is_set(action->flags, pe_action_pseudo)? XML_GRAPH_TAG_PSEUDO_EVENT : XML_GRAPH_TAG_CRM_EVENT); } else if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); } else if (pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, pcmk__str_casei)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); } else if (pcmk__str_eq(action->task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { // CIB-only clean-up for shutdown locks action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB); /* } else if(pcmk__str_eq(action->task, RSC_PROBED, pcmk__str_casei)) { */ /* action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */ } else if (is_set(action->flags, pe_action_pseudo)) { if (pcmk__str_eq(action->task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) { needs_maintenance_info = TRUE; } action_xml = create_xml_node(NULL, XML_GRAPH_TAG_PSEUDO_EVENT); needs_node_info = FALSE; } else { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); #if ENABLE_VERSIONED_ATTRS rsc_details = pe_rsc_action_details(action); #endif } crm_xml_add_int(action_xml, XML_ATTR_ID, action->id); crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); if (action->rsc != NULL && action->rsc->clone_name != NULL) { char *clone_key = NULL; guint interval_ms; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } if (pcmk__str_eq(action->task, RSC_NOTIFY, pcmk__str_casei)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); CRM_CHECK(n_type != NULL, crm_err("No notify type value found for %s", action->uuid)); CRM_CHECK(n_task != NULL, crm_err("No notify operation value found for %s", action->uuid)); clone_key = pcmk__notify_key(action->rsc->clone_name, n_type, n_task); } else if(action->cancel_task) { clone_key = pcmk__op_key(action->rsc->clone_name, action->cancel_task, interval_ms); } else { clone_key = pcmk__op_key(action->rsc->clone_name, action->task, interval_ms); } CRM_CHECK(clone_key != NULL, crm_err("Could not generate a key for %s", action->uuid)); crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid); free(clone_key); } else { crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); } if (needs_node_info && action->node != NULL) { pe_node_t *router_node = get_router_node(action); crm_xml_add(action_xml, XML_LRM_ATTR_TARGET, action->node->details->uname); crm_xml_add(action_xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); if (router_node) { crm_xml_add(action_xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname); } g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET), strdup(action->node->details->uname)); g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID), strdup(action->node->details->id)); } /* No details if this action is only being listed in the inputs section */ if (as_input) { return action_xml; } if (action->rsc && is_not_set(action->flags, pe_action_pseudo)) { int lpc = 0; xmlNode *rsc_xml = NULL; const char *attr_list[] = { XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER, XML_ATTR_TYPE }; /* If a resource is locked to a node via shutdown-lock, mark its actions * so the controller can preserve the lock when the action completes. */ if (should_lock_action(action)) { crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, (long long) action->rsc->lock_time); } // List affected resource rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); if (is_set(action->rsc->flags, pe_rsc_orphan) && action->rsc->clone_name) { /* Do not use the 'instance free' name here as that * might interfere with the instance we plan to keep. * Ie. if there are more than two named /anonymous/ * instances on a given node, we need to make sure the * command goes to the right one. * * Keep this block, even when everyone is using * 'instance free' anonymous clone names - it means * we'll do the right thing if anyone toggles the * unique flag to 'off' */ crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } else if (is_not_set(action->rsc->flags, pe_rsc_unique)) { const char *xml_id = ID(action->rsc->xml); crm_debug("Using anonymous clone name %s for %s (aka. %s)", xml_id, action->rsc->id, action->rsc->clone_name); /* ID is what we'd like client to use * ID_LONG is what they might know it as instead * * ID_LONG is only strictly needed /here/ during the * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC * supporting this feature). * * If anyone toggles the unique flag to 'on', the * 'instance free' name will correspond to an orphan * and fall into the clause above instead */ crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); if (action->rsc->clone_name && !pcmk__str_eq(xml_id, action->rsc->clone_name, pcmk__str_casei)) { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); } else { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } } else { CRM_ASSERT(action->rsc->clone_name == NULL); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); } for (lpc = 0; lpc < DIMOF(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); } } /* List any attributes in effect */ args_xml = create_xml_node(NULL, XML_TAG_ATTRS); crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if (action->rsc != NULL && action->node) { GHashTable *p = crm_str_table_new(); get_rsc_attributes(p, action->rsc, action->node, data_set); g_hash_table_foreach(p, hash2smartfield, args_xml); g_hash_table_destroy(p); #if ENABLE_VERSIONED_ATTRS { xmlNode *versioned_parameters = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS); pe_get_versioned_attributes(versioned_parameters, action->rsc, action->node, data_set); if (xml_has_children(versioned_parameters)) { add_node_copy(action_xml, versioned_parameters); } free_xml(versioned_parameters); } #endif } else if(action->rsc && action->rsc->variant <= pe_native) { g_hash_table_foreach(action->rsc->parameters, hash2smartfield, args_xml); #if ENABLE_VERSIONED_ATTRS if (xml_has_children(action->rsc->versioned_parameters)) { add_node_copy(action_xml, action->rsc->versioned_parameters); } #endif } #if ENABLE_VERSIONED_ATTRS if (rsc_details) { if (xml_has_children(rsc_details->versioned_parameters)) { add_node_copy(action_xml, rsc_details->versioned_parameters); } if (xml_has_children(rsc_details->versioned_meta)) { add_node_copy(action_xml, rsc_details->versioned_meta); } } #endif g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (action->rsc != NULL) { const char *value = g_hash_table_lookup(action->rsc->meta, "external-ip"); pe_resource_t *parent = action->rsc; while (parent != NULL) { parent->cmds->append_meta(parent, args_xml); parent = parent->parent; } if(value) { hash2smartfield((gpointer)"pcmk_external_ip", (gpointer)value, (gpointer)args_xml); } if (action->node && /* make clang analyzer happy */ pe__is_guest_node(action->node)) { pe_node_t *host = NULL; enum action_tasks task = text2task(action->task); if(task == action_notify || task == action_notified) { const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); task = text2task(n_task); } // Differentiate between up and down actions switch (task) { case stop_rsc: case stopped_rsc: case action_demote: case action_demoted: host = pe__current_node(action->node->details->remote_rsc->container); break; case start_rsc: case started_rsc: case monitor_rsc: case action_promote: case action_promoted: host = action->node->details->remote_rsc->container->allocated_to; break; default: break; } if(host) { hash2metafield((gpointer)XML_RSC_ATTR_TARGET, (gpointer)g_hash_table_lookup(action->rsc->meta, XML_RSC_ATTR_TARGET), (gpointer)args_xml); hash2metafield((gpointer) PCMK__ENV_PHYSICAL_HOST, (gpointer)host->details->uname, (gpointer)args_xml); } } } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei) && action->node) { /* Pass the node's attributes as meta-attributes. * * @TODO: Determine whether it is still necessary to do this. It was * added in 33d99707, probably for the libfence-based implementation in * c9a90bd, which is no longer used. */ g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml); } sorted_xml(args_xml, action_xml, FALSE); free_xml(args_xml); /* List any nodes this action is expected to make down */ if (needs_node_info && (action->node != NULL)) { add_downed_nodes(action_xml, action, data_set); } if (needs_maintenance_info) { add_maintenance_nodes(action_xml, data_set); } crm_log_xml_trace(action_xml, "dumped action"); return action_xml; } static bool should_dump_action(pe_action_t *action) { CRM_CHECK(action != NULL, return false); if (is_set(action->flags, pe_action_dumped)) { crm_trace("Action %s (%d) already dumped", action->uuid, action->id); return false; } else if (is_set(action->flags, pe_action_pseudo) && pcmk__str_eq(action->task, CRM_OP_PROBED, pcmk__str_casei)) { GListPtr lpc = NULL; /* This is a horrible but convenient hack * * It mimimizes the number of actions with unsatisfied inputs * (i.e. not included in the graph) * * This in turn, means we can be more concise when printing * aborted/incomplete graphs. * * It also makes it obvious which node is preventing * probe_complete from running (presumably because it is only * partially up) * * For these reasons we tolerate such perversions */ for (lpc = action->actions_after; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *wrapper = (pe_action_wrapper_t *) lpc->data; if (is_not_set(wrapper->action->flags, pe_action_runnable)) { /* Only interested in runnable operations */ } else if (!pcmk__str_eq(wrapper->action->task, RSC_START, pcmk__str_casei)) { /* Only interested in start operations */ } else if (is_set(wrapper->action->flags, pe_action_dumped) || should_dump_action(wrapper->action)) { crm_trace("Action %s (%d) should be dumped: " "dependency of %s (%d)", action->uuid, action->id, wrapper->action->uuid, wrapper->action->id); return true; } } } if (is_not_set(action->flags, pe_action_runnable)) { crm_trace("Ignoring action %s (%d): unrunnable", action->uuid, action->id); return false; } else if (is_set(action->flags, pe_action_optional) && is_not_set(action->flags, pe_action_print_always)) { crm_trace("Ignoring action %s (%d): optional", action->uuid, action->id); return false; // Monitors should be dumped even for unmanaged resources } else if (action->rsc && is_not_set(action->rsc->flags, pe_rsc_managed) && !pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_casei)) { const char *interval_ms_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS); // Cancellation of recurring monitors should still be dumped if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) { crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)", action->uuid, action->id, action->rsc->id); return false; } } if (is_set(action->flags, pe_action_pseudo) || pcmk__strcase_any_of(action->task, CRM_OP_FENCE, CRM_OP_SHUTDOWN, NULL)) { /* skip the next checks */ return true; } if (action->node == NULL) { pe_err("Skipping action %s (%d) " "because it was not allocated to a node (bug?)", action->uuid, action->id); log_action(LOG_DEBUG, "Unallocated action", action, false); return false; } else if (is_set(action->flags, pe_action_dc)) { crm_trace("Action %s (%d) should be dumped: " "can run on DC instead of %s", action->uuid, action->id, action->node->details->uname); } else if (pe__is_guest_node(action->node) && !action->node->details->remote_requires_reset) { crm_trace("Action %s (%d) should be dumped: " "assuming will be runnable on guest node %s", action->uuid, action->id, action->node->details->uname); } else if (action->node->details->online == false) { pe_err("Skipping action %s (%d) " "because it was scheduled for offline node (bug?)", action->uuid, action->id); log_action(LOG_DEBUG, "Action for offline node", action, FALSE); return false; #if 0 /* but this would also affect resources that can be safely * migrated before a fencing op */ } else if (action->node->details->unclean == false) { pe_err("Skipping action %s (%d) " "because it was scheduled for unclean node (bug?)", action->uuid, action->id); log_action(LOG_DEBUG, "Action for unclean node", action, false); return false; #endif } return true; } /* lowest to highest */ static gint sort_action_id(gconstpointer a, gconstpointer b) { const pe_action_wrapper_t *action_wrapper2 = (const pe_action_wrapper_t *)a; const pe_action_wrapper_t *action_wrapper1 = (const pe_action_wrapper_t *)b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (action_wrapper1->action->id > action_wrapper2->action->id) { return -1; } if (action_wrapper1->action->id < action_wrapper2->action->id) { return 1; } return 0; } /*! * \internal * \brief Check whether an action input should be in the transition graph * * \param[in] action Action to check * \param[in,out] input Action input to check * * \return true if input should be in graph, false otherwise * \note This function may not only check an input, but disable it under certian * circumstances (load or anti-colocation orderings that are not needed). */ static bool check_dump_input(pe_action_t *action, pe_action_wrapper_t *input) { int type = input->type; if (input->state == pe_link_dumped) { return true; } type &= ~pe_order_implies_first_printed; type &= ~pe_order_implies_then_printed; type &= ~pe_order_optional; if (input->type == pe_order_none) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering disabled", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (is_not_set(input->action->flags, pe_action_runnable) && (type == pe_order_none) && !pcmk__str_eq(input->action->uuid, CRM_OP_PROBED, pcmk__str_casei)) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (is_not_set(input->action->flags, pe_action_runnable) && is_set(input->type, pe_order_one_or_more)) { crm_trace("Ignoring %s (%d) input %s (%d): " "one-or-more and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (is_set(action->flags, pe_action_pseudo) && is_set(input->type, pe_order_stonith_stop)) { crm_trace("Ignoring %s (%d) input %s (%d): " "stonith stop but action is pseudo", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (is_set(input->type, pe_order_implies_first_migratable) && is_not_set(input->action->flags, pe_action_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "implies input migratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (is_set(input->type, pe_order_apply_first_non_migratable) && is_set(input->action->flags, pe_action_migrate_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "only if input unmigratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if ((input->type == pe_order_optional) && is_set(input->action->flags, pe_action_migrate_runnable) && pcmk__ends_with(input->action->uuid, "_stop_0")) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional but stop in migration", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (input->type == pe_order_load) { pe_node_t *input_node = input->action->node; // load orderings are relevant only if actions are for same node if (action->rsc && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei)) { pe_node_t *allocated = action->rsc->allocated_to; /* For load_stopped -> migrate_to orderings, we care about where it * has been allocated to, not where it will be executed. */ if ((input_node == NULL) || (allocated == NULL) || (input_node->details != allocated->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (allocated? allocated->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } } else if ((input_node == NULL) || (action->node == NULL) || (input_node->details != action->node->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (action->node? action->node->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } else if (is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->type == pe_order_anti_colocation) { if (input->action->node && action->node && (input->action->node->details != action->node->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, action->node->details->uname, input->action->node->details->uname); input->type = pe_order_none; return false; } else if (is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->action->rsc && input->action->rsc != action->rsc && is_set(input->action->rsc->flags, pe_rsc_failed) && is_not_set(input->action->rsc->flags, pe_rsc_managed) && pcmk__ends_with(input->action->uuid, "_stop_0") && action->rsc && pe_rsc_is_clone(action->rsc)) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", input->action->uuid, action->uuid); return false; } else if (is_set(input->action->flags, pe_action_optional) && is_not_set(input->action->flags, pe_action_print_always) && is_not_set(input->action->flags, pe_action_dumped) && !should_dump_action(input->action)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input optional", action->uuid, action->id, input->action->uuid, input->action->id); return false; } crm_trace("%s (%d) input %s (%d) @ %s should be dumped: %s, %s, %s, 0x%.6x", action->uuid, action->id, input->action->uuid, input->action->id, input->action->node? input->action->node->details->uname : "no node", is_set(input->action->flags, pe_action_pseudo)? "pseudo" : "real", is_set(input->action->flags, pe_action_runnable)? "runnable" : "unrunnable", is_set(input->action->flags, pe_action_optional)? "optional" : "required", input->type); return true; } static bool graph_has_loop(pe_action_t *init_action, pe_action_t *action, pe_action_wrapper_t *input) { bool has_loop = false; if (is_set(input->action->flags, pe_action_tracking)) { crm_trace("Breaking tracking loop: %s@%s -> %s@%s (0x%.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); return false; } // Don't need to check inputs that won't be used if (!check_dump_input(action, input)) { return false; } if (input->action == init_action) { crm_debug("Input loop found in %s@%s ->...-> %s@%s", action->uuid, action->node? action->node->details->uname : "", init_action->uuid, init_action->node? init_action->node->details->uname : ""); return true; } - set_bit(input->action->flags, pe_action_tracking); + pe__set_action_flags(input->action, pe_action_tracking); crm_trace("Checking inputs of action %s@%s input %s@%s (0x%.6x)" "for graph loop with %s@%s ", action->uuid, action->node? action->node->details->uname : "", input->action->uuid, input->action->node? input->action->node->details->uname : "", input->type, init_action->uuid, init_action->node? init_action->node->details->uname : ""); // Recursively check input itself for loops for (GList *iter = input->action->actions_before; iter != NULL; iter = iter->next) { if (graph_has_loop(init_action, input->action, (pe_action_wrapper_t *) iter->data)) { // Recursive call already logged a debug message has_loop = true; goto done; } } done: pe__clear_action_flags(input->action, pe_action_tracking); if (!has_loop) { crm_trace("No input loop found in %s@%s -> %s@%s (0x%.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); } return has_loop; } bool pcmk__ordering_is_invalid(pe_action_t *action, pe_action_wrapper_t *input) { /* Prevent user-defined ordering constraints between resources * running in a guest node and the resource that defines that node. */ if (is_not_set(input->type, pe_order_preserve) && action->rsc && action->rsc->fillers && input->action->rsc && input->action->node && input->action->node->details->remote_rsc && (input->action->node->details->remote_rsc->container == action->rsc)) { crm_warn("Invalid ordering constraint between %s and %s", input->action->rsc->id, action->rsc->id); return true; } /* If there's an order like * "rscB_stop node2"-> "load_stopped_node2" -> "rscA_migrate_to node1" * * then rscA is being migrated from node1 to node2, while rscB is being * migrated from node2 to node1. If there would be a graph loop, * break the order "load_stopped_node2" -> "rscA_migrate_to node1". */ if ((input->type == pe_order_load) && action->rsc && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei) && graph_has_loop(action, action, input)) { return true; } return false; } // Remove duplicate inputs (regardless of flags) static void deduplicate_inputs(pe_action_t *action) { GList *item = NULL; GList *next = NULL; pe_action_wrapper_t *last_input = NULL; action->actions_before = g_list_sort(action->actions_before, sort_action_id); for (item = action->actions_before; item != NULL; item = next) { pe_action_wrapper_t *input = (pe_action_wrapper_t *) item->data; next = item->next; if (last_input && (input->action->id == last_input->action->id)) { crm_trace("Input %s (%d) duplicate skipped for action %s (%d)", input->action->uuid, input->action->id, action->uuid, action->id); /* For the purposes of scheduling, the ordering flags no longer * matter, but crm_simulate looks at certain ones when creating a * dot graph. Combining the flags is sufficient for that purpose. */ last_input->type |= input->type; if (input->state == pe_link_dumped) { last_input->state = pe_link_dumped; } free(item->data); action->actions_before = g_list_delete_link(action->actions_before, item); } else { last_input = input; input->state = pe_link_not_dumped; } } } /*! * \internal * \brief Add an action to the transition graph XML if appropriate * * \param[in] action Action to possibly add * \param[in] data_set Cluster working set * * \note This will de-duplicate the action inputs, meaning that the * pe_action_wrapper_t:type flags can no longer be relied on to retain * their original settings. That means this MUST be called after stage7() * is complete, and nothing after this should rely on those type flags. * (For example, some code looks for type equal to some flag rather than * whether the flag is set, and some code looks for particular * combinations of flags -- such code must be done before stage8().) */ void graph_element_from_action(pe_action_t *action, pe_working_set_t *data_set) { GList *lpc = NULL; int synapse_priority = 0; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; xmlNode *xml_action = NULL; pe_action_wrapper_t *input = NULL; /* If we haven't already, de-duplicate inputs -- even if we won't be dumping * the action, so that crm_simulate dot graphs don't have duplicates. */ if (is_not_set(action->flags, pe_action_dedup)) { deduplicate_inputs(action); - set_bit(action->flags, pe_action_dedup); + pe__set_action_flags(action, pe_action_dedup); } if (should_dump_action(action) == FALSE) { return; } - set_bit(action->flags, pe_action_dumped); + pe__set_action_flags(action, pe_action_dumped); syn = create_xml_node(data_set->graph, "synapse"); set = create_xml_node(syn, "action_set"); in = create_xml_node(syn, "inputs"); crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); data_set->num_synapse++; if (action->rsc != NULL) { synapse_priority = action->rsc->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); } xml_action = action2xml(action, FALSE, data_set); add_node_nocopy(set, crm_element_name(xml_action), xml_action); for (lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { input = (pe_action_wrapper_t *) lpc->data; if (check_dump_input(action, input)) { xmlNode *input_xml = create_xml_node(in, "trigger"); input->state = pe_link_dumped; xml_action = action2xml(input->action, TRUE, data_set); add_node_nocopy(input_xml, crm_element_name(xml_action), xml_action); } } } diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c index f6c14d2130..3c0be169c9 100644 --- a/lib/pacemaker/pcmk_sched_group.c +++ b/lib/pacemaker/pcmk_sched_group.c @@ -1,532 +1,533 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #define VARIANT_GROUP 1 #include pe_node_t * pcmk__group_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { pe_node_t *node = NULL; pe_node_t *group_node = NULL; GListPtr gIter = NULL; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } if (group_data->first_child == NULL) { // Nothing to allocate clear_bit(rsc->flags, pe_rsc_provisional); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); rsc->role = group_data->first_child->role; group_data->first_child->rsc_cons = g_list_concat(group_data->first_child->rsc_cons, rsc->rsc_cons); rsc->rsc_cons = NULL; group_data->last_child->rsc_cons_lhs = g_list_concat(group_data->last_child->rsc_cons_lhs, rsc->rsc_cons_lhs); rsc->rsc_cons_lhs = NULL; pe__show_node_weights(!show_scores, rsc, __FUNCTION__, rsc->allowed_nodes); gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; pe_rsc_trace(rsc, "Allocating group %s member %s", rsc->id, child_rsc->id); node = child_rsc->cmds->allocate(child_rsc, prefer, data_set); if (group_node == NULL) { group_node = node; } } rsc->next_role = group_data->first_child->next_role; clear_bit(rsc->flags, pe_rsc_allocating); clear_bit(rsc->flags, pe_rsc_provisional); if (group_data->colocated) { return group_node; } return NULL; } void group_update_pseudo_status(pe_resource_t * parent, pe_resource_t * child); void group_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *op = NULL; const char *value = NULL; GListPtr gIter = rsc->children; pe_rsc_trace(rsc, "Creating actions for %s", rsc->id); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->create_actions(child_rsc, data_set); group_update_pseudo_status(rsc, child_rsc); } op = start_action(rsc, NULL, TRUE /* !group_data->child_starting */ ); - set_bit(op->flags, pe_action_pseudo | pe_action_runnable); + pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); op = custom_action(rsc, started_key(rsc), RSC_STARTED, NULL, TRUE /* !group_data->child_starting */ , TRUE, data_set); - set_bit(op->flags, pe_action_pseudo | pe_action_runnable); + pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); op = stop_action(rsc, NULL, TRUE /* !group_data->child_stopping */ ); - set_bit(op->flags, pe_action_pseudo | pe_action_runnable); + pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); op = custom_action(rsc, stopped_key(rsc), RSC_STOPPED, NULL, TRUE /* !group_data->child_stopping */ , TRUE, data_set); - set_bit(op->flags, pe_action_pseudo | pe_action_runnable); + pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTABLE); if (crm_is_true(value)) { op = custom_action(rsc, demote_key(rsc), RSC_DEMOTE, NULL, TRUE, TRUE, data_set); - set_bit(op->flags, pe_action_pseudo); - set_bit(op->flags, pe_action_runnable); + pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); + op = custom_action(rsc, demoted_key(rsc), RSC_DEMOTED, NULL, TRUE, TRUE, data_set); - set_bit(op->flags, pe_action_pseudo); - set_bit(op->flags, pe_action_runnable); + pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); op = custom_action(rsc, promote_key(rsc), RSC_PROMOTE, NULL, TRUE, TRUE, data_set); - set_bit(op->flags, pe_action_pseudo); - set_bit(op->flags, pe_action_runnable); + pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); + op = custom_action(rsc, promoted_key(rsc), RSC_PROMOTED, NULL, TRUE, TRUE, data_set); - set_bit(op->flags, pe_action_pseudo); - set_bit(op->flags, pe_action_runnable); + pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); } } void group_update_pseudo_status(pe_resource_t * parent, pe_resource_t * child) { GListPtr gIter = child->actions; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, parent); if (group_data->ordered == FALSE) { /* If this group is not ordered, then leave the meta-actions as optional */ return; } if (group_data->child_stopping && group_data->child_starting) { return; } for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (is_set(action->flags, pe_action_optional)) { continue; } if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei) && is_set(action->flags, pe_action_runnable)) { group_data->child_stopping = TRUE; pe_rsc_trace(action->rsc, "Based on %s the group is stopping", action->uuid); } else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei) && is_set(action->flags, pe_action_runnable)) { group_data->child_starting = TRUE; pe_rsc_trace(action->rsc, "Based on %s the group is starting", action->uuid); } } } void group_internal_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = rsc->children; pe_resource_t *last_rsc = NULL; pe_resource_t *last_active = NULL; pe_resource_t *top = uber_parent(rsc); group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; int stop = pe_order_none; int stopped = pe_order_implies_then_printed; int start = pe_order_implies_then | pe_order_runnable_left; int started = pe_order_runnable_left | pe_order_implies_then | pe_order_implies_then_printed; child_rsc->cmds->internal_constraints(child_rsc, data_set); if (last_rsc == NULL) { if (group_data->ordered) { stop |= pe_order_optional; stopped = pe_order_implies_then; } } else if (group_data->colocated) { rsc_colocation_new("group:internal_colocation", NULL, INFINITY, child_rsc, last_rsc, NULL, NULL, data_set); } if (is_set(top->flags, pe_rsc_promotable)) { new_rsc_order(rsc, RSC_DEMOTE, child_rsc, RSC_DEMOTE, stop | pe_order_implies_first_printed, data_set); new_rsc_order(child_rsc, RSC_DEMOTE, rsc, RSC_DEMOTED, stopped, data_set); new_rsc_order(child_rsc, RSC_PROMOTE, rsc, RSC_PROMOTED, started, data_set); new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, pe_order_implies_first_printed, data_set); } order_start_start(rsc, child_rsc, pe_order_implies_first_printed); order_stop_stop(rsc, child_rsc, stop | pe_order_implies_first_printed); new_rsc_order(child_rsc, RSC_STOP, rsc, RSC_STOPPED, stopped, data_set); new_rsc_order(child_rsc, RSC_START, rsc, RSC_STARTED, started, data_set); if (group_data->ordered == FALSE) { order_start_start(rsc, child_rsc, start | pe_order_implies_first_printed); if (is_set(top->flags, pe_rsc_promotable)) { new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, start | pe_order_implies_first_printed, data_set); } } else if (last_rsc != NULL) { child_rsc->restart_type = pe_restart_restart; order_start_start(last_rsc, child_rsc, start); order_stop_stop(child_rsc, last_rsc, pe_order_optional | pe_order_restart); if (is_set(top->flags, pe_rsc_promotable)) { new_rsc_order(last_rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, start, data_set); new_rsc_order(child_rsc, RSC_DEMOTE, last_rsc, RSC_DEMOTE, pe_order_optional, data_set); } } else { /* If anyone in the group is starting, then * pe_order_implies_then will cause _everyone_ in the group * to be sent a start action * But this is safe since starting something that is already * started is required to be "safe" */ int flags = pe_order_none; order_start_start(rsc, child_rsc, flags); if (is_set(top->flags, pe_rsc_promotable)) { new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, flags, data_set); } } /* Look for partially active groups * Make sure they still shut down in sequence */ if (child_rsc->running_on) { if (group_data->ordered && last_rsc && last_rsc->running_on == NULL && last_active && last_active->running_on) { order_stop_stop(child_rsc, last_active, pe_order_optional); } last_active = child_rsc; } last_rsc = child_rsc; } if (group_data->ordered && last_rsc != NULL) { int stop_stop_flags = pe_order_implies_then; int stop_stopped_flags = pe_order_optional; order_stop_stop(rsc, last_rsc, stop_stop_flags); new_rsc_order(last_rsc, RSC_STOP, rsc, RSC_STOPPED, stop_stopped_flags, data_set); if (is_set(top->flags, pe_rsc_promotable)) { new_rsc_order(rsc, RSC_DEMOTE, last_rsc, RSC_DEMOTE, stop_stop_flags, data_set); new_rsc_order(last_rsc, RSC_DEMOTE, rsc, RSC_DEMOTED, stop_stopped_flags, data_set); } } } void group_rsc_colocation_lh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, rsc_colocation_t *constraint, pe_working_set_t *data_set) { GListPtr gIter = NULL; group_variant_data_t *group_data = NULL; if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if (rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } if (constraint->score == 0) { return; } gIter = rsc_lh->children; pe_rsc_trace(rsc_lh, "Processing constraints from %s", rsc_lh->id); get_group_variant_data(group_data, rsc_lh); if (group_data->colocated) { group_data->first_child->cmds->rsc_colocation_lh(group_data->first_child, rsc_rh, constraint, data_set); return; } else if (constraint->score >= INFINITY) { pcmk__config_err("%s: Cannot perform mandatory colocation " "between non-colocated group and %s", rsc_lh->id, rsc_rh->id); return; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_lh(child_rsc, rsc_rh, constraint, data_set); } } void group_rsc_colocation_rh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, rsc_colocation_t *constraint, pe_working_set_t *data_set) { GListPtr gIter = rsc_rh->children; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc_rh); CRM_CHECK(rsc_lh->variant == pe_native, return); if (constraint->score == 0) { return; } pe_rsc_trace(rsc_rh, "Processing RH %s of constraint %s (LH is %s)", rsc_rh->id, constraint->id, rsc_lh->id); if (is_set(rsc_rh->flags, pe_rsc_provisional)) { return; } else if (group_data->colocated && group_data->first_child) { if (constraint->score >= INFINITY) { /* Ensure RHS is _fully_ up before can start LHS */ group_data->last_child->cmds->rsc_colocation_rh(rsc_lh, group_data->last_child, constraint, data_set); } else { /* A partially active RHS is fine */ group_data->first_child->cmds->rsc_colocation_rh(rsc_lh, group_data->first_child, constraint, data_set); } return; } else if (constraint->score >= INFINITY) { pcmk__config_err("%s: Cannot perform mandatory colocation with" " non-colocated group %s", rsc_lh->id, rsc_rh->id); return; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint, data_set); } } enum pe_action_flags group_action_flags(pe_action_t * action, pe_node_t * node) { GListPtr gIter = NULL; enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); for (gIter = action->rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; enum action_tasks task = get_complex_task(child, action->task, TRUE); const char *task_s = task2text(task); pe_action_t *child_action = find_first_action(child->actions, NULL, task_s, node); if (child_action) { enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); if (is_set(flags, pe_action_optional) && is_set(child_flags, pe_action_optional) == FALSE) { pe_rsc_trace(action->rsc, "%s is mandatory because of %s", action->uuid, child_action->uuid); - clear_bit(flags, pe_action_optional); + pe__clear_raw_action_flags(flags, "group action", + pe_action_optional); pe__clear_action_flags(action, pe_action_optional); } if (!pcmk__str_eq(task_s, action->task, pcmk__str_casei) && is_set(flags, pe_action_runnable) && is_set(child_flags, pe_action_runnable) == FALSE) { pe_rsc_trace(action->rsc, "%s is not runnable because of %s", action->uuid, child_action->uuid); - clear_bit(flags, pe_action_runnable); + pe__clear_raw_action_flags(flags, "group action", + pe_action_runnable); pe__clear_action_flags(action, pe_action_runnable); } } else if (task != stop_rsc && task != action_demote) { pe_rsc_trace(action->rsc, "%s is not runnable because of %s (not found in %s)", action->uuid, task_s, child->id); - clear_bit(flags, pe_action_runnable); + pe__clear_raw_action_flags(flags, "group action", + pe_action_runnable); } } return flags; } enum pe_graph_flags group_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set) { GListPtr gIter = then->rsc->children; enum pe_graph_flags changed = pe_graph_none; CRM_ASSERT(then->rsc != NULL); changed |= native_update_actions(first, then, node, flags, filter, type, data_set); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; pe_action_t *child_action = find_first_action(child->actions, NULL, then->task, node); if (child_action) { changed |= child->cmds->update_actions(first, child_action, node, flags, filter, type, data_set); } } return changed; } void group_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { GListPtr gIter = rsc->children; GListPtr saved = constraint->node_list_rh; GListPtr zero = pcmk__copy_node_list(constraint->node_list_rh, true); gboolean reset_scores = TRUE; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); pe_rsc_debug(rsc, "Processing rsc_location %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_location(child_rsc, constraint); if (group_data->colocated && reset_scores) { reset_scores = FALSE; constraint->node_list_rh = zero; } } constraint->node_list_rh = saved; g_list_free_full(zero, free); } void group_expand(pe_resource_t * rsc, pe_working_set_t * data_set) { CRM_CHECK(rsc != NULL, return); pe_rsc_trace(rsc, "Processing actions from %s", rsc->id); native_expand(rsc, data_set); for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } } GHashTable * pcmk__group_merge_weights(pe_resource_t *rsc, const char *rhs, GHashTable *nodes, const char *attr, float factor, uint32_t flags) { GListPtr gIter = rsc->rsc_cons_lhs; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); if (is_set(rsc->flags, pe_rsc_merging)) { pe_rsc_info(rsc, "Breaking dependency loop with %s at %s", rsc->id, rhs); return nodes; } set_bit(rsc->flags, pe_rsc_merging); nodes = group_data->first_child->cmds->merge_weights(group_data->first_child, rhs, nodes, attr, factor, flags); for (; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } nodes = pcmk__native_merge_weights(constraint->rsc_lh, rsc->id, nodes, constraint->node_attribute, constraint->score / (float) INFINITY, flags); } clear_bit(rsc->flags, pe_rsc_merging); return nodes; } void group_append_meta(pe_resource_t * rsc, xmlNode * xml) { } diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c index b87bccf254..c26d0bd601 100644 --- a/lib/pacemaker/pcmk_sched_native.c +++ b/lib/pacemaker/pcmk_sched_native.c @@ -1,3463 +1,3463 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // The controller removes the resource from the CIB, making this redundant // #define DELETE_THEN_REFRESH 1 #define INFINITY_HACK (INFINITY * -100) #define VARIANT_NATIVE 1 #include static void Recurring(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, pe_working_set_t *data_set); static void RecurringOp(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, xmlNode *operation, pe_working_set_t *data_set); static void Recurring_Stopped(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, pe_working_set_t *data_set); static void RecurringOp_Stopped(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, xmlNode *operation, pe_working_set_t *data_set); void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set); gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set); gboolean StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); /* This array says what the *next* role should be when transitioning from one * role to another. For example going from Stopped to Master, the next role is * RSC_ROLE_SLAVE, because the resource must be started before being promoted. * The current state then becomes Started, which is fed into this array again, * giving a next role of RSC_ROLE_MASTER. */ static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current state Next state*/ /* Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; typedef gboolean (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *next, gboolean optional, pe_working_set_t *data_set); // This array picks the function needed to transition from one role to another static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current state Next state */ /* Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, }, /* Master */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp , }, }; static gboolean native_choose_node(pe_resource_t * rsc, pe_node_t * prefer, pe_working_set_t * data_set) { GListPtr nodes = NULL; pe_node_t *chosen = NULL; pe_node_t *best = NULL; int multiple = 1; int length = 0; gboolean result = FALSE; process_utilization(rsc, &prefer, data_set); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to ? TRUE : FALSE; } // Sort allowed nodes by weight if (rsc->allowed_nodes) { length = g_hash_table_size(rsc->allowed_nodes); } if (length > 0) { nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = sort_nodes_by_weight(nodes, pe__current_node(rsc), data_set); // First node in sorted list has the best score best = g_list_nth_data(nodes, 0); } if (prefer && nodes) { chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (chosen == NULL) { pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", prefer->details->uname, rsc->id); /* Favor the preferred node as long as its weight is at least as good as * the best allowed node's. * * An alternative would be to favor the preferred node even if the best * node is better, when the best node's weight is less than INFINITY. */ } else if ((chosen->weight < 0) || (chosen->weight < best->weight)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", chosen->details->uname, rsc->id); chosen = NULL; } else if (!can_run_resources(chosen)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", chosen->details->uname, rsc->id); chosen = NULL; } else { pe_rsc_trace(rsc, "Chose preferred node %s for %s (ignoring %d candidates)", chosen->details->uname, rsc->id, length); } } if ((chosen == NULL) && nodes) { /* Either there is no preferred node, or the preferred node is not * available, but there are other nodes allowed to run the resource. */ chosen = best; pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates", chosen ? chosen->details->uname : "", rsc->id, length); if (!pe_rsc_is_unique_clone(rsc->parent) && chosen && (chosen->weight > 0) && can_run_resources(chosen)) { /* If the resource is already running on a node, prefer that node if * it is just as good as the chosen node. * * We don't do this for unique clone instances, because * distribute_children() has already assigned instances to their * running nodes when appropriate, and if we get here, we don't want * remaining unallocated instances to prefer a node that's already * running another instance. */ pe_node_t *running = pe__current_node(rsc); if (running && (can_run_resources(running) == FALSE)) { pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", rsc->id, running->details->uname); } else if (running) { for (GList *iter = nodes->next; iter; iter = iter->next) { pe_node_t *tmp = (pe_node_t *) iter->data; if (tmp->weight != chosen->weight) { // The nodes are sorted by weight, so no more are equal break; } if (tmp->details == running->details) { // Scores are equal, so prefer the current node chosen = tmp; } multiple++; } } } } if (multiple > 1) { static char score[33]; int log_level = (chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO; score2char_stack(chosen->weight, score, sizeof(score)); do_crm_log(log_level, "Chose node %s for %s from %d nodes with score %s", chosen->details->uname, rsc->id, multiple, score); } result = native_assign_node(rsc, nodes, chosen, FALSE); g_list_free(nodes); return result; } /*! * \internal * \brief Find score of highest-scored node that matches colocation attribute * * \param[in] rsc Resource whose allowed nodes should be searched * \param[in] attr Colocation attribute name (must not be NULL) * \param[in] value Colocation attribute value to require */ static int best_node_score_matching_attr(const pe_resource_t *rsc, const char *attr, const char *value) { GHashTableIter iter; pe_node_t *node = NULL; int best_score = -INFINITY; const char *best_node = NULL; // Find best allowed node with matching attribute g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if ((node->weight > best_score) && can_run_resources(node) && pcmk__str_eq(value, pe_node_attribute_raw(node, attr), pcmk__str_casei)) { best_score = node->weight; best_node = node->details->uname; } } if (!pcmk__str_eq(attr, CRM_ATTR_UNAME, pcmk__str_casei)) { if (best_node == NULL) { crm_info("No allowed node for %s matches node attribute %s=%s", rsc->id, attr, value); } else { crm_info("Allowed node %s for %s had best score (%d) " "of those matching node attribute %s=%s", best_node, rsc->id, best_score, attr, value); } } return best_score; } /*! * \internal * \brief Add resource's colocation matches to current node allocation scores * * For each node in a given table, if any of a given resource's allowed nodes * have a matching value for the colocation attribute, add the highest of those * nodes' scores to the node's score. * * \param[in,out] nodes Hash table of nodes with allocation scores so far * \param[in] rsc Resource whose allowed nodes should be compared * \param[in] attr Colocation attribute that must match (NULL for default) * \param[in] factor Factor by which to multiply scores being added * \param[in] only_positive Whether to add only positive scores */ static void add_node_scores_matching_attr(GHashTable *nodes, const pe_resource_t *rsc, const char *attr, float factor, bool only_positive) { GHashTableIter iter; pe_node_t *node = NULL; if (attr == NULL) { attr = CRM_ATTR_UNAME; } // Iterate through each node g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { float weight_f = 0; int weight = 0; int score = 0; int new_score = 0; score = best_node_score_matching_attr(rsc, attr, pe_node_attribute_raw(node, attr)); if ((factor < 0) && (score < 0)) { /* Negative preference for a node with a negative score * should not become a positive preference. * * @TODO Consider filtering only if weight is -INFINITY */ crm_trace("%s: Filtering %d + %f * %d (double negative disallowed)", node->details->uname, node->weight, factor, score); continue; } if (node->weight == INFINITY_HACK) { crm_trace("%s: Filtering %d + %f * %d (node was marked unusable)", node->details->uname, node->weight, factor, score); continue; } weight_f = factor * score; // Round the number; see http://c-faq.com/fp/round.html weight = (int) ((weight_f < 0)? (weight_f - 0.5) : (weight_f + 0.5)); /* Small factors can obliterate the small scores that are often actually * used in configurations. If the score and factor are nonzero, ensure * that the result is nonzero as well. */ if ((weight == 0) && (score != 0)) { if (factor > 0.0) { weight = 1; } else if (factor < 0.0) { weight = -1; } } new_score = pe__add_scores(weight, node->weight); if (only_positive && (new_score < 0) && (node->weight > 0)) { crm_trace("%s: Filtering %d + %f * %d = %d " "(negative disallowed, marking node unusable)", node->details->uname, node->weight, factor, score, new_score); node->weight = INFINITY_HACK; continue; } if (only_positive && (new_score < 0) && (node->weight == 0)) { crm_trace("%s: Filtering %d + %f * %d = %d (negative disallowed)", node->details->uname, node->weight, factor, score, new_score); continue; } crm_trace("%s: %d + %f * %d = %d", node->details->uname, node->weight, factor, score, new_score); node->weight = new_score; } } static inline bool is_nonempty_group(pe_resource_t *rsc) { return rsc && (rsc->variant == pe_group) && (rsc->children != NULL); } /*! * \internal * \brief Incorporate colocation constraint scores into node weights * * \param[in,out] rsc Resource being placed * \param[in] rhs ID of 'with' resource * \param[in,out] nodes Nodes, with scores as of this point * \param[in] attr Colocation attribute (ID by default) * \param[in] factor Incorporate scores multiplied by this factor * \param[in] flags Bitmask of enum pe_weights values * * \return Nodes, with scores modified by this constraint * \note This function assumes ownership of the nodes argument. The caller * should free the returned copy rather than the original. */ GHashTable * pcmk__native_merge_weights(pe_resource_t *rsc, const char *rhs, GHashTable *nodes, const char *attr, float factor, uint32_t flags) { GHashTable *work = NULL; // Avoid infinite recursion if (is_set(rsc->flags, pe_rsc_merging)) { pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", rhs, rsc->id); return nodes; } set_bit(rsc->flags, pe_rsc_merging); if (is_set(flags, pe_weights_init)) { if (is_nonempty_group(rsc)) { GList *last = g_list_last(rsc->children); pe_resource_t *last_rsc = last->data; pe_rsc_trace(rsc, "%s: Merging scores from group %s " "using last member %s (at %.6f)", rhs, rsc->id, last_rsc->id, factor); work = pcmk__native_merge_weights(last_rsc, rhs, NULL, attr, factor, flags); } else { work = pcmk__copy_node_table(rsc->allowed_nodes); } clear_bit(flags, pe_weights_init); } else if (is_nonempty_group(rsc)) { /* The first member of the group will recursively incorporate any * constraints involving other members (including the group internal * colocation). * * @TODO The indirect colocations from the dependent group's other * members will be incorporated at full strength rather than by * factor, so the group's combined stickiness will be treated as * (factor + (#members - 1)) * stickiness. It is questionable what * the right approach should be. */ pe_rsc_trace(rsc, "%s: Merging scores from first member of group %s " "(at %.6f)", rhs, rsc->id, factor); work = pcmk__copy_node_table(nodes); work = pcmk__native_merge_weights(rsc->children->data, rhs, work, attr, factor, flags); } else { pe_rsc_trace(rsc, "%s: Merging scores from %s (at %.6f)", rhs, rsc->id, factor); work = pcmk__copy_node_table(nodes); add_node_scores_matching_attr(work, rsc, attr, factor, is_set(flags, pe_weights_positive)); } if (can_run_any(work)) { GListPtr gIter = NULL; int multiplier = (factor < 0)? -1 : 1; if (is_set(flags, pe_weights_forward)) { gIter = rsc->rsc_cons; pe_rsc_trace(rsc, "Checking additional %d optional '%s with' constraints", g_list_length(gIter), rsc->id); } else if (is_nonempty_group(rsc)) { pe_resource_t *last_rsc = g_list_last(rsc->children)->data; gIter = last_rsc->rsc_cons_lhs; pe_rsc_trace(rsc, "Checking additional %d optional 'with group %s' " "constraints using last member %s", g_list_length(gIter), rsc->id, last_rsc->id); } else { gIter = rsc->rsc_cons_lhs; pe_rsc_trace(rsc, "Checking additional %d optional 'with %s' constraints", g_list_length(gIter), rsc->id); } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *other = NULL; rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } if (is_set(flags, pe_weights_forward)) { other = constraint->rsc_rh; } else { other = constraint->rsc_lh; } pe_rsc_trace(rsc, "Optionally merging score of '%s' constraint (%s with %s)", constraint->id, constraint->rsc_lh->id, constraint->rsc_rh->id); work = pcmk__native_merge_weights(other, rhs, work, constraint->node_attribute, multiplier * constraint->score / (float) INFINITY, flags|pe_weights_rollback); pe__show_node_weights(true, NULL, rhs, work); } } else if (is_set(flags, pe_weights_rollback)) { pe_rsc_info(rsc, "%s: Rolling back optional scores from %s", rhs, rsc->id); g_hash_table_destroy(work); clear_bit(rsc->flags, pe_rsc_merging); return nodes; } if (is_set(flags, pe_weights_positive)) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->weight == INFINITY_HACK) { node->weight = 1; } } } if (nodes) { g_hash_table_destroy(nodes); } clear_bit(rsc->flags, pe_rsc_merging); return work; } static inline bool node_has_been_unfenced(pe_node_t *node) { const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED); return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches); } static inline bool is_unfence_device(pe_resource_t *rsc, pe_working_set_t *data_set) { return is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing); } pe_node_t * pcmk__native_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { GListPtr gIter = NULL; if (rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->allocate(rsc->parent, prefer, data_set); } if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); pe__show_node_weights(true, rsc, "Pre-alloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; GHashTable *archive = NULL; pe_resource_t *rsc_rh = constraint->rsc_rh; if (constraint->score == 0) { continue; } if (constraint->role_lh >= RSC_ROLE_MASTER || (constraint->score < 0 && constraint->score > -INFINITY)) { archive = pcmk__copy_node_table(rsc->allowed_nodes); } pe_rsc_trace(rsc, "%s: Allocating %s first (constraint=%s score=%d role=%s)", rsc->id, rsc_rh->id, constraint->id, constraint->score, role2text(constraint->role_lh)); rsc_rh->cmds->allocate(rsc_rh, NULL, data_set); rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint, data_set); if (archive && can_run_any(rsc->allowed_nodes) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rsc->id, rsc_rh->id); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = archive; archive = NULL; } if (archive) { g_hash_table_destroy(archive); } } pe__show_node_weights(true, rsc, "Post-coloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } pe_rsc_trace(rsc, "Merging score of '%s' constraint (%s with %s)", constraint->id, constraint->rsc_lh->id, constraint->rsc_rh->id); rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY, pe_weights_rollback); } if (rsc->next_role == RSC_ROLE_STOPPED) { pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id); /* make sure it doesn't come up again */ resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set); } else if(rsc->next_role > rsc->role && is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); rsc->next_role = rsc->role; } pe__show_node_weights(!show_scores, rsc, __FUNCTION__, rsc->allowed_nodes); if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } if (is_not_set(rsc->flags, pe_rsc_managed)) { const char *reason = NULL; pe_node_t *assign_to = NULL; rsc->next_role = rsc->role; assign_to = pe__current_node(rsc); if (assign_to == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_MASTER) { reason = "master"; } else if (is_set(rsc->flags, pe_rsc_failed)) { reason = "failed"; } else { reason = "active"; } pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id, (assign_to? assign_to->details->uname : "no node"), reason); native_assign_node(rsc, NULL, assign_to, TRUE); } else if (is_set(data_set->flags, pe_flag_stop_everything)) { pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id); native_assign_node(rsc, NULL, NULL, TRUE); } else if (is_set(rsc->flags, pe_rsc_provisional) && native_choose_node(rsc, prefer, data_set)) { pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if (rsc->allocated_to == NULL) { if (is_not_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); } else if (rsc->running_on != NULL) { pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); } } else { pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } clear_bit(rsc->flags, pe_rsc_allocating); if (rsc->is_remote_node) { pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); CRM_ASSERT(remote_node != NULL); if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) { crm_trace("Setting Pacemaker Remote node %s to ONLINE", remote_node->details->id); remote_node->details->online = TRUE; /* We shouldn't consider an unseen remote-node unclean if we are going * to try and connect to it. Otherwise we get an unnecessary fence */ if (remote_node->details->unseen == TRUE) { remote_node->details->unclean = FALSE; } } else { crm_trace("Setting Pacemaker Remote node %s to SHUTDOWN (next role %s, %sallocated)", remote_node->details->id, role2text(rsc->next_role), (rsc->allocated_to? "" : "un")); remote_node->details->shutdown = TRUE; } } return rsc->allocated_to; } static gboolean is_op_dup(pe_resource_t *rsc, const char *name, guint interval_ms) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xmlNode *operation = NULL; guint interval2_ms = 0; CRM_ASSERT(rsc); for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { value = crm_element_value(operation, "name"); if (!pcmk__str_eq(value, name, pcmk__str_casei)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval2_ms = crm_parse_interval_spec(value); if (interval_ms != interval2_ms) { continue; } if (id == NULL) { id = ID(operation); } else { pcmk__config_err("Operation %s is duplicate of %s (do not use " "same name and interval combination more " "than once per resource)", ID(operation), id); dup = TRUE; } } } return dup; } static bool op_cannot_recur(const char *name) { return pcmk__strcase_any_of(name, RSC_STOP, RSC_START, RSC_DEMOTE, RSC_PROMOTE, NULL); } static void RecurringOp(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval_spec = NULL; const char *node_uname = node? node->details->uname : "n/a"; guint interval_ms = 0; pe_action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; CRM_ASSERT(rsc); /* Only process for the operations without role="Stopped" */ role = crm_element_value(operation, "role"); if (role && text2role(role) == RSC_ROLE_STOPPED) { return; } interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval_ms)) { crm_trace("Not creating duplicate recurring action %s for %dms %s", ID(operation), interval_ms, name); return; } if (op_cannot_recur(name)) { pcmk__config_err("Ignoring %s because action '%s' cannot be recurring", ID(operation), name); return; } key = pcmk__op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { crm_trace("Not creating recurring action %s for disabled resource %s", ID(operation), rsc->id); free(key); return; } pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s", ID(operation), rsc->id, role2text(rsc->next_role), node_uname); if (start != NULL) { pe_rsc_trace(rsc, "Marking %s %s due to %s", key, is_set(start->flags, pe_action_optional) ? "optional" : "mandatory", start->uuid); is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional); } else { pe_rsc_trace(rsc, "Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches == NULL) { is_optional = FALSE; pe_rsc_trace(rsc, "Marking %s mandatory: not active", key); } else { GListPtr gIter = NULL; for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; if (is_set(op->flags, pe_action_reschedule)) { is_optional = FALSE; break; } } g_list_free(possible_matches); } if ((rsc->next_role == RSC_ROLE_MASTER && role == NULL) || (role != NULL && text2role(role) != rsc->next_role)) { int log_level = LOG_TRACE; const char *result = "Ignoring"; if (is_optional) { char *after_key = NULL; pe_action_t *cancel_op = NULL; // It's running, so cancel it log_level = LOG_INFO; result = "Cancelling"; cancel_op = pe_cancel_op(rsc, name, interval_ms, node, data_set); switch (rsc->role) { case RSC_ROLE_SLAVE: case RSC_ROLE_STARTED: if (rsc->next_role == RSC_ROLE_MASTER) { after_key = promote_key(rsc); } else if (rsc->next_role == RSC_ROLE_STOPPED) { after_key = stop_key(rsc); } break; case RSC_ROLE_MASTER: after_key = demote_key(rsc); break; default: break; } if (after_key) { custom_action_order(rsc, NULL, cancel_op, rsc, after_key, NULL, pe_order_runnable_left, data_set); } } do_crm_log(log_level, "%s action %s (%s vs. %s)", result, key, role ? role : role2text(RSC_ROLE_SLAVE), role2text(rsc->next_role)); free(key); return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if (is_optional) { pe_rsc_trace(rsc, "%s\t %s (optional)", node_uname, mon->uuid); } if (start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) { pe_rsc_debug(rsc, "%s\t %s (cancelled : start un-runnable)", node_uname, mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); } else if (node == NULL || node->details->online == FALSE || node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", node_uname, mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); } else if (is_set(mon->flags, pe_action_optional) == FALSE) { pe_rsc_info(rsc, " Start recurring %s (%us) for %s on %s", mon->task, interval_ms / 1000, rsc->id, node_uname); } if (rsc->next_role == RSC_ROLE_MASTER) { char *running_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); free(running_master); } if (node == NULL || is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, start_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); custom_action_order(rsc, reload_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); if (rsc->next_role == RSC_ROLE_MASTER) { custom_action_order(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } else if (rsc->role == RSC_ROLE_MASTER) { custom_action_order(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } } } static void Recurring(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, pe_working_set_t * data_set) { if (is_not_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { RecurringOp(rsc, start, node, operation, data_set); } } } } static void RecurringOp_Stopped(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval_spec = NULL; const char *node_uname = node? node->details->uname : "n/a"; guint interval_ms = 0; GListPtr possible_matches = NULL; GListPtr gIter = NULL; /* Only process for the operations with role="Stopped" */ role = crm_element_value(operation, "role"); if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) { return; } interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval_ms)) { crm_trace("Not creating duplicate recurring action %s for %dms %s", ID(operation), interval_ms, name); return; } if (op_cannot_recur(name)) { pcmk__config_err("Ignoring %s because action '%s' cannot be recurring", ID(operation), name); return; } key = pcmk__op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { crm_trace("Not creating recurring action %s for disabled resource %s", ID(operation), rsc->id); free(key); return; } // @TODO add support if (is_set(rsc->flags, pe_rsc_unique) == FALSE) { crm_notice("Ignoring %s (recurring monitors for Stopped role are " "not supported for anonymous clones)", ID(operation)); return; } pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on nodes where it should not be running", ID(operation), rsc->id, role2text(rsc->next_role)); /* if the monitor exists on the node where the resource will be running, cancel it */ if (node != NULL) { possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches) { pe_action_t *cancel_op = NULL; g_list_free(possible_matches); cancel_op = pe_cancel_op(rsc, name, interval_ms, node, data_set); if (rsc->next_role == RSC_ROLE_STARTED || rsc->next_role == RSC_ROLE_SLAVE) { /* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */ /* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */ custom_action_order(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pe_order_runnable_left, data_set); } pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s", key, role, role2text(rsc->next_role), node_uname); } } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *stop_node = (pe_node_t *) gIter->data; const char *stop_node_uname = stop_node->details->uname; gboolean is_optional = TRUE; gboolean probe_is_optional = TRUE; gboolean stop_is_optional = TRUE; pe_action_t *stopped_mon = NULL; char *rc_inactive = NULL; GListPtr probe_complete_ops = NULL; GListPtr stop_ops = NULL; GListPtr local_gIter = NULL; if (node && pcmk__str_eq(stop_node_uname, node_uname, pcmk__str_casei)) { continue; } pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s", ID(operation), rsc->id, crm_str(stop_node_uname)); /* start a monitor for an already stopped resource */ possible_matches = find_actions_exact(rsc->actions, key, stop_node); if (possible_matches == NULL) { pe_rsc_trace(rsc, "Marking %s mandatory on %s: not active", key, crm_str(stop_node_uname)); is_optional = FALSE; } else { pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key, crm_str(stop_node_uname)); is_optional = TRUE; g_list_free(possible_matches); } stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set); rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); free(rc_inactive); if (is_set(rsc->flags, pe_rsc_managed)) { GList *probes = pe__resource_actions(rsc, stop_node, RSC_STATUS, FALSE); GListPtr pIter = NULL; for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; order_actions(probe, stopped_mon, pe_order_runnable_left); crm_trace("%s then %s on %s", probe->uuid, stopped_mon->uuid, stop_node->details->uname); } g_list_free(probes); } if (probe_complete_ops) { g_list_free(probe_complete_ops); } stop_ops = pe__resource_actions(rsc, stop_node, RSC_STOP, TRUE); for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) { pe_action_t *stop = (pe_action_t *) local_gIter->data; if (is_set(stop->flags, pe_action_optional) == FALSE) { stop_is_optional = FALSE; } if (is_set(stop->flags, pe_action_runnable) == FALSE) { crm_debug("%s\t %s (cancelled : stop un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); } if (is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, stop_key(rsc), stop, NULL, strdup(key), stopped_mon, pe_order_implies_then | pe_order_runnable_left, data_set); } } if (stop_ops) { g_list_free(stop_ops); } if (is_optional == FALSE && probe_is_optional && stop_is_optional && is_set(rsc->flags, pe_rsc_managed) == FALSE) { pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged", key, crm_str(stop_node_uname)); update_action_flags(stopped_mon, pe_action_optional, __FUNCTION__, __LINE__); } if (is_set(stopped_mon->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(stop_node_uname), stopped_mon->uuid); } if (stop_node->details->online == FALSE || stop_node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); } if (is_set(stopped_mon->flags, pe_action_runnable) && is_set(stopped_mon->flags, pe_action_optional) == FALSE) { crm_notice(" Start recurring %s (%us) for %s on %s", stopped_mon->task, interval_ms / 1000, rsc->id, crm_str(stop_node_uname)); } } free(key); } static void Recurring_Stopped(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, pe_working_set_t * data_set) { if (is_not_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { RecurringOp_Stopped(rsc, start, node, operation, data_set); } } } } static void handle_migration_actions(pe_resource_t * rsc, pe_node_t *current, pe_node_t *chosen, pe_working_set_t * data_set) { pe_action_t *migrate_to = NULL; pe_action_t *migrate_from = NULL; pe_action_t *start = NULL; pe_action_t *stop = NULL; gboolean partial = rsc->partial_migration_target ? TRUE : FALSE; pe_rsc_trace(rsc, "Processing migration actions %s moving from %s to %s . partial migration = %s", rsc->id, current->details->id, chosen->details->id, partial ? "TRUE" : "FALSE"); start = start_action(rsc, chosen, TRUE); stop = stop_action(rsc, current, TRUE); if (partial == FALSE) { migrate_to = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), RSC_MIGRATE, current, TRUE, TRUE, data_set); } migrate_from = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), RSC_MIGRATED, chosen, TRUE, TRUE, data_set); if ((migrate_to && migrate_from) || (migrate_from && partial)) { - set_bit(start->flags, pe_action_migrate_runnable); - set_bit(stop->flags, pe_action_migrate_runnable); + pe__set_action_flags(start, pe_action_migrate_runnable); + pe__set_action_flags(stop, pe_action_migrate_runnable); update_action_flags(start, pe_action_pseudo, __FUNCTION__, __LINE__); /* easier than trying to delete it from the graph */ /* order probes before migrations */ if (partial) { - set_bit(migrate_from->flags, pe_action_migrate_runnable); + pe__set_action_flags(migrate_from, pe_action_migrate_runnable); migrate_from->needs = start->needs; custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional, data_set); } else { - set_bit(migrate_from->flags, pe_action_migrate_runnable); - set_bit(migrate_to->flags, pe_action_migrate_runnable); + pe__set_action_flags(migrate_from, pe_action_migrate_runnable); + pe__set_action_flags(migrate_to, pe_action_migrate_runnable); migrate_to->needs = start->needs; custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_optional, data_set); custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional|pe_order_implies_first_migratable, data_set); } custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_optional|pe_order_implies_first_migratable, data_set); custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional|pe_order_implies_first_migratable|pe_order_pseudo_left, data_set); } if (migrate_to) { add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); /* Pacemaker Remote connections don't require pending to be recorded in * the CIB. We can reduce CIB writes by not setting PENDING for them. */ if (rsc->is_remote_node == FALSE) { /* migrate_to takes place on the source node, but can * have an effect on the target node depending on how * the agent is written. Because of this, we have to maintain * a record that the migrate_to occurred, in case the source node * loses membership while the migrate_to action is still in-flight. */ add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true"); } } if (migrate_from) { add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); } } void native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *start = NULL; pe_node_t *chosen = NULL; pe_node_t *current = NULL; gboolean need_stop = FALSE; bool need_promote = FALSE; gboolean is_moving = FALSE; gboolean allow_migrate = is_set(rsc->flags, pe_rsc_allow_migrate) ? TRUE : FALSE; GListPtr gIter = NULL; unsigned int num_all_active = 0; unsigned int num_clean_active = 0; bool multiply_active = FALSE; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; CRM_ASSERT(rsc); chosen = rsc->allocated_to; if (chosen != NULL && rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STARTED; pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } else if (rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STOPPED; pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } pe_rsc_trace(rsc, "Processing state transition for %s %p: %s->%s", rsc->id, rsc, role2text(rsc->role), role2text(rsc->next_role)); current = pe__find_active_on(rsc, &num_all_active, &num_clean_active); for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { pe_node_t *dangling_source = (pe_node_t *) gIter->data; pe_action_t *stop = stop_action(rsc, dangling_source, FALSE); - set_bit(stop->flags, pe_action_dangle); + pe__set_action_flags(stop, pe_action_dangle); pe_rsc_trace(rsc, "Forcing a cleanup of %s on %s", rsc->id, dangling_source->details->uname); if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, dangling_source, FALSE, data_set); } } if ((num_all_active == 2) && (num_clean_active == 2) && chosen && rsc->partial_migration_source && rsc->partial_migration_target && (current->details == rsc->partial_migration_source->details) && (chosen->details == rsc->partial_migration_target->details)) { /* The chosen node is still the migration target from a partial * migration. Attempt to continue the migration instead of recovering * by stopping the resource everywhere and starting it on a single node. */ pe_rsc_trace(rsc, "Will attempt to continue with a partial migration to target %s from %s", rsc->partial_migration_target->details->id, rsc->partial_migration_source->details->id); } else if (is_not_set(rsc->flags, pe_rsc_needs_fencing)) { /* If a resource has "requires" set to nothing or quorum, don't consider * it active on unclean nodes (similar to how all resources behave when * stonith-enabled is false). We can start such resources elsewhere * before fencing completes, and if we considered the resource active on * the failed node, we would attempt recovery for being active on * multiple nodes. */ multiply_active = (num_clean_active > 1); } else { multiply_active = (num_all_active > 1); } if (multiply_active) { if (rsc->partial_migration_target && rsc->partial_migration_source) { // Migration was in progress, but we've chosen a different target crm_notice("Resource %s can no longer migrate to %s. Stopping on %s too", rsc->id, rsc->partial_migration_target->details->uname, rsc->partial_migration_source->details->uname); } else { // Resource was incorrectly multiply active pe_proc_err("Resource %s is active on %u nodes (%s)", rsc->id, num_all_active, recovery2text(rsc->recovery_type)); crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information"); } if (rsc->recovery_type == recovery_stop_start) { need_stop = TRUE; } /* If by chance a partial migration is in process, but the migration * target is not chosen still, clear all partial migration data. */ rsc->partial_migration_source = rsc->partial_migration_target = NULL; allow_migrate = FALSE; } if (is_set(rsc->flags, pe_rsc_start_pending)) { start = start_action(rsc, chosen, TRUE); - set_bit(start->flags, pe_action_print_always); + pe__set_action_flags(start, pe_action_print_always); } if (current && chosen && current->details != chosen->details) { pe_rsc_trace(rsc, "Moving %s", rsc->id); is_moving = TRUE; need_stop = TRUE; } else if (is_set(rsc->flags, pe_rsc_failed)) { if (is_set(rsc->flags, pe_rsc_stop)) { need_stop = TRUE; pe_rsc_trace(rsc, "Recovering %s", rsc->id); } else { pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id); if (rsc->next_role == RSC_ROLE_MASTER) { need_promote = TRUE; } } } else if (is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Block %s", rsc->id); need_stop = TRUE; } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { /* Recovery of a promoted resource */ start = start_action(rsc, chosen, TRUE); if (is_set(start->flags, pe_action_optional) == FALSE) { pe_rsc_trace(rsc, "Forced start %s", rsc->id); need_stop = TRUE; } } pe_rsc_trace(rsc, "Creating actions for %s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); /* Create any additional actions required when bringing resource down and * back up to same level. */ role = rsc->role; while (role != RSC_ROLE_STOPPED) { next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; pe_rsc_trace(rsc, "Down: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop ? " required" : ""); if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) { break; } role = next_role; } while (rsc->role <= rsc->next_role && role != rsc->role && is_not_set(rsc->flags, pe_rsc_block)) { bool required = need_stop; next_role = rsc_state_matrix[role][rsc->role]; if ((next_role == RSC_ROLE_MASTER) && need_promote) { required = true; } pe_rsc_trace(rsc, "Up: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, (required? " required" : "")); if (rsc_action_matrix[role][next_role](rsc, chosen, !required, data_set) == FALSE) { break; } role = next_role; } role = rsc->role; /* Required steps from this role to the next */ while (role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; pe_rsc_trace(rsc, "Role: Executing: %s->%s = (%s on %s)", role2text(role), role2text(next_role), rsc->id, chosen?chosen->details->uname:"NA"); if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } if(is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "No monitor additional ops for blocked resource"); } else if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { pe_rsc_trace(rsc, "Monitor ops for active resource"); start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); Recurring_Stopped(rsc, start, chosen, data_set); } else { pe_rsc_trace(rsc, "Monitor ops for inactive resource"); Recurring_Stopped(rsc, NULL, NULL, data_set); } /* if we are stuck in a partial migration, where the target * of the partial migration no longer matches the chosen target. * A full stop/start is required */ if (rsc->partial_migration_target && (chosen == NULL || rsc->partial_migration_target->details != chosen->details)) { pe_rsc_trace(rsc, "Not allowing partial migration to continue. %s", rsc->id); allow_migrate = FALSE; } else if (is_moving == FALSE || is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || (current && current->details->unclean) || rsc->next_role < RSC_ROLE_STARTED) { allow_migrate = FALSE; } if (allow_migrate) { handle_migration_actions(rsc, current, chosen, data_set); } } static void rsc_avoids_remote_nodes(pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc) { node->weight = -INFINITY; } } } /*! * \internal * \brief Return allowed nodes as (possibly sorted) list * * Convert a resource's hash table of allowed nodes to a list. If printing to * stdout, sort the list, to keep action ID numbers consistent for regression * test output (while avoiding the performance hit on a live cluster). * * \param[in] rsc Resource to check for allowed nodes * \param[in] data_set Cluster working set * * \return List of resource's allowed nodes * \note Callers should take care not to rely on the list being sorted. */ static GList * allowed_nodes_as_list(pe_resource_t *rsc, pe_working_set_t *data_set) { GList *allowed_nodes = NULL; if (rsc->allowed_nodes) { allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes); } if (is_set(data_set->flags, pe_flag_stdout)) { allowed_nodes = g_list_sort(allowed_nodes, sort_node_uname); } return allowed_nodes; } void native_internal_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { /* This function is on the critical path and worth optimizing as much as possible */ pe_resource_t *top = NULL; GList *allowed_nodes = NULL; bool check_unfencing = FALSE; bool check_utilization = FALSE; if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping native constraints for unmanaged resource: %s", rsc->id); return; } top = uber_parent(rsc); // Whether resource requires unfencing check_unfencing = is_not_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing) && is_set(rsc->flags, pe_rsc_needs_unfencing); // Whether a non-default placement strategy is used check_utilization = (g_hash_table_size(rsc->utilization) > 0) && !pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei); // Order stops before starts (i.e. restart) custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional|pe_order_implies_then|pe_order_restart, data_set); // Promotable ordering: demote before stop, start before promote if (is_set(top->flags, pe_rsc_promotable) || (rsc->role > RSC_ROLE_SLAVE)) { custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_implies_first_master, data_set); custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL, pe_order_runnable_left, data_set); } // Don't clear resource history if probing on same node custom_action_order(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, pe_order_same_node|pe_order_then_cancels_first, data_set); // Certain checks need allowed nodes if (check_unfencing || check_utilization || rsc->container) { allowed_nodes = allowed_nodes_as_list(rsc, data_set); } if (check_unfencing) { /* Check if the node needs to be unfenced first */ for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *node = item->data; pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, data_set); crm_debug("Ordering any stops of %s before %s, and any starts after", rsc->id, unfence->uuid); /* * It would be more efficient to order clone resources once, * rather than order each instance, but ordering the instance * allows us to avoid unnecessary dependencies that might conflict * with user constraints. * * @TODO: This constraint can still produce a transition loop if the * resource has a stop scheduled on the node being unfenced, and * there is a user ordering constraint to start some other resource * (which will be ordered after the unfence) before stopping this * resource. An example is "start some slow-starting cloned service * before stopping an associated virtual IP that may be moving to * it": * stop this -> unfencing -> start that -> stop this */ custom_action_order(rsc, stop_key(rsc), NULL, NULL, strdup(unfence->uuid), unfence, pe_order_optional|pe_order_same_node, data_set); custom_action_order(NULL, strdup(unfence->uuid), unfence, rsc, start_key(rsc), NULL, pe_order_implies_then_on_node|pe_order_same_node, data_set); } } if (check_utilization) { GListPtr gIter = NULL; pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s", rsc->id, data_set->placement_strategy); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; char *load_stopped_task = crm_strdup_printf(LOAD_STOPPED "_%s", current->details->uname); pe_action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = pe__copy_node(current); update_action_flags(load_stopped, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__); } custom_action_order(rsc, stop_key(rsc), NULL, NULL, load_stopped_task, load_stopped, pe_order_load, data_set); } for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *next = item->data; char *load_stopped_task = crm_strdup_printf(LOAD_STOPPED "_%s", next->details->uname); pe_action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = pe__copy_node(next); update_action_flags(load_stopped, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__); } custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, start_key(rsc), NULL, pe_order_load, data_set); custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_load, data_set); free(load_stopped_task); } } if (rsc->container) { pe_resource_t *remote_rsc = NULL; if (rsc->is_remote_node) { // rsc is the implicit remote connection for a guest or bundle node /* Do not allow a guest resource to live on a Pacemaker Remote node, * to avoid nesting remotes. However, allow bundles to run on remote * nodes. */ if (is_not_set(rsc->flags, pe_rsc_allow_remote_remotes)) { rsc_avoids_remote_nodes(rsc->container); } /* If someone cleans up a guest or bundle node's container, we will * likely schedule a (re-)probe of the container and recovery of the * connection. Order the connection stop after the container probe, * so that if we detect the container running, we will trigger a new * transition and avoid the unnecessary recovery. */ new_rsc_order(rsc->container, RSC_STATUS, rsc, RSC_STOP, pe_order_optional, data_set); /* A user can specify that a resource must start on a Pacemaker Remote * node by explicitly configuring it with the container=NODENAME * meta-attribute. This is of questionable merit, since location * constraints can accomplish the same thing. But we support it, so here * we check whether a resource (that is not itself a remote connection) * has container set to a remote node or guest node resource. */ } else if (rsc->container->is_remote_node) { remote_rsc = rsc->container; } else { remote_rsc = pe__resource_contains_guest_node(data_set, rsc->container); } if (remote_rsc) { /* Force the resource on the Pacemaker Remote node instead of * colocating the resource with the container resource. */ for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *node = item->data; if (node->details->remote_rsc != remote_rsc) { node->weight = -INFINITY; } } } else { /* This resource is either a filler for a container that does NOT * represent a Pacemaker Remote node, or a Pacemaker Remote * connection resource for a guest node or bundle. */ int score; crm_trace("Order and colocate %s relative to its container %s", rsc->id, rsc->container->id); custom_action_order(rsc->container, pcmk__op_key(rsc->container->id, RSC_START, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_implies_then|pe_order_runnable_left, data_set); custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, rsc->container, pcmk__op_key(rsc->container->id, RSC_STOP, 0), NULL, pe_order_implies_first, data_set); if (is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { score = 10000; /* Highly preferred but not essential */ } else { score = INFINITY; /* Force them to run on the same host */ } rsc_colocation_new("resource-with-container", NULL, score, rsc, rsc->container, NULL, NULL, data_set); } } if (rsc->is_remote_node || is_set(rsc->flags, pe_rsc_fence_device)) { /* don't allow remote nodes to run stonith devices * or remote connection resources.*/ rsc_avoids_remote_nodes(rsc); } g_list_free(allowed_nodes); } void native_rsc_colocation_lh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, rsc_colocation_t *constraint, pe_working_set_t *data_set) { if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if (constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } if (constraint->score == 0) { return; } pe_rsc_trace(rsc_lh, "Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint, data_set); } enum filter_colocation_res filter_colocation_constraint(pe_resource_t * rsc_lh, pe_resource_t * rsc_rh, rsc_colocation_t * constraint, gboolean preview) { if (constraint->score == 0) { return influence_nothing; } /* rh side must be allocated before we can process constraint */ if (preview == FALSE && is_set(rsc_rh->flags, pe_rsc_provisional)) { return influence_nothing; } if ((constraint->role_lh >= RSC_ROLE_SLAVE) && rsc_lh->parent && is_set(rsc_lh->parent->flags, pe_rsc_promotable) && is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* LH and RH resources have already been allocated, place the correct * priority on LH rsc for the given promotable clone resource role */ return influence_rsc_priority; } if (preview == FALSE && is_not_set(rsc_lh->flags, pe_rsc_provisional)) { // Log an error if we violated a mandatory colocation constraint const pe_node_t *rh_node = rsc_rh->allocated_to; if (rsc_lh->allocated_to == NULL) { // Dependent resource isn't allocated, so constraint doesn't matter return influence_nothing; } if (constraint->score >= INFINITY) { // Dependent resource must colocate with rh_node if ((rh_node == NULL) || (rh_node->details != rsc_lh->allocated_to->details)) { crm_err("%s must be colocated with %s but is not (%s vs. %s)", rsc_lh->id, rsc_rh->id, rsc_lh->allocated_to->details->uname, (rh_node? rh_node->details->uname : "unallocated")); } } else if (constraint->score <= -INFINITY) { // Dependent resource must anti-colocate with rh_node if ((rh_node != NULL) && (rsc_lh->allocated_to->details == rh_node->details)) { crm_err("%s and %s must be anti-colocated but are allocated " "to the same node (%s)", rsc_lh->id, rsc_rh->id, rh_node->details->uname); } } return influence_nothing; } if (constraint->score > 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { crm_trace("LH: Skipping constraint: \"%s\" state filter nextrole is %s", role2text(constraint->role_lh), role2text(rsc_lh->next_role)); return influence_nothing; } if (constraint->score > 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { crm_trace("RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return influence_nothing; } if (constraint->score < 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) { crm_trace("LH: Skipping negative constraint: \"%s\" state filter", role2text(constraint->role_lh)); return influence_nothing; } if (constraint->score < 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) { crm_trace("RH: Skipping negative constraint: \"%s\" state filter", role2text(constraint->role_rh)); return influence_nothing; } return influence_rsc_location; } static void influence_priority(pe_resource_t * rsc_lh, pe_resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *rh_value = NULL; const char *lh_value = NULL; const char *attribute = CRM_ATTR_ID; int score_multiplier = 1; if (constraint->score == 0) { return; } if (!rsc_rh->allocated_to || !rsc_lh->allocated_to) { return; } if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } lh_value = pe_node_attribute_raw(rsc_lh->allocated_to, attribute); rh_value = pe_node_attribute_raw(rsc_rh->allocated_to, attribute); if (!pcmk__str_eq(lh_value, rh_value, pcmk__str_casei)) { if(constraint->score == INFINITY && constraint->role_lh == RSC_ROLE_MASTER) { rsc_lh->priority = -INFINITY; } return; } if (constraint->role_rh && (constraint->role_rh != rsc_rh->next_role)) { return; } if (constraint->role_lh == RSC_ROLE_SLAVE) { score_multiplier = -1; } rsc_lh->priority = pe__add_scores(score_multiplier * constraint->score, rsc_lh->priority); } static void colocation_match(pe_resource_t * rsc_lh, pe_resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *attribute = CRM_ATTR_ID; const char *value = NULL; GHashTable *work = NULL; GHashTableIter iter; pe_node_t *node = NULL; if (constraint->score == 0) { return; } if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (rsc_rh->allocated_to) { value = pe_node_attribute_raw(rsc_rh->allocated_to, attribute); } else if (constraint->score < 0) { // Nothing to do (anti-colocation with something that is not running) return; } work = pcmk__copy_node_table(rsc_lh->allowed_nodes); g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (rsc_rh->allocated_to == NULL) { pe_rsc_trace(rsc_lh, "%s: %s@%s -= %d (%s inactive)", constraint->id, rsc_lh->id, node->details->uname, constraint->score, rsc_rh->id); node->weight = pe__add_scores(-constraint->score, node->weight); } else if (pcmk__str_eq(pe_node_attribute_raw(node, attribute), value, pcmk__str_casei)) { if (constraint->score < CRM_SCORE_INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s@%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = pe__add_scores(constraint->score, node->weight); } } else if (constraint->score >= CRM_SCORE_INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s@%s -= %d (%s mismatch)", constraint->id, rsc_lh->id, node->details->uname, constraint->score, attribute); node->weight = pe__add_scores(-constraint->score, node->weight); } } if (can_run_any(work) || constraint->score <= -INFINITY || constraint->score >= INFINITY) { g_hash_table_destroy(rsc_lh->allowed_nodes); rsc_lh->allowed_nodes = work; work = NULL; } else { pe_rsc_info(rsc_lh, "%s: Rolling back scores from %s (no available nodes)", rsc_lh->id, rsc_rh->id); } if (work) { g_hash_table_destroy(work); } } void native_rsc_colocation_rh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, rsc_colocation_t *constraint, pe_working_set_t *data_set) { enum filter_colocation_res filter_results; CRM_ASSERT(rsc_lh); CRM_ASSERT(rsc_rh); filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint, FALSE); pe_rsc_trace(rsc_lh, "%s %s with %s (%s, score=%d, filter=%d)", ((constraint->score >= 0)? "Colocating" : "Anti-colocating"), rsc_lh->id, rsc_rh->id, constraint->id, constraint->score, filter_results); switch (filter_results) { case influence_rsc_priority: influence_priority(rsc_lh, rsc_rh, constraint); break; case influence_rsc_location: colocation_match(rsc_lh, rsc_rh, constraint); break; case influence_nothing: default: return; } } static gboolean filter_rsc_ticket(pe_resource_t * rsc_lh, rsc_ticket_t * rsc_ticket) { if (rsc_ticket->role_lh != RSC_ROLE_UNKNOWN && rsc_ticket->role_lh != rsc_lh->role) { pe_rsc_trace(rsc_lh, "LH: Skipping constraint: \"%s\" state filter", role2text(rsc_ticket->role_lh)); return FALSE; } return TRUE; } void rsc_ticket_constraint(pe_resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working_set_t * data_set) { if (rsc_ticket == NULL) { pe_err("rsc_ticket was NULL"); return; } if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", rsc_ticket->id); return; } if (rsc_ticket->ticket->granted && rsc_ticket->ticket->standby == FALSE) { return; } if (rsc_lh->children) { GListPtr gIter = rsc_lh->children; pe_rsc_trace(rsc_lh, "Processing ticket dependencies from %s", rsc_lh->id); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; rsc_ticket_constraint(child_rsc, rsc_ticket, data_set); } return; } pe_rsc_trace(rsc_lh, "%s: Processing ticket dependency on %s (%s, %s)", rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id, role2text(rsc_ticket->role_lh)); if ((rsc_ticket->ticket->granted == FALSE) && (rsc_lh->running_on != NULL)) { GListPtr gIter = NULL; switch (rsc_ticket->loss_policy) { case loss_ticket_stop: resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); break; case loss_ticket_demote: // Promotion score will be set to -INFINITY in promotion_order() if (rsc_ticket->role_lh != RSC_ROLE_MASTER) { resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); } break; case loss_ticket_fence: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); for (gIter = rsc_lh->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pe_fence_node(data_set, node, "deadman ticket was lost", FALSE); } break; case loss_ticket_freeze: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } if (rsc_lh->running_on != NULL) { clear_bit(rsc_lh->flags, pe_rsc_managed); set_bit(rsc_lh->flags, pe_rsc_block); } break; } } else if (rsc_ticket->ticket->granted == FALSE) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__no_ticket__", data_set); } } else if (rsc_ticket->ticket->standby) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__ticket_standby__", data_set); } } } enum pe_action_flags native_action_flags(pe_action_t * action, pe_node_t * node) { return action->flags; } static inline bool is_primitive_action(pe_action_t *action) { return action && action->rsc && (action->rsc->variant == pe_native); } /*! * \internal * \brief Set action bits appropriately when pe_restart_order is used * * \param[in] first 'First' action in an ordering with pe_restart_order * \param[in] then 'Then' action in an ordering with pe_restart_order * \param[in] filter What ordering flags to care about * * \note pe_restart_order is set for "stop resource before starting it" and * "stop later group member before stopping earlier group member" */ static void handle_restart_ordering(pe_action_t *first, pe_action_t *then, enum pe_action_flags filter) { const char *reason = NULL; CRM_ASSERT(is_primitive_action(first)); CRM_ASSERT(is_primitive_action(then)); // We need to update the action in two cases: // ... if 'then' is required if (is_set(filter, pe_action_optional) && is_not_set(then->flags, pe_action_optional)) { reason = "restart"; } /* ... if 'then' is unrunnable start of managed resource (if a resource * should restart but can't start, we still want to stop) */ if (is_set(filter, pe_action_runnable) && is_not_set(then->flags, pe_action_runnable) && is_set(then->rsc->flags, pe_rsc_managed) && pcmk__str_eq(then->task, RSC_START, pcmk__str_casei)) { reason = "stop"; } if (reason == NULL) { return; } pe_rsc_trace(first->rsc, "Handling %s -> %s for %s", first->uuid, then->uuid, reason); // Make 'first' required if it is runnable if (is_set(first->flags, pe_action_runnable)) { pe_action_implies(first, then, pe_action_optional); } // Make 'first' required if 'then' is required if (is_not_set(then->flags, pe_action_optional)) { pe_action_implies(first, then, pe_action_optional); } // Make 'first' unmigratable if 'then' is unmigratable if (is_not_set(then->flags, pe_action_migrate_runnable)) { pe_action_implies(first, then, pe_action_migrate_runnable); } // Make 'then' unrunnable if 'first' is required but unrunnable if (is_not_set(first->flags, pe_action_optional) && is_not_set(first->flags, pe_action_runnable)) { pe_action_implies(then, first, pe_action_runnable); } } enum pe_graph_flags native_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set) { /* flags == get_action_flags(first, then_node) called from update_action() */ enum pe_graph_flags changed = pe_graph_none; enum pe_action_flags then_flags = then->flags; enum pe_action_flags first_flags = first->flags; crm_trace( "Testing %s on %s (0x%.6x) with %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, then->uuid, then->flags); if (type & pe_order_asymmetrical) { pe_resource_t *then_rsc = then->rsc; enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0; if (!then_rsc) { /* ignore */ } else if ((then_rsc_role == RSC_ROLE_STOPPED) && pcmk__str_eq(then->task, RSC_STOP, pcmk__str_casei)) { /* ignore... if 'then' is supposed to be stopped after 'first', but * then is already stopped, there is nothing to be done when non-symmetrical. */ } else if ((then_rsc_role >= RSC_ROLE_STARTED) && pcmk__str_eq(then->task, RSC_START, pcmk__str_casei) && is_set(then->flags, pe_action_optional) && then->node && pcmk__list_of_1(then_rsc->running_on) && then->node->details == ((pe_node_t *) then_rsc->running_on->data)->details) { /* Ignore. If 'then' is supposed to be started after 'first', but * 'then' is already started, there is nothing to be done when * asymmetrical -- unless the start is mandatory, which indicates * the resource is restarting, and the ordering is still needed. */ } else if (!(first->flags & pe_action_runnable)) { /* prevent 'then' action from happening if 'first' is not runnable and * 'then' has not yet occurred. */ pe_action_implies(then, first, pe_action_optional); pe_action_implies(then, first, pe_action_runnable); pe_rsc_trace(then->rsc, "Unset optional and runnable on %s", then->uuid); } else { /* ignore... then is allowed to start/stop if it wants to. */ } } if (type & pe_order_implies_first) { if (is_set(filter, pe_action_optional) && is_not_set(flags /* Should be then_flags? */, pe_action_optional)) { // Needs is_set(first_flags, pe_action_optional) too? pe_rsc_trace(first->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); pe_action_implies(first, then, pe_action_optional); } if (is_set(flags, pe_action_migrate_runnable) && is_set(then->flags, pe_action_migrate_runnable) == FALSE && is_set(then->flags, pe_action_optional) == FALSE) { pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid); pe_action_implies(first, then, pe_action_migrate_runnable); } } if (type & pe_order_implies_first_master) { if ((filter & pe_action_optional) && ((then->flags & pe_action_optional) == FALSE) && then->rsc && (then->rsc->role == RSC_ROLE_MASTER)) { pe_action_implies(first, then, pe_action_optional); if (is_set(first->flags, pe_action_migrate_runnable) && is_set(then->flags, pe_action_migrate_runnable) == FALSE) { pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid); pe_action_implies(first, then, pe_action_migrate_runnable); } pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); } } if ((type & pe_order_implies_first_migratable) && is_set(filter, pe_action_optional)) { if (((then->flags & pe_action_migrate_runnable) == FALSE) || ((then->flags & pe_action_runnable) == FALSE)) { pe_rsc_trace(then->rsc, "Unset runnable on %s because %s is neither runnable or migratable", first->uuid, then->uuid); pe_action_implies(first, then, pe_action_runnable); } if ((then->flags & pe_action_optional) == 0) { pe_rsc_trace(then->rsc, "Unset optional on %s because %s is not optional", first->uuid, then->uuid); pe_action_implies(first, then, pe_action_optional); } } if ((type & pe_order_pseudo_left) && is_set(filter, pe_action_optional)) { if ((first->flags & pe_action_runnable) == FALSE) { pe_action_implies(then, first, pe_action_migrate_runnable); pe__clear_action_flags(then, pe_action_pseudo); pe_rsc_trace(then->rsc, "Unset pseudo on %s because %s is not runnable", then->uuid, first->uuid); } } if (is_set(type, pe_order_runnable_left) && is_set(filter, pe_action_runnable) && is_set(then->flags, pe_action_runnable) && is_set(flags, pe_action_runnable) == FALSE) { pe_rsc_trace(then->rsc, "Unset runnable on %s because of %s", then->uuid, first->uuid); pe_action_implies(then, first, pe_action_runnable); pe_action_implies(then, first, pe_action_migrate_runnable); } if (is_set(type, pe_order_implies_then) && is_set(filter, pe_action_optional) && is_set(then->flags, pe_action_optional) && is_set(flags, pe_action_optional) == FALSE) { /* in this case, treat migrate_runnable as if first is optional */ if (is_set(first->flags, pe_action_migrate_runnable) == FALSE) { pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", then->uuid, first->uuid); pe_action_implies(then, first, pe_action_optional); } } if (is_set(type, pe_order_restart)) { handle_restart_ordering(first, then, filter); } if (then_flags != then->flags) { changed |= pe_graph_updated_then; pe_rsc_trace(then->rsc, "Then: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", then->uuid, then->node ? then->node->details->uname : "[none]", then->flags, then_flags, first->uuid, first->flags); if(then->rsc && then->rsc->parent) { /* "X_stop then X_start" doesn't get handled for cloned groups unless we do this */ update_action(then, data_set); } } if (first_flags != first->flags) { changed |= pe_graph_updated_first; pe_rsc_trace(first->rsc, "First: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, first_flags, then->uuid, then->flags); } return changed; } void native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { GListPtr gIter = NULL; GHashTableIter iter; pe_node_t *node = NULL; if (constraint == NULL) { pe_err("Constraint is NULL"); return; } else if (rsc == NULL) { pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id); return; } pe_rsc_trace(rsc, "Applying %s (%s) to %s", constraint->id, role2text(constraint->role_filter), rsc->id); /* take "lifetime" into account */ if (constraint->role_filter > RSC_ROLE_UNKNOWN && constraint->role_filter != rsc->next_role) { pe_rsc_debug(rsc, "Constraint (%s) is not active (role : %s vs. %s)", constraint->id, role2text(constraint->role_filter), role2text(rsc->next_role)); return; } if (constraint->node_list_rh == NULL) { pe_rsc_trace(rsc, "RHS of constraint %s is NULL", constraint->id); return; } for (gIter = constraint->node_list_rh; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pe_node_t *other_node = NULL; other_node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (other_node != NULL) { pe_rsc_trace(rsc, "%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = pe__add_scores(other_node->weight, node->weight); } else { other_node = pe__copy_node(node); pe_rsc_trace(rsc, "%s: %d (insert %d)", other_node->details->uname, other_node->weight, constraint->discover_mode); g_hash_table_insert(rsc->allowed_nodes, (gpointer) other_node->details->id, other_node); } if (other_node->rsc_discover_mode < constraint->discover_mode) { if (constraint->discover_mode == pe_discover_exclusive) { rsc->exclusive_discover = TRUE; } /* exclusive > never > always... always is default */ other_node->rsc_discover_mode = constraint->discover_mode; } } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { pe_rsc_trace(rsc, "%s + %s : %d", rsc->id, node->details->uname, node->weight); } } void native_expand(pe_resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Processing actions from %s", rsc->id); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; crm_trace("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } } #define log_change(a, fmt, args...) do { \ if(a && a->reason && terminal) { \ printf(" * "fmt" \tdue to %s\n", ##args, a->reason); \ } else if(a && a->reason) { \ crm_notice(fmt" \tdue to %s", ##args, a->reason); \ } else if(terminal) { \ printf(" * "fmt"\n", ##args); \ } else { \ crm_notice(fmt, ##args); \ } \ } while(0) #define STOP_SANITY_ASSERT(lineno) do { \ if(current && current->details->unclean) { \ /* It will be a pseudo op */ \ } else if(stop == NULL) { \ crm_err("%s:%d: No stop action exists for %s", __FUNCTION__, lineno, rsc->id); \ CRM_ASSERT(stop != NULL); \ } else if(is_set(stop->flags, pe_action_optional)) { \ crm_err("%s:%d: Action %s is still optional", __FUNCTION__, lineno, stop->uuid); \ CRM_ASSERT(is_not_set(stop->flags, pe_action_optional)); \ } \ } while(0) static void LogAction(const char *change, pe_resource_t *rsc, pe_node_t *origin, pe_node_t *destination, pe_action_t *action, pe_action_t *source, gboolean terminal) { int len = 0; char *reason = NULL; char *details = NULL; bool same_host = FALSE; bool same_role = FALSE; bool need_role = FALSE; static int rsc_width = 5; static int detail_width = 5; CRM_ASSERT(action); CRM_ASSERT(destination != NULL || origin != NULL); if(source == NULL) { source = action; } len = strlen(rsc->id); if(len > rsc_width) { rsc_width = len + 2; } if(rsc->role > RSC_ROLE_STARTED || rsc->next_role > RSC_ROLE_SLAVE) { need_role = TRUE; } if(origin != NULL && destination != NULL && origin->details == destination->details) { same_host = TRUE; } if(rsc->role == rsc->next_role) { same_role = TRUE; } if (need_role && (origin == NULL)) { /* Starting and promoting a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), role2text(rsc->next_role), destination->details->uname); } else if (origin == NULL) { /* Starting a resource */ details = crm_strdup_printf("%s", destination->details->uname); } else if (need_role && (destination == NULL)) { /* Stopping a promotable clone instance */ details = crm_strdup_printf("%s %s", role2text(rsc->role), origin->details->uname); } else if (destination == NULL) { /* Stopping a resource */ details = crm_strdup_printf("%s", origin->details->uname); } else if (need_role && same_role && same_host) { /* Recovering, restarting or re-promoting a promotable clone instance */ details = crm_strdup_printf("%s %s", role2text(rsc->role), origin->details->uname); } else if (same_role && same_host) { /* Recovering or Restarting a normal resource */ details = crm_strdup_printf("%s", origin->details->uname); } else if (need_role && same_role) { /* Moving a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", origin->details->uname, destination->details->uname, role2text(rsc->role)); } else if (same_role) { /* Moving a normal resource */ details = crm_strdup_printf("%s -> %s", origin->details->uname, destination->details->uname); } else if (same_host) { /* Promoting or demoting a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), role2text(rsc->next_role), origin->details->uname); } else { /* Moving and promoting/demoting */ details = crm_strdup_printf("%s %s -> %s %s", role2text(rsc->role), origin->details->uname, role2text(rsc->next_role), destination->details->uname); } len = strlen(details); if(len > detail_width) { detail_width = len; } if(source->reason && is_not_set(action->flags, pe_action_runnable)) { reason = crm_strdup_printf(" due to %s (blocked)", source->reason); } else if(source->reason) { reason = crm_strdup_printf(" due to %s", source->reason); } else if(is_not_set(action->flags, pe_action_runnable)) { reason = strdup(" blocked"); } else { reason = strdup(""); } if(terminal) { printf(" * %-8s %-*s ( %*s ) %s\n", change, rsc_width, rsc->id, detail_width, details, reason); } else { crm_notice(" * %-8s %-*s ( %*s ) %s", change, rsc_width, rsc->id, detail_width, details, reason); } free(details); free(reason); } void LogActions(pe_resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) { pe_node_t *next = NULL; pe_node_t *current = NULL; pe_node_t *start_node = NULL; pe_action_t *stop = NULL; pe_action_t *start = NULL; pe_action_t *demote = NULL; pe_action_t *promote = NULL; char *key = NULL; gboolean moving = FALSE; GListPtr possible_matches = NULL; if(rsc->variant == pe_container) { pcmk__bundle_log_actions(rsc, data_set, terminal); return; } if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; LogActions(child_rsc, data_set, terminal); } return; } next = rsc->allocated_to; if (rsc->running_on) { current = pe__current_node(rsc); if (rsc->role == RSC_ROLE_STOPPED) { /* * This can occur when resources are being recovered * We fiddle with the current role in native_create_actions() */ rsc->role = RSC_ROLE_STARTED; } } if (current == NULL && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't log stopped orphans */ return; } if (is_not_set(rsc->flags, pe_rsc_managed) || (current == NULL && next == NULL)) { pe_rsc_info(rsc, "Leave %s\t(%s%s)", rsc->id, role2text(rsc->role), is_not_set(rsc->flags, pe_rsc_managed) ? " unmanaged" : ""); return; } if (current != NULL && next != NULL && !pcmk__str_eq(current->details->id, next->details->id, pcmk__str_casei)) { moving = TRUE; } possible_matches = pe__resource_actions(rsc, next, RSC_START, FALSE); if (possible_matches) { start = possible_matches->data; g_list_free(possible_matches); } if ((start == NULL) || is_not_set(start->flags, pe_action_runnable)) { start_node = NULL; } else { start_node = current; } possible_matches = pe__resource_actions(rsc, start_node, RSC_STOP, FALSE); if (possible_matches) { stop = possible_matches->data; g_list_free(possible_matches); } possible_matches = pe__resource_actions(rsc, next, RSC_PROMOTE, FALSE); if (possible_matches) { promote = possible_matches->data; g_list_free(possible_matches); } possible_matches = pe__resource_actions(rsc, next, RSC_DEMOTE, FALSE); if (possible_matches) { demote = possible_matches->data; g_list_free(possible_matches); } if (rsc->role == rsc->next_role) { pe_action_t *migrate_op = NULL; possible_matches = pe__resource_actions(rsc, next, RSC_MIGRATED, FALSE); if (possible_matches) { migrate_op = possible_matches->data; } CRM_CHECK(next != NULL,); if (next == NULL) { } else if (migrate_op && is_set(migrate_op->flags, pe_action_runnable) && current) { LogAction("Migrate", rsc, current, next, start, NULL, terminal); } else if (is_set(rsc->flags, pe_rsc_reload)) { LogAction("Reload", rsc, current, next, start, NULL, terminal); } else if (start == NULL || is_set(start->flags, pe_action_optional)) { if ((demote != NULL) && (promote != NULL) && is_not_set(demote->flags, pe_action_optional) && is_not_set(promote->flags, pe_action_optional)) { LogAction("Re-promote", rsc, current, next, promote, demote, terminal); } else { pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } } else if (is_not_set(start->flags, pe_action_runnable)) { LogAction("Stop", rsc, current, NULL, stop, (stop && stop->reason)? stop : start, terminal); STOP_SANITY_ASSERT(__LINE__); } else if (moving && current) { LogAction(is_set(rsc->flags, pe_rsc_failed) ? "Recover" : "Move", rsc, current, next, stop, NULL, terminal); } else if (is_set(rsc->flags, pe_rsc_failed)) { LogAction("Recover", rsc, current, NULL, stop, NULL, terminal); STOP_SANITY_ASSERT(__LINE__); } else { LogAction("Restart", rsc, current, next, start, NULL, terminal); /* STOP_SANITY_ASSERT(__LINE__); False positive for migrate-fail-7 */ } g_list_free(possible_matches); return; } if(stop && (rsc->next_role == RSC_ROLE_STOPPED || (start && is_not_set(start->flags, pe_action_runnable)))) { GListPtr gIter = NULL; key = stop_key(rsc); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pe_action_t *stop_op = NULL; possible_matches = find_actions(rsc->actions, key, node); if (possible_matches) { stop_op = possible_matches->data; g_list_free(possible_matches); } if (stop_op && (stop_op->flags & pe_action_runnable)) { STOP_SANITY_ASSERT(__LINE__); } LogAction("Stop", rsc, node, NULL, stop_op, (stop_op && stop_op->reason)? stop_op : start, terminal); } free(key); } else if (stop && is_set(rsc->flags, pe_rsc_failed) && is_set(rsc->flags, pe_rsc_stop)) { /* 'stop' may be NULL if the failure was ignored */ LogAction("Recover", rsc, current, next, stop, start, terminal); STOP_SANITY_ASSERT(__LINE__); } else if (moving) { LogAction("Move", rsc, current, next, stop, NULL, terminal); STOP_SANITY_ASSERT(__LINE__); } else if (is_set(rsc->flags, pe_rsc_reload)) { LogAction("Reload", rsc, current, next, start, NULL, terminal); } else if (stop != NULL && is_not_set(stop->flags, pe_action_optional)) { LogAction("Restart", rsc, current, next, start, NULL, terminal); STOP_SANITY_ASSERT(__LINE__); } else if (rsc->role == RSC_ROLE_MASTER) { CRM_LOG_ASSERT(current != NULL); LogAction("Demote", rsc, current, next, demote, NULL, terminal); } else if(rsc->next_role == RSC_ROLE_MASTER) { CRM_LOG_ASSERT(next); LogAction("Promote", rsc, current, next, promote, NULL, terminal); } else if (rsc->role == RSC_ROLE_STOPPED && rsc->next_role > RSC_ROLE_STOPPED) { LogAction("Start", rsc, current, next, start, NULL, terminal); } } gboolean StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; pe_action_t *stop; if (rsc->partial_migration_target) { if (rsc->partial_migration_target->details == current->details) { pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname, next->details->uname, rsc->id); continue; } else { pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id); optional = FALSE; } } pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname); stop = stop_action(rsc, current, optional); if(rsc->allocated_to == NULL) { pe_action_set_reason(stop, "node availability", TRUE); } if (is_not_set(rsc->flags, pe_rsc_managed)) { update_action_flags(stop, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); } if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set); } if(is_set(rsc->flags, pe_rsc_needs_unfencing)) { pe_action_t *unfence = pe_fence_op(current, "on", TRUE, NULL, FALSE, data_set); order_actions(stop, unfence, pe_order_implies_first); if (!node_has_been_unfenced(current)) { pe_proc_err("Stopping %s until %s can be unfenced", rsc->id, current->details->uname); } } } return TRUE; } static void order_after_unfencing(pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order, pe_working_set_t *data_set) { /* When unfencing is in use, we order unfence actions before any probe or * start of resources that require unfencing, and also of fence devices. * * This might seem to violate the principle that fence devices require * only quorum. However, fence agents that unfence often don't have enough * information to even probe or start unless the node is first unfenced. */ if (is_unfence_device(rsc, data_set) || is_set(rsc->flags, pe_rsc_needs_unfencing)) { /* Start with an optional ordering. Requiring unfencing would result in * the node being unfenced, and all its resources being stopped, * whenever a new resource is added -- which would be highly suboptimal. */ pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, data_set); order_actions(unfence, action, order); if (!node_has_been_unfenced(node)) { // But unfencing is required if it has never been done char *reason = crm_strdup_printf("required by %s %s", rsc->id, action->task); trigger_unfencing(NULL, node, reason, NULL, data_set); free(reason); } } } gboolean StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { pe_action_t *start = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s on %s %d %d", rsc->id, next ? next->details->uname : "N/A", optional, next ? next->weight : 0); start = start_action(rsc, next, TRUE); order_after_unfencing(rsc, next, start, pe_order_implies_then, data_set); if (is_set(start->flags, pe_action_runnable) && optional == FALSE) { update_action_flags(start, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__); } return TRUE; } gboolean PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; CRM_ASSERT(rsc); CRM_CHECK(next != NULL, return FALSE); pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname); action_list = pe__resource_actions(rsc, next, RSC_START, TRUE); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *start = (pe_action_t *) gIter->data; if (is_set(start->flags, pe_action_runnable) == FALSE) { runnable = FALSE; } } g_list_free(action_list); if (runnable) { promote_action(rsc, next, optional); return TRUE; } pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id); action_list = pe__resource_actions(rsc, next, RSC_PROMOTE, TRUE); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *promote = (pe_action_t *) gIter->data; update_action_flags(promote, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); } g_list_free(action_list); return TRUE; } gboolean DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A"); demote_action(rsc, current, optional); } return TRUE; } gboolean RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); crm_err("%s on %s", rsc->id, next ? next->details->uname : "N/A"); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); return FALSE; } gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set) { if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if (node == NULL) { pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if (node->details->unclean || node->details->online == FALSE) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete_action(rsc, node, optional); new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, optional ? pe_order_implies_then : pe_order_optional, data_set); new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, optional ? pe_order_implies_then : pe_order_optional, data_set); return TRUE; } gboolean native_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete, gboolean force, pe_working_set_t * data_set) { enum pe_ordering flags = pe_order_optional; char *key = NULL; pe_action_t *probe = NULL; pe_node_t *running = NULL; pe_node_t *allowed = NULL; pe_resource_t *top = uber_parent(rsc); static const char *rc_master = NULL; static const char *rc_inactive = NULL; if (rc_inactive == NULL) { rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); rc_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); } CRM_CHECK(node != NULL, return FALSE); if (force == FALSE && is_not_set(data_set->flags, pe_flag_startup_probes)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id); return FALSE; } if (pe__is_guest_or_remote_node(node)) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because Pacemaker Remote nodes cannot run stonith agents", rsc->id, node->details->id); return FALSE; } else if (pe__is_guest_node(node) && pe__resource_contains_guest_node(data_set, rsc)) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because guest nodes cannot run resources containing guest nodes", rsc->id, node->details->id); return FALSE; } else if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because Pacemaker Remote nodes cannot host remote connections", rsc->id, node->details->id); return FALSE; } } if (rsc->children) { GListPtr gIter = NULL; gboolean any_created = FALSE; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set) || any_created; } return any_created; } else if ((rsc->container) && (!rsc->is_remote_node)) { pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id); return FALSE; } if (is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id); return FALSE; } // Check whether resource is already known on node if (!force && g_hash_table_lookup(rsc->known_on, node->details->id)) { pe_rsc_trace(rsc, "Skipping known: %s on %s", rsc->id, node->details->uname); return FALSE; } allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (rsc->exclusive_discover || top->exclusive_discover) { if (allowed == NULL) { /* exclusive discover is enabled and this node is not in the allowed list. */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, A", rsc->id, node->details->id); return FALSE; } else if (allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, B", rsc->id, node->details->id); return FALSE; } } if(allowed == NULL && node->rsc_discover_mode == pe_discover_never) { /* If this node was allowed to host this resource it would * have been explicitly added to the 'allowed_nodes' list. * However it wasn't and the node has discovery disabled, so * no need to probe for this resource. */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, C", rsc->id, node->details->id); return FALSE; } if (allowed && allowed->rsc_discover_mode == pe_discover_never) { /* this resource is marked as not needing to be discovered on this node */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, discovery mode", rsc->id, node->details->id); return FALSE; } if (pe__is_guest_node(node)) { pe_resource_t *remote = node->details->remote_rsc->container; if(remote->role == RSC_ROLE_STOPPED) { /* If the container is stopped, then we know anything that * might have been inside it is also stopped and there is * no need to probe. * * If we don't know the container's state on the target * either: * * - the container is running, the transition will abort * and we'll end up in a different case next time, or * * - the container is stopped * * Either way there is no need to probe. * */ if(remote->allocated_to && g_hash_table_lookup(remote->known_on, remote->allocated_to->details->id) == NULL) { /* For safety, we order the 'rsc' start after 'remote' * has been probed. * * Using 'top' helps for groups, but we may need to * follow the start's ordering chain backwards. */ custom_action_order(remote, pcmk__op_key(remote->id, RSC_STATUS, 0), NULL, top, pcmk__op_key(top->id, RSC_START, 0), NULL, pe_order_optional, data_set); } pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopped", rsc->id, node->details->id, remote->id); return FALSE; /* Here we really we want to check if remote->stop is required, * but that information doesn't exist yet */ } else if(node->details->remote_requires_reset || node->details->unclean || is_set(remote->flags, pe_rsc_failed) || remote->next_role == RSC_ROLE_STOPPED || (remote->allocated_to && pe_find_node(remote->running_on, remote->allocated_to->details->uname) == NULL) ) { /* The container is stopping or restarting, don't start * 'rsc' until 'remote' stops as this also implies that * 'rsc' is stopped - avoiding the need to probe */ custom_action_order(remote, pcmk__op_key(remote->id, RSC_STOP, 0), NULL, top, pcmk__op_key(top->id, RSC_START, 0), NULL, pe_order_optional, data_set); pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopping, restarting or moving", rsc->id, node->details->id, remote->id); return FALSE; /* } else { * The container is running so there is no problem probing it */ } } key = pcmk__op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); update_action_flags(probe, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__); order_after_unfencing(rsc, node, probe, pe_order_optional, data_set); /* * We need to know if it's running_on (not just known_on) this node * to correctly determine the target rc. */ running = pe_find_node_id(rsc->running_on, node->details->id); if (running == NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); } else if (rsc->role == RSC_ROLE_MASTER) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_master); } crm_debug("Probing %s on %s (%s) %d %p", rsc->id, node->details->uname, role2text(rsc->role), is_set(probe->flags, pe_action_runnable), rsc->running_on); if (is_unfence_device(rsc, data_set) || !pe_rsc_is_clone(top)) { top = rsc; } else { crm_trace("Probing %s on %s (%s) as %s", rsc->id, node->details->uname, role2text(rsc->role), top->id); } if(is_not_set(probe->flags, pe_action_runnable) && rsc->running_on == NULL) { /* Prevent the start from occurring if rsc isn't active, but * don't cause it to stop if it was active already */ flags |= pe_order_runnable_left; } custom_action_order(rsc, NULL, probe, top, pcmk__op_key(top->id, RSC_START, 0), NULL, flags, data_set); /* Before any reloads, if they exist */ custom_action_order(rsc, NULL, probe, top, reload_key(rsc), NULL, pe_order_optional, data_set); #if 0 // complete is always null currently if (!is_unfence_device(rsc, data_set)) { /* Normally rsc.start depends on probe complete which depends * on rsc.probe. But this can't be the case for fence devices * with unfencing, as it would create graph loops. * * So instead we explicitly order 'rsc.probe then rsc.start' */ order_actions(probe, complete, pe_order_implies_then); } #endif return TRUE; } /*! * \internal * \brief Check whether a resource is known on a particular node * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return TRUE if resource (or parent if an anonymous clone) is known */ static bool rsc_is_known_on(pe_resource_t *rsc, const pe_node_t *node) { if (pe_hash_table_lookup(rsc->known_on, node->details->id)) { return TRUE; } else if ((rsc->variant == pe_native) && pe_rsc_is_anon_clone(rsc->parent) && pe_hash_table_lookup(rsc->parent->known_on, node->details->id)) { /* We check only the parent, not the uber-parent, because we cannot * assume that the resource is known if it is in an anonymously cloned * group (which may be only partially known). */ return TRUE; } return FALSE; } /*! * \internal * \brief Order a resource's start and promote actions relative to fencing * * \param[in] rsc Resource to be ordered * \param[in] stonith_op Fence action * \param[in] data_set Cluster information */ static void native_start_constraints(pe_resource_t * rsc, pe_action_t * stonith_op, pe_working_set_t * data_set) { pe_node_t *target; GListPtr gIter = NULL; CRM_CHECK(stonith_op && stonith_op->node, return); target = stonith_op->node; for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; switch (action->needs) { case rsc_req_nothing: // Anything other than start or promote requires nothing break; case rsc_req_stonith: order_actions(stonith_op, action, pe_order_optional); break; case rsc_req_quorum: if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei) && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id) && !rsc_is_known_on(rsc, target)) { /* If we don't know the status of the resource on the node * we're about to shoot, we have to assume it may be active * there. Order the resource start after the fencing. This * is analogous to waiting for all the probes for a resource * to complete before starting it. * * The most likely explanation is that the DC died and took * its status with it. */ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, target->details->uname); order_actions(stonith_op, action, pe_order_optional | pe_order_runnable_left); } break; } } } static void native_stop_constraints(pe_resource_t * rsc, pe_action_t * stonith_op, pe_working_set_t * data_set) { GListPtr gIter = NULL; GListPtr action_list = NULL; bool order_implicit = false; pe_resource_t *top = uber_parent(rsc); pe_action_t *parent_stop = NULL; pe_node_t *target; CRM_CHECK(stonith_op && stonith_op->node, return); target = stonith_op->node; /* Get a list of stop actions potentially implied by the fencing */ action_list = pe__resource_actions(rsc, target, RSC_STOP, FALSE); /* If resource requires fencing, implicit actions must occur after fencing. * * Implied stops and demotes of resources running on guest nodes are always * ordered after fencing, even if the resource does not require fencing, * because guest node "fencing" is actually just a resource stop. */ if (is_set(rsc->flags, pe_rsc_needs_fencing) || pe__is_guest_node(target)) { order_implicit = true; } if (action_list && order_implicit) { parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); } for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; // The stop would never complete, so convert it into a pseudo-action. update_action_flags(action, pe_action_pseudo|pe_action_runnable, __FUNCTION__, __LINE__); if (order_implicit) { update_action_flags(action, pe_action_implied_by_stonith, __FUNCTION__, __LINE__); /* Order the stonith before the parent stop (if any). * * Also order the stonith before the resource stop, unless the * resource is inside a bundle -- that would cause a graph loop. * We can rely on the parent stop's ordering instead. * * User constraints must not order a resource in a guest node * relative to the guest node container resource. The * pe_order_preserve flag marks constraints as generated by the * cluster and thus immune to that check (and is irrelevant if * target is not a guest). */ if (!pe_rsc_is_bundled(rsc)) { order_actions(stonith_op, action, pe_order_preserve); } order_actions(stonith_op, parent_stop, pe_order_preserve); } if (is_set(rsc->flags, pe_rsc_failed)) { crm_notice("Stop of failed resource %s is implicit %s %s is fenced", rsc->id, (order_implicit? "after" : "because"), target->details->uname); } else { crm_info("%s is implicit %s %s is fenced", action->uuid, (order_implicit? "after" : "because"), target->details->uname); } if (is_set(rsc->flags, pe_rsc_notify)) { /* Create a second notification that will be delivered * immediately after the node is fenced * * Basic problem: * - C is a clone active on the node to be shot and stopping on another * - R is a resource that depends on C * * + C.stop depends on R.stop * + C.stopped depends on STONITH * + C.notify depends on C.stopped * + C.healthy depends on C.notify * + R.stop depends on C.healthy * * The extra notification here changes * + C.healthy depends on C.notify * into: * + C.healthy depends on C.notify' * + C.notify' depends on STONITH' * thus breaking the loop */ create_secondary_notification(action, rsc, stonith_op, data_set); } /* From Bug #1601, successful fencing must be an input to a failed resources stop action. However given group(rA, rB) running on nodeX and B.stop has failed, A := stop healthy resource (rA.stop) B := stop failed resource (pseudo operation B.stop) C := stonith nodeX A requires B, B requires C, C requires A This loop would prevent the cluster from making progress. This block creates the "C requires A" dependency and therefore must (at least for now) be disabled. Instead, run the block above and treat all resources on nodeX as B would be (marked as a pseudo op depending on the STONITH). TODO: Break the "A requires B" dependency in update_action() and re-enable this block } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); * stop healthy resources before the * stonith op * custom_action_order( rsc, stop_key(rsc), NULL, NULL,strdup(CRM_OP_FENCE),stonith_op, pe_order_optional, data_set); */ } g_list_free(action_list); /* Get a list of demote actions potentially implied by the fencing */ action_list = pe__resource_actions(rsc, target, RSC_DEMOTE, FALSE); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (action->node->details->online == FALSE || action->node->details->unclean == TRUE || is_set(rsc->flags, pe_rsc_failed)) { if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_info(rsc, "Demote of failed resource %s is implicit after %s is fenced", rsc->id, target->details->uname); } else { pe_rsc_info(rsc, "%s is implicit after %s is fenced", action->uuid, target->details->uname); } /* The demote would never complete and is now implied by the * fencing, so convert it into a pseudo-action. */ update_action_flags(action, pe_action_pseudo|pe_action_runnable, __FUNCTION__, __LINE__); if (pe_rsc_is_bundled(rsc)) { /* Do nothing, let the recovery be ordered after the parent's implied stop */ } else if (order_implicit) { order_actions(stonith_op, action, pe_order_preserve|pe_order_optional); } } } g_list_free(action_list); } void rsc_stonith_ordering(pe_resource_t * rsc, pe_action_t * stonith_op, pe_working_set_t * data_set) { if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; rsc_stonith_ordering(child_rsc, stonith_op, data_set); } } else if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); } else { native_start_constraints(rsc, stonith_op, data_set); native_stop_constraints(rsc, stonith_op, data_set); } } void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set) { GListPtr gIter = NULL; pe_action_t *reload = NULL; if (rsc->children) { for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; ReloadRsc(child_rsc, node, data_set); } return; } else if (rsc->variant > pe_native) { /* Complex resource with no children */ return; } else if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "%s: unmanaged", rsc->id); return; } else if (is_set(rsc->flags, pe_rsc_failed)) { /* We don't need to specify any particular actions here, normal failure * recovery will apply. */ pe_rsc_trace(rsc, "%s: preventing reload because failed", rsc->id); return; } else if (is_set(rsc->flags, pe_rsc_start_pending)) { /* If a resource's configuration changed while a start was pending, * force a full restart. */ pe_rsc_trace(rsc, "%s: preventing reload because start pending", rsc->id); stop_action(rsc, node, FALSE); return; } else if (node == NULL) { pe_rsc_trace(rsc, "%s: not active", rsc->id); return; } pe_rsc_trace(rsc, "Processing %s", rsc->id); set_bit(rsc->flags, pe_rsc_reload); reload = custom_action( rsc, reload_key(rsc), CRMD_ACTION_RELOAD, node, FALSE, TRUE, data_set); pe_action_set_reason(reload, "resource definition change", FALSE); custom_action_order(NULL, NULL, reload, rsc, stop_key(rsc), NULL, pe_order_optional|pe_order_then_cancels_first, data_set); custom_action_order(NULL, NULL, reload, rsc, demote_key(rsc), NULL, pe_order_optional|pe_order_then_cancels_first, data_set); } void native_append_meta(pe_resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); pe_resource_t *parent; if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE); if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE); crm_xml_add(xml, name, value); free(name); } for (parent = rsc; parent != NULL; parent = parent->parent) { if (parent->container) { crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, parent->container->id); } } } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index a09061f0d3..43e78033ed 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,4022 +1,4022 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit(data_set->flags, flag); \ } else { \ clear_bit(data_set->flags, flag); \ } \ } \ } while(0) static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *failed, pe_working_set_t *data_set); static void determine_remote_online_status(pe_working_set_t *data_set, pe_node_t *this_node); static void add_node_attrs(xmlNode *attrs, pe_node_t *node, bool overwrite, pe_working_set_t *data_set); static void determine_online_status(xmlNode *node_state, pe_node_t *this_node, pe_working_set_t *data_set); static void unpack_lrm_resources(pe_node_t *node, xmlNode *lrm_state, pe_working_set_t *data_set); // Bitmask for warnings we only want to print once uint32_t pe_wo = 0; static gboolean is_dangling_guest_node(pe_node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (pe__is_guest_or_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) { return TRUE; } return FALSE; } /*! * \brief Schedule a fence action for a node * * \param[in,out] data_set Current working set of cluster * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed * \param[in] priority_delay Whether to consider `priority-fencing-delay` */ void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (pe__is_guest_node(node)) { pe_resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { if (!is_set(rsc->flags, pe_rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", node->details->uname, reason, rsc->id); } else { crm_warn("Guest node %s will be fenced " "(by recovering its guest resource %s): %s", node->details->uname, rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); } } } else if (is_dangling_guest_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", node->details->uname, reason); set_bit(node->details->remote_rsc->flags, pe_rsc_failed); set_bit(node->details->remote_rsc->flags, pe_rsc_stop); } else if (pe__is_remote_node(node)) { pe_resource_t *rsc = node->details->remote_rsc; if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", node->details->uname, reason); } else if(node->details->remote_requires_reset == FALSE) { node->details->remote_requires_reset = TRUE; crm_warn("Remote node %s %s: %s", node->details->uname, pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; // No need to apply `priority-fencing-delay` for remote nodes pe_fence_op(node, NULL, TRUE, reason, FALSE, data_set); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", node->details->uname, pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean", reason); } else { crm_warn("Cluster node %s %s: %s", node->details->uname, pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, priority_delay, data_set); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \ "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \ "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \ "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \ "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(unsigned long long flag, const char *xpath, pe_working_set_t *data_set) { xmlXPathObjectPtr result = NULL; if (is_not_set(data_set->flags, flag)) { result = xpath_search(data_set->input, xpath); if (result && (numXpathResults(result) > 0)) { set_bit(data_set->flags, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = crm_str_table_new(); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; data_set->config_hash = config_hash; pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if(is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_notice("Watchdog will be used via SBD if fencing is required " "and stonith-watchdog-timeout is nonzero"); set_bit(data_set->flags, pe_flag_have_stonith_resource); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = (int) crm_parse_interval_spec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); if (!strcmp(data_set->stonith_action, "poweroff")) { pe_warn_once(pe_wo_poweroff, "Support for stonith-action of 'poweroff' is deprecated " "and will be removed in a future release (use 'off' instead)"); data_set->stonith_action = "off"; } crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); crm_debug("Concurrent fencing is %s", is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled"); value = pe_pref(data_set->config_hash, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); if (value) { data_set->priority_fencing_delay = crm_parse_interval_spec(value) / 1000; crm_trace("Priority fencing delay is %ds", data_set->priority_fencing_delay); } set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) { data_set->no_quorum_policy = no_quorum_ignore; } else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) { data_set->no_quorum_policy = no_quorum_freeze; } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { data_set->no_quorum_policy = no_quorum_demote; } else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { int do_panic = 0; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) { data_set->no_quorum_policy = no_quorum_suicide; } else { crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { pcmk__config_err("Resetting no-quorum-policy to 'stop' because " "fencing is disabled"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case no_quorum_demote: crm_debug("On loss of quorum: " "Demote promotable resources and stop other resources"); break; case no_quorum_suicide: crm_notice("On loss of quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount"); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing); } if (is_set(data_set->flags, pe_flag_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes"); } pcmk__score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); pcmk__score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); pcmk__score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock); crm_trace("Resources will%s be locked to cleanly shut down nodes", (is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not")); if (is_set(data_set->flags, pe_flag_shutdown_lock)) { value = pe_pref(data_set->config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); data_set->shutdown_lock = crm_parse_interval_spec(value) / 1000; crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock); } return TRUE; } static void destroy_digest_cache(gpointer ptr) { op_digest_cache_t *data = ptr; free_xml(data->params_all); free_xml(data->params_secure); free_xml(data->params_restart); free(data->digest_all_calc); free(data->digest_restart_calc); free(data->digest_secure_calc); free(data); } pe_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { pe_node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { pcmk__config_warn("More than one node entry has name '%s'", uname); } new_node = calloc(1, sizeof(pe_node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct pe_node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->type = node_ping; if (pcmk__str_eq(type, "remote", pcmk__str_casei)) { new_node->details->type = node_remote; set_bit(data_set->flags, pe_flag_have_remote_nodes); } else if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) { new_node->details->type = node_member; } new_node->details->attrs = crm_str_table_new(); if (pe__is_guest_or_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("cluster")); } new_node->details->utilization = crm_str_table_new(); new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_digest_cache); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *is_managed = NULL; for (attr_set = __xml_first_child_element(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) { if (!pcmk__str_eq((const char *)attr_set->name, XML_TAG_META_SETS, pcmk__str_casei)) { continue; } for (attr = __xml_first_child_element(attr_set); attr != NULL; attr = __xml_next_element(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (pcmk__str_eq(name, XML_RSC_ATTR_REMOTE_NODE, pcmk__str_casei)) { remote_name = value; } else if (pcmk__str_eq(name, "remote-addr", pcmk__str_casei)) { remote_server = value; } else if (pcmk__str_eq(name, "remote-port", pcmk__str_casei)) { remote_port = value; } else if (pcmk__str_eq(name, "remote-connect-timeout", pcmk__str_casei)) { connect_timeout = value; } else if (pcmk__str_eq(name, "remote-allow-migrate", pcmk__str_casei)) { remote_allow_migrate=value; } else if (pcmk__str_eq(name, XML_RSC_ATTR_MANAGED, pcmk__str_casei)) { is_managed = value; } } } if (remote_name == NULL) { return NULL; } if (pe_find_resource(data->resources, remote_name) != NULL) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, is_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, pe_node_t *new_node) { if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (is_set(data_set->flags, pe_flag_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... new_node->details->unclean = FALSE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; pe_node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; for (xml_obj = __xml_first_child_element(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, pcmk__str_none)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE "> entry in configuration without id"); continue; } new_node = pe_create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data, new_node->details->utilization, NULL, FALSE, data_set); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); pe_create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void setup_container(pe_resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; setup_container(child_rsc, data_set); } return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) { pe_resource_t *container = pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; set_bit(container->flags, pe_rsc_is_container); container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; /* Create remote nodes and guest nodes from the resource configuration * before unpacking resources. */ for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { const char *new_node_id = NULL; /* Check for remote nodes, which are defined by ocf:pacemaker:remote * primitives. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found remote node %s defined by resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Check for guest nodes, which are defined by special meta-attributes * of a primitive of any type (for example, VirtualDomain or Xen). */ if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, pcmk__str_none)) { /* This will add an ocf:pacemaker:remote primitive to the * configuration for the guest node's connection, to be unpacked * later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest node %s in resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Check for guest nodes inside a group. Clones are currently not * supported as guest nodes. */ if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, pcmk__str_none)) { xmlNode *xml_obj2 = NULL; for (xml_obj2 = __xml_first_child_element(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the scheduler calculations. */ static void link_rsc2remotenode(pe_working_set_t *data_set, pe_resource_t *new_rsc) { pe_node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return;); pe_rsc_trace(new_rsc, "Linking remote connection resource %s to node %s", new_rsc->id, remote_node->details->uname); remote_node->details->remote_rsc = new_rsc; if (new_rsc->container == NULL) { /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. */ handle_startup_fencing(data_set, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now * that we know the node is a guest node, update it correctly. */ g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("container")); } } static void destroy_tag(gpointer data) { pe_tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } /*! * \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] data_set Where to put resource information * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when common_unpack() calls resource_location() */ gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GListPtr gIter = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_tag); for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { pe_resource_t *new_rsc = NULL; if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, pcmk__str_none)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set) && (new_rsc != NULL)) { data_set->resources = g_list_append(data_set->resources, new_rsc); pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { pcmk__config_err("Ignoring <%s> resource '%s' " "because configuration is invalid", crm_element_name(xml_obj), crm_str(ID(xml_obj))); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_set(data_set->flags, pe_flag_quick_location)) { /* Ignore */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined"); pcmk__config_err("Either configure some or disable STONITH with the stonith-enabled option"); pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) { xmlNode *xml_tag = NULL; data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_tag); for (xml_tag = __xml_first_child_element(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) { continue; } if (tag_id == NULL) { pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID, crm_element_name(xml_tag)); continue; } for (xml_obj_ref = __xml_first_child_element(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (!pcmk__str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, pcmk__str_none)) { continue; } if (obj_ref == NULL) { pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID, crm_element_name(xml_obj_ref), tag_id); continue; } if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; pe_ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (pcmk__str_empty(ticket_id)) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child_element(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } static void unpack_handle_remote_attrs(pe_node_t *this_node, xmlNode *state, pe_working_set_t * data_set) { const char *resource_discovery_enabled = NULL; xmlNode *attrs = NULL; pe_resource_t *rsc = NULL; if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { return; } if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) { return; } crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname); this_node->details->remote_maintenance = crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0"); rsc = this_node->details->remote_rsc; if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (pe__shutdown_requested(this_node)) { crm_info("Node %s is shutting down", this_node->details->uname); this_node->details->shutdown = TRUE; if (rsc) { rsc->next_role = RSC_ROLE_STOPPED; } } if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) || (rsc && !is_set(rsc->flags, pe_rsc_managed))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (pe__is_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("Ignoring %s attribute on remote node %s because stonith is disabled", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } else { /* This is either a remote node with fencing enabled, or a guest * node. We don't care whether fencing is enabled when fencing guest * nodes, because they are "fenced" by recovering their containing * resource. */ crm_info("Node %s has resource discovery disabled", this_node->details->uname); this_node->details->rsc_discovery_enabled = FALSE; } } } static bool unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) { bool changed = false; xmlNode *lrm_rsc = NULL; for (xmlNode *state = __xml_first_child_element(status); state != NULL; state = __xml_next_element(state)) { const char *id = NULL; const char *uname = NULL; pe_node_t *this_node = NULL; bool process = FALSE; if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (this_node->details->unpacked) { crm_trace("Node %s was already processed", id); continue; } else if (!pe__is_guest_or_remote_node(this_node) && is_set(data_set->flags, pe_flag_stonith_enabled)) { // A redundant test, but preserves the order for regression tests process = TRUE; } else if (pe__is_guest_or_remote_node(this_node)) { bool check = FALSE; pe_resource_t *rsc = this_node->details->remote_rsc; if(fence) { check = TRUE; } else if(rsc == NULL) { /* Not ready yet */ } else if (pe__is_guest_node(this_node) && rsc->role == RSC_ROLE_STARTED && rsc->container->role == RSC_ROLE_STARTED) { /* Both the connection and its containing resource need to be * known to be up before we process resources running in it. */ check = TRUE; crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED); } else if (!pe__is_guest_node(this_node) && ((rsc->role == RSC_ROLE_STARTED) || is_set(data_set->flags, pe_flag_shutdown_lock))) { check = TRUE; crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED); } if (check) { determine_remote_online_status(data_set, this_node); unpack_handle_remote_attrs(this_node, state, data_set); process = TRUE; } } else if (this_node->details->online) { process = TRUE; } else if (fence) { process = TRUE; } else if (is_set(data_set->flags, pe_flag_shutdown_lock)) { process = TRUE; } if(process) { crm_trace("Processing lrm resource entries on %shealthy%s node: %s", fence?"un":"", (pe__is_guest_or_remote_node(this_node)? " remote" : ""), this_node->details->uname); changed = TRUE; this_node->details->unpacked = TRUE; lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } return changed; } /* remove nodes that are down, stopping */ /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *state = NULL; pe_node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_ticket); } for (state = __xml_first_child_element(status); state != NULL; state = __xml_next_element(state)) { if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) { unpack_tickets_state((xmlNode *) state, data_set); } else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { xmlNode *attrs = NULL; const char *resource_discovery_enabled = NULL; id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { pcmk__config_warn("Ignoring recorded node status for '%s' " "because no longer in configuration", uname); continue; } else if (pe__is_guest_or_remote_node(this_node)) { /* online state for remote nodes is determined by the * rsc state after all the unpacking is done. we do however * need to mark whether or not the node has been fenced as this plays * a role during unpacking cluster node resource state */ this_node->details->remote_was_fenced = crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0"); continue; } crm_trace("Processing node id=%s, uname=%s", id, uname); /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } crm_trace("determining node state"); determine_online_status(state, this_node, data_set); if (is_not_set(data_set->flags, pe_flag_have_quorum) && this_node->details->online && (data_set->no_quorum_policy == no_quorum_suicide)) { /* Everything else should flow from this automatically * (at least until the scheduler becomes able to migrate off * healthy resources) */ pe_fence_node(data_set, this_node, "cluster does not have quorum", FALSE); } } } while(unpack_node_loop(status, FALSE, data_set)) { crm_trace("Start another loop"); } // Now catch any nodes we didn't see unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ if (data_set->stop_needed != NULL) { for (GList *item = data_set->stop_needed; item; item = item->next) { pe_resource_t *container = item->data; pe_node_t *node = pe__current_node(container); if (node) { stop_action(container, node, FALSE); } } g_list_free(data_set->stop_needed); data_set->stop_needed = NULL; } for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *this_node = gIter->data; if (this_node == NULL) { continue; } else if (!pe__is_guest_or_remote_node(this_node)) { continue; } else if(this_node->details->unpacked) { continue; } determine_remote_online_status(data_set, this_node); } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, pe_node_t * this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster)); } else if (pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei)) { if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster)); crm_trace("\tis_peer=%s, join=%s, expected=%s", crm_str(is_peer), crm_str(join), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "peer is unexpectedly down", FALSE); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, pe_node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; bool crmd_online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = pe_node_attribute_raw(this_node, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate); online = crm_is_true(in_cluster); crmd_online = pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei); if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", this_node->details->uname); /* Slightly different criteria since we can't shut down a dead peer */ online = crmd_online; } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "peer has not been seen by the cluster", FALSE); } else if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_casei)) { pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria", FALSE); } else if (do_terminate == FALSE && pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, pcmk__str_casei)) { if (crm_is_true(in_cluster) || crmd_online) { crm_info("- Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", this_node->details->uname); } } else if (do_terminate && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_casei) && crm_is_true(in_cluster) == FALSE && !crmd_online) { crm_info("Node %s was just shot", this_node->details->uname); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { // Consider `priority-fencing-delay` for lost nodes pe_fence_node(data_set, this_node, "peer is no longer part of the cluster", TRUE); } else if (!crmd_online) { pe_fence_node(data_set, this_node, "peer process is no longer available", FALSE); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "termination was requested", FALSE); } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { crm_info("Node %s is active", this_node->details->uname); } else if (pcmk__strcase_any_of(join, CRMD_JOINSTATE_PENDING, CRMD_JOINSTATE_DOWN, NULL)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "peer was in an unknown state", FALSE); crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown); } return online; } static void determine_remote_online_status(pe_working_set_t * data_set, pe_node_t * this_node) { pe_resource_t *rsc = this_node->details->remote_rsc; pe_resource_t *container = NULL; pe_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will * be NULL. Consider it an offline remote node in that case. */ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; if (container && pcmk__list_of_1(rsc->running_on)) { host = rsc->running_on->data; } /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && is_set(container->flags, pe_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if(is_set(rsc->flags, pe_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if (rsc->role == RSC_ROLE_STOPPED || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); } static void determine_online_status(xmlNode * node_state, pe_node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); CRM_CHECK(this_node != NULL, return); this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; if (pe__shutdown_requested(this_node)) { this_node->details->shutdown = TRUE; } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("Node %s is not a pacemaker node", this_node->details->uname); } else if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } } /*! * \internal * \brief Find the end of a resource's name, excluding any clone suffix * * \param[in] id Resource ID to check * * \return Pointer to last character of resource's base name */ const char * pe_base_name_end(const char *id) { if (!pcmk__str_empty(id)) { const char *end = id + strlen(id) - 1; for (const char *s = end; s > id; --s) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': return (s == end)? s : (s - 1); default: return end; } } return end; } return NULL; } /*! * \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_zero(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); size_t base_name_len = end - last_rsc_id + 1; char *zero = NULL; CRM_ASSERT(end); zero = calloc(base_name_len + 3, sizeof(char)); CRM_ASSERT(zero); memcpy(zero, last_rsc_id, base_name_len); zero[base_name_len] = ':'; zero[base_name_len + 1] = '0'; return zero; } static pe_resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { pe_resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { pe_node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pe_find_node(data_set->nodes, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set); } link_rsc2remotenode(data_set, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); set_bit(rsc->flags, pe_rsc_orphan_container_filler); } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } /*! * \internal * \brief Create orphan instance for anonymous clone resource history */ static pe_resource_t * create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, pe_node_t *node, pe_working_set_t *data_set) { pe_resource_t *top = pe__create_clone_child(parent, data_set); // find_rsc() because we might be a cloned group pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of clone-max instances); * (3) a newly created orphan (i.e. clone-max instances are already active). * * \param[in] data_set Cluster information * \param[in] node Node on which to check for instance * \param[in] parent Clone to check * \param[in] rsc_id Name of cloned resource in history (without instance) */ static pe_resource_t * find_anonymous_clone(pe_working_set_t * data_set, pe_node_t * node, pe_resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; pe_resource_t *rsc = NULL; pe_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); // Check for active (or partially active, for cloned groups) instance pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr locations = NULL; pe_resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called * for a resource before the resource's individual operation history * entries are unpacked, locations will generally not contain the * desired node. * * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if globally-unique * was flipped from true to false); and * (3) when we re-run calculations on the same data set as part of a * simulation. */ child->fns->location(child, &locations, 2); if (locations) { /* We should never associate the same numbered anonymous clone * instance with multiple nodes, and clone instances can't migrate, * so there must be only one location, regardless of history. */ CRM_LOG_ASSERT(locations->next == NULL); if (((pe_node_t *)locations->data)->details == node->details) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can * happen if globally-unique is switched from true to * false), we want to consider the instances beyond the * first as orphans, even if there are inactive instance * numbers available. */ if (rsc->running_on) { crm_notice("Active (now-)anonymous clone %s has " "multiple (orphan) instance histories on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; } else { pe_rsc_trace(parent, "Resource %s, active", rsc->id); } } } g_list_free(locations); } else { pe_rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance && is_not_set(child->flags, pe_rsc_block)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); /* ... but don't use it if it was already associated with a * pending action on another node */ if (inactive_instance && inactive_instance->pending_node && (inactive_instance->pending_node->details != node->details)) { inactive_instance = NULL; } } } } if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); rsc = inactive_instance; } /* If the resource has "requires" set to "quorum" or "nothing", and we don't * have a clone instance for every node, we don't want to consume a valid * instance number for unclean nodes. Such instances may appear to be active * according to the history, but should be considered inactive, so we can * start an instance elsewhere. Treat such instances as orphans. * * An exception is instances running on guest nodes -- since guest node * "fencing" is actually just a resource stop, requires shouldn't apply. * * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !pe__is_guest_node(node) && !pe__is_universal_clone(parent, data_set)) { rsc = NULL; } if (rsc == NULL) { rsc = create_anonymous_orphan(parent, rsc_id, node, data_set); pe_rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } static pe_resource_t * unpack_find_resource(pe_working_set_t * data_set, pe_node_t * node, const char *rsc_id, xmlNode * rsc_entry) { pe_resource_t *rsc = NULL; pe_resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, * check it again as a clone instance. Even when clone-max=0, we create * a single :0 orphan to match against here. */ char *clone0_id = clone_zero(rsc_id); pe_resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { crm_trace("%s is not known as %s either (orphan)", rsc_id, clone0_id); } free(clone0_id); } else if (rsc->variant > pe_native) { crm_trace("Resource history for %s is orphaned because it is no longer primitive", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (pe_rsc_is_anon_clone(parent)) { if (pe_rsc_is_bundled(parent)) { rsc = pe__find_bundle_replica(parent->parent, node); } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } } if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei) && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) { free(rsc->clone_name); rsc->clone_name = strdup(rsc_id); pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, (is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : "")); } return rsc; } static pe_resource_t * process_orphan_resource(xmlNode * rsc_entry, pe_node_t * node, pe_working_set_t * data_set) { pe_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { CRM_CHECK(rsc != NULL, return NULL); pe_rsc_trace(rsc, "Added orphan %s", rsc->id); resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", data_set); } return rsc; } static void process_rsc_state(pe_resource_t * rsc, pe_node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { pe_node_t *tmpnode = NULL; char *reason = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { pe_resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { pe_node_t *n = pe__copy_node(node); pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name, n->details->uname); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && node->details->maintenance == FALSE && is_set(rsc->flags, pe_rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (pe__is_guest_node(node)) { set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { if (pe__is_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start * somewhere. This allows connection resources on a failed * cluster node to move to another node without requiring the * remote nodes to be fenced as well. */ node->details->unseen = TRUE; reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(data_set, node, reason, FALSE); } free(reason); } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_demote: set_bit(rsc->flags, pe_rsc_failed); demote_action(rsc, node, FALSE); break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(data_set, node, reason, FALSE); free(reason); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); if (rsc->container && pe_rsc_is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. We don't necessarily know where the * container is running yet, so remember it and add a stop * action for it later. */ data_set->stop_needed = g_list_prepend(data_set->stop_needed, rsc->container); } else if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; case action_fail_reset_remote: set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); } if (tmpnode && pe__is_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* The remote connection resource failed in a way that * should result in fencing the remote node. */ pe_fence_node(data_set, tmpnode, "remote connection is unrecoverable", FALSE); } } /* require the stop action regardless if fencing is occurring or not. */ if (rsc->role > RSC_ROLE_STOPPED) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_ms) { rsc->next_role = RSC_ROLE_STOPPED; } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. */ if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { pcmk__config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { pcmk__config_warn("Resource '%s' must be stopped manually on " "%s because cluster is configured not to " "stop active orphans", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); switch (on_fail) { case action_fail_ignore: break; case action_fail_demote: case action_fail_block: set_bit(rsc->flags, pe_rsc_failed); break; default: set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); break; } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP, FALSE); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *stop = (pe_action_t *) gIter->data; - stop->flags |= pe_action_optional; + pe__set_action_flags(stop, pe_action_optional); } g_list_free(possible_matches); } } /* create active recurring operations as optional */ static void process_recurring(pe_node_t * node, pe_resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = ID(rsc_op); counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if (interval_ms == 0) { pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (pcmk__str_eq(status, "-1", pcmk__str_casei)) { pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = pcmk__op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei) && pcmk__str_eq(status, "0", pcmk__str_casei)) { *stop_index = counter; } else if (pcmk__strcase_any_of(task, CRMD_ACTION_START, CRMD_ACTION_MIGRATED, NULL)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (pcmk__strcase_any_of(rc, "0", "8", NULL)) { implied_monitor_start = counter; } } else if (pcmk__strcase_any_of(task, CRMD_ACTION_PROMOTE, CRMD_ACTION_DEMOTE, NULL)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } // If resource history entry has shutdown lock, remember lock node and time static void unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { time_t lock_time = 0; // When lock started (i.e. node shutdown time) if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &lock_time) == pcmk_ok) && (lock_time != 0)) { if ((data_set->shutdown_lock > 0) && (get_effective_time(data_set) > (lock_time + data_set->shutdown_lock))) { pe_rsc_info(rsc, "Shutdown lock for %s on %s expired", rsc->id, node->details->uname); pe__clear_resource_history(rsc, node, data_set); } else { rsc->lock_node = node; rsc->lock_time = lock_time; } } } static pe_resource_t * unpack_lrm_rsc_state(pe_node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); pe_resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = action_fail_ignore; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_prepend(op_list, rsc_op); } } if (is_not_set(data_set->flags, pe_flag_shutdown_lock)) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } else { rsc = process_orphan_resource(rsc_entry, node, data_set); } } CRM_ASSERT(rsc != NULL); // Check whether the resource is "shutdown-locked" to this node if (is_set(data_set->flags, pe_flag_shutdown_lock)) { unpack_shutdown_lock(rsc_entry, rsc, node, data_set); } /* process operations */ saved_role = rsc->role; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { pe_resource_t *rsc; pe_resource_t *container; const char *rsc_id; const char *container_id; if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } static void unpack_lrm_resources(pe_node_t *node, xmlNode *lrm_rsc_list, pe_working_set_t *data_set) { xmlNode *rsc_entry = NULL; gboolean found_orphaned_container_filler = FALSE; for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { pe_resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set); if (!rsc) { continue; } if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = TRUE; } } } /* now that all the resource state has been unpacked for this node * we have to go back and map any orphaned container fillers to their * container resource */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(lrm_rsc_list, data_set); } } static void set_active(pe_resource_t * rsc) { pe_resource_t *top = uber_parent(rsc); if (top && is_set(top->flags, pe_rsc_promotable)) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { pe_node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, bool success_only, pe_working_set_t *data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; xmlNode *xml = NULL; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? */ if (source && pcmk__str_eq(op, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && pcmk__str_eq(op, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } CRM_LOG_ASSERT(offset > 0); xml = get_xpath_object(xpath, data_set->input, LOG_DEBUG); if (xml && success_only) { int rc = PCMK_OCF_UNKNOWN_ERROR; int status = PCMK_LRM_OP_ERROR; crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status); if ((rc != PCMK_OCF_OK) || (status != PCMK_LRM_OP_DONE)) { return NULL; } } return xml; } static int pe__call_id(xmlNode *op_xml) { int id = 0; if (op_xml) { crm_element_value_int(op_xml, XML_LRM_ATTR_CALLID, &id); } return id; } /*! * \brief Check whether a stop happened on the same node after some event * * \param[in] rsc Resource being checked * \param[in] node Node being checked * \param[in] xml_op Event that stop is being compared to * \param[in] data_set Cluster working set * * \return TRUE if stop happened after event, FALSE otherwise * * \note This is really unnecessary, but kept as a safety mechanism. We * currently don't save more than one successful event in history, so this * only matters when processing really old CIB files that we don't * technically support anymore, or as preparation for logging an extended * history in the future. */ static bool stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->uname, NULL, TRUE, data_set); return (stop_op && (pe__call_id(stop_op) > pe__call_id(xml_op))); } static void unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { /* A successful migration sequence is: * migrate_to on source node * migrate_from on target node * stop on source node * * If a migrate_to is followed by a stop, the entire migration (successful * or failed) is complete, and we don't care what happened on the target. * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from failed, make sure the resource gets * stopped on both source and target (if up). * * If the migrate_to and migrate_from both succeeded (which also implies the * resource is no longer running on the source), but there is no stop, the * migration is considered to be "dangling". Schedule a stop on the source * in this case. */ int from_rc = 0; int from_status = 0; pe_node_t *target_node = NULL; pe_node_t *source_node = NULL; xmlNode *migrate_from = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); if (stop_happened_after(rsc, node, xml_op, data_set)) { return; } // Clones are not allowed to migrate, so role can't be master rsc->role = RSC_ROLE_STARTED; target_node = pe_find_node(data_set->nodes, target); source_node = pe_find_node(data_set->nodes, source); // Check whether there was a migrate_from action on the target migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, source, FALSE, data_set); if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ID(migrate_from), target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_OCF_OK && from_status == PCMK_LRM_OP_DONE) { /* The migrate_to and migrate_from both succeeded, so mark the migration * as "dangling". This will be used to schedule a stop action on the * source without affecting the target. */ pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), source); rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed if (target_node && target_node->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, target_node->details->online); native_add_running(rsc, target_node, data_set); } } else { // Pending, or complete but erased if (target_node && target_node->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, target_node->details->online); native_add_running(rsc, target_node, data_set); if (source_node && source_node->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ rsc->partial_migration_target = target_node; rsc->partial_migration_source = source_node; } } else { /* Consider it failed here - forces a restart, prevents migration */ set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); clear_bit(rsc->flags, pe_rsc_allow_migrate); } } } // Is there an action_name in node_name's rsc history newer than call_id? static bool newer_op(pe_resource_t *rsc, const char *action_name, const char *node_name, int call_id, pe_working_set_t *data_set) { xmlNode *action = find_lrm_op(rsc->id, action_name, node_name, NULL, TRUE, data_set); return pe__call_id(action) > call_id; } static void unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { int target_stop_id = 0; int target_migrate_from_id = 0; xmlNode *target_stop = NULL; xmlNode *target_migrate_from = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be master. */ rsc->role = RSC_ROLE_STARTED; // Check for stop on the target target_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, target, NULL, TRUE, data_set); target_stop_id = pe__call_id(target_stop); // Check for migrate_from on the target target_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, source, TRUE, data_set); target_migrate_from_id = pe__call_id(target_migrate_from); if ((target_stop == NULL) || (target_stop_id < target_migrate_from_id)) { /* There was no stop on the target, or a stop that happened before a * migrate_from, so assume the resource is still active on the target * (if it is up). */ pe_node_t *target_node = pe_find_node(data_set->nodes, target); pe_rsc_trace(rsc, "stop (%d) + migrate_from (%d)", target_stop_id, target_migrate_from_id); if (target_node && target_node->details->online) { native_add_running(rsc, target_node, data_set); } } else if (target_migrate_from == NULL) { /* We know there was a stop on the target, but there may not have been a * migrate_from (the stop could have happened before migrate_from was * scheduled or attempted). * * That means this could be a "dangling" migration. But first, check * whether there is a newer successful stop, start, or migrate_from on * the source node -- it's possible the failed migration was followed by * a successful stop, full restart, or migration in the reverse * direction, in which case we don't want to force a stop. */ int source_migrate_to_id = pe__call_id(xml_op); if (newer_op(rsc, CRMD_ACTION_MIGRATED, source, source_migrate_to_id, data_set) || newer_op(rsc, CRMD_ACTION_START, source, source_migrate_to_id, data_set) || newer_op(rsc, CRMD_ACTION_STOP, source, source_migrate_to_id, data_set)) { return; } // Mark node as having dangling migration so we can force a stop later rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } static void unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { xmlNode *source_stop = NULL; xmlNode *source_migrate_to = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(target, node->details->uname), return); /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be master. */ rsc->role = RSC_ROLE_STARTED; // Check for a stop on the source source_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, source, NULL, TRUE, data_set); // Check for a migrate_to on the source source_migrate_to = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, source, target, TRUE, data_set); if ((source_stop == NULL) || (pe__call_id(source_stop) < pe__call_id(source_migrate_to))) { /* There was no stop on the source, or a stop that happened before * migrate_to, so assume the resource is still active on the source (if * it is up). */ pe_node_t *source_node = pe_find_node(data_set->nodes, source); if (source_node && source_node->details->online) { native_add_running(rsc, source_node, data_set); } } } static void record_failed_op(xmlNode *op, const pe_node_t *node, const pe_resource_t *rsc, pe_working_set_t *data_set) { xmlNode *xIter = NULL; const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY); if (node->details->online == FALSE) { return; } for (xIter = data_set->failed->children; xIter; xIter = xIter->next) { const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY); const char *uname = crm_element_value(xIter, XML_ATTR_UNAME); if(pcmk__str_eq(op_key, key, pcmk__str_casei) && pcmk__str_eq(uname, node->details->uname, pcmk__str_casei)) { crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname); return; } } crm_trace("Adding entry %s on %s", op_key, node->details->uname); crm_xml_add(op, XML_ATTR_UNAME, node->details->uname); crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id); add_node_copy(data_set->failed, op); } static const char *get_op_key(xmlNode *xml_op) { const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if(key == NULL) { key = ID(xml_op); } return key; } static const char * last_change_str(xmlNode *xml_op) { time_t when; const char *when_s = NULL; if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &when) == pcmk_ok) { when_s = pcmk__epoch2str(&when); if (when_s) { // Skip day of week to make message shorter when_s = strchr(when_s, ' '); if (when_s) { ++when_s; } } } return ((when_s && *when_s)? when_s : "unknown time"); } /*! * \internal * \brief Compare two on-fail values * * \param[in] first One on-fail value to compare * \param[in] second The other on-fail value to compare * * \return A negative number if second is more severe than first, zero if they * are equal, or a positive number if first is more severe than second. * \note This is only needed until the action_fail_response values can be * renumbered at the next API compatibility break. */ static int cmp_on_fail(enum action_fail_response first, enum action_fail_response second) { switch (first) { case action_fail_demote: switch (second) { case action_fail_ignore: return 1; case action_fail_demote: return 0; default: return -1; } break; case action_fail_reset_remote: switch (second) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: return 1; case action_fail_reset_remote: return 0; default: return -1; } break; case action_fail_restart_container: switch (second) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: case action_fail_reset_remote: return 1; case action_fail_restart_container: return 0; default: return -1; } break; default: break; } switch (second) { case action_fail_demote: return (first == action_fail_ignore)? -1 : 1; case action_fail_reset_remote: switch (first) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: return -1; default: return 1; } break; case action_fail_restart_container: switch (first) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: case action_fail_reset_remote: return -1; default: return 1; } break; default: break; } return first - second; } static void unpack_rsc_op_failure(pe_resource_t * rsc, pe_node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { guint interval_ms = 0; bool is_probe = false; pe_action_t *action = NULL; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); CRM_ASSERT(rsc); CRM_CHECK(task != NULL, return); *last_failure = xml_op; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) { is_probe = true; } if (exit_reason == NULL) { exit_reason = ""; } if (is_not_set(data_set->flags, pe_flag_symmetric_cluster) && (rc == PCMK_OCF_NOT_INSTALLED)) { crm_trace("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " CRM_XS " rc=%d id=%s", services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, (is_probe? "probe" : task), rsc->id, node->details->uname, last_change_str(xml_op), rc, ID(xml_op)); } else { crm_warn("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " CRM_XS " rc=%d id=%s", services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, (is_probe? "probe" : task), rsc->id, node->details->uname, last_change_str(xml_op), rc, ID(xml_op)); if (is_probe && (rc != PCMK_OCF_OK) && (rc != PCMK_OCF_NOT_RUNNING) && (rc != PCMK_OCF_RUNNING_MASTER)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the resource-discovery option for location constraints", rsc->id, node->details->uname); } record_failed_op(xml_op, node, rsc, data_set); } action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); if (cmp_on_fail(*on_fail, action->on_fail) < 0) { pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), fail2text(action->on_fail), action->uuid, key); *on_fail = action->on_fail; } if (!strcmp(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); } else if (!strcmp(task, CRMD_ACTION_MIGRATE)) { unpack_migrate_to_failure(rsc, node, xml_op, data_set); } else if (!strcmp(task, CRMD_ACTION_MIGRATED)) { unpack_migrate_from_failure(rsc, node, xml_op, data_set); } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (!strcmp(task, CRMD_ACTION_DEMOTE)) { if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; rsc->next_role = RSC_ROLE_STOPPED; } else if(rc == PCMK_OCF_NOT_RUNNING) { rsc->role = RSC_ROLE_STOPPED; } else { /* Staying in master role would put the scheduler and controller * into a loop. Setting slave role is not dangerous because the * resource will be stopped as part of recovery, and any master * promotion will be ordered after that stop. */ rsc->role = RSC_ROLE_SLAVE; } } if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { /* leave stopped */ pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); rsc->role = RSC_ROLE_STOPPED; } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "Setting %s active", rsc->id); set_active(rsc); } pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; pe_resource_t *fail_rsc = rsc; if (fail_rsc->parent) { pe_resource_t *parent = uber_parent(fail_rsc); if (pe_rsc_is_clone(parent) && is_not_set(parent->flags, pe_rsc_unique)) { /* For clone resources, if a child fails on an operation * with on-fail = stop, all the resources fail. Do this by preventing * the parent from coming up again. */ fail_rsc = parent; } } crm_notice("%s will not be started under current conditions", fail_rsc->id); /* make sure it doesn't come up again */ if (fail_rsc->allowed_nodes != NULL) { g_hash_table_destroy(fail_rsc->allowed_nodes); } fail_rsc->allowed_nodes = pe__node_list2table(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); } /*! * \internal * \brief Remap operation status based on action result * * Given an action result, determine an appropriate operation status for the * purposes of responding to the action (the status provided by the executor is * not directly usable since the executor does not know what was expected). * * \param[in,out] rsc Resource that operation history entry is for * \param[in] rc Actual return code of operation * \param[in] target_rc Expected return code of operation * \param[in] node Node where operation was executed * \param[in] xml_op Operation history entry XML from CIB status * \param[in,out] on_fail What should be done about the result * \param[in] data_set Current cluster working set * * \return Operation status based on return code and action info * \note This may update the resource's current and next role. */ static int determine_op_status( pe_resource_t *rsc, int rc, int target_rc, pe_node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { guint interval_ms = 0; bool is_probe = false; int result = PCMK_LRM_OP_DONE; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); CRM_ASSERT(rsc); CRM_CHECK(task != NULL, return PCMK_LRM_OP_ERROR); if (exit_reason == NULL) { exit_reason = ""; } crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) { is_probe = true; task = "probe"; } if (target_rc < 0) { /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the * target_rc in the transition key, which (along with the similar case * of a corrupted transition key in the CIB) will be reported to this * function as -1. Pacemaker 2.0+ does not support rolling upgrades from * those versions or processing of saved CIB files from those versions, * so we do not need to care much about this case. */ result = PCMK_LRM_OP_ERROR; crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)", key, node->details->uname); } else if (target_rc != rc) { result = PCMK_LRM_OP_ERROR; pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", key, node->details->uname, target_rc, services_ocf_exitcode_str(target_rc), rc, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason); } switch (rc) { case PCMK_OCF_OK: if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Probe found %s active on %s at %s", rsc->id, node->details->uname, last_change_str(xml_op)); } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) { result = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; case PCMK_OCF_RUNNING_MASTER: if (is_probe && (rc != target_rc)) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Probe found %s active and promoted on %s at %s", rsc->id, node->details->uname, last_change_str(xml_op)); } rsc->role = RSC_ROLE_MASTER; break; case PCMK_OCF_DEGRADED_MASTER: case PCMK_OCF_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; result = PCMK_LRM_OP_ERROR; break; case PCMK_OCF_NOT_CONFIGURED: result = PCMK_LRM_OP_ERROR_FATAL; break; case PCMK_OCF_UNIMPLEMENT_FEATURE: if (interval_ms > 0) { result = PCMK_LRM_OP_NOTSUPPORTED; break; } // fall through case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: if (!pe_can_fence(data_set, node) && !strcmp(task, CRMD_ACTION_STOP)) { /* If a stop fails and we can't fence, there's nothing else we can do */ pe_proc_err("No further recovery can be attempted for %s " "because %s on %s failed (%s%s%s) at %s " CRM_XS " rc=%d id=%s", rsc->id, task, node->details->uname, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, last_change_str(xml_op), rc, ID(xml_op)); clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); } result = PCMK_LRM_OP_ERROR_HARD; break; default: if (result == PCMK_LRM_OP_DONE) { crm_info("Treating unknown exit status %d from %s of %s " "on %s at %s as failure", rc, task, rsc->id, node->details->uname, last_change_str(xml_op)); result = PCMK_LRM_OP_ERROR; } break; } return result; } // return TRUE if start or monitor last failure but parameters changed static bool should_clear_for_param_change(xmlNode *xml_op, const char *task, pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { if (!strcmp(task, "start") || !strcmp(task, "monitor")) { if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources yet, so we can't reliably * substitute addr parameters for the REMOTE_CONTAINER_HACK. * When that's needed, defer the check until later. */ pe__add_param_check(xml_op, rsc, node, pe_check_last_failure, data_set); } else { op_digest_cache_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s" " has no digest to compare", rsc->id, get_op_key(xml_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: return TRUE; } } } return FALSE; } // Order action after fencing of remote node, given connection rsc static void order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn, pe_working_set_t *data_set) { pe_node_t *remote_node = pe_find_node(data_set->nodes, remote_conn->id); if (remote_node) { pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, FALSE, data_set); order_actions(fence, action, pe_order_implies_then); } } static bool should_ignore_failure_timeout(pe_resource_t *rsc, xmlNode *xml_op, const char *task, guint interval_ms, bool is_last_failure, pe_working_set_t *data_set) { /* Clearing failures of recurring monitors has special concerns. The * executor reports only changes in the monitor result, so if the * monitor is still active and still getting the same failure result, * that will go undetected after the failure is cleared. * * Also, the operation history will have the time when the recurring * monitor result changed to the given code, not the time when the * result last happened. * * @TODO We probably should clear such failures only when the failure * timeout has passed since the last occurrence of the failed result. * However we don't record that information. We could maybe approximate * that by clearing only if there is a more recent successful monitor or * stop result, but we don't even have that information at this point * since we are still unpacking the resource's operation history. * * This is especially important for remote connection resources with a * reconnect interval, so in that case, we skip clearing failures * if the remote node hasn't been fenced. */ if (rsc->remote_reconnect_ms && is_set(data_set->flags, pe_flag_stonith_enabled) && (interval_ms != 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); if (remote_node && !remote_node->details->remote_was_fenced) { if (is_last_failure) { crm_info("Waiting to clear monitor failure for remote node %s" " until fencing has occurred", rsc->id); } return TRUE; } } return FALSE; } /*! * \internal * \brief Check operation age and schedule failure clearing when appropriate * * This function has two distinct purposes. The first is to check whether an * operation history entry is expired (i.e. the resource has a failure timeout, * the entry is older than the timeout, and the resource either has no fail * count or its fail count is entirely older than the timeout). The second is to * schedule fail count clearing when appropriate (i.e. the operation is expired * and either the resource has an expired fail count or the operation is a * last_failure for a remote connection resource with a reconnect interval, * or the operation is a last_failure for a start or monitor operation and the * resource's parameters have changed since the operation). * * \param[in] rsc Resource that operation happened to * \param[in] node Node that operation happened on * \param[in] rc Actual result of operation * \param[in] xml_op Operation history entry XML * \param[in] data_set Current working set * * \return TRUE if operation history entry is expired, FALSE otherwise */ static bool check_operation_expiry(pe_resource_t *rsc, pe_node_t *node, int rc, xmlNode *xml_op, pe_working_set_t *data_set) { bool expired = FALSE; bool is_last_failure = pcmk__ends_with(ID(xml_op), "_last_failure_0"); time_t last_run = 0; guint interval_ms = 0; int unexpired_fail_count = 0; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *clear_reason = NULL; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((rsc->failure_timeout > 0) && (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0)) { // Resource has a failure-timeout, and history entry has a timestamp time_t now = get_effective_time(data_set); time_t last_failure = 0; // Is this particular operation history older than the failure timeout? if ((now >= (last_run + rsc->failure_timeout)) && !should_ignore_failure_timeout(rsc, xml_op, task, interval_ms, is_last_failure, data_set)) { expired = TRUE; } // Does the resource as a whole have an unexpired fail count? unexpired_fail_count = pe_get_failcount(node, rsc, &last_failure, pe_fc_effective, xml_op, data_set); // Update scheduler recheck time according to *last* failure crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds" " last-failure@%lld", ID(xml_op), (long long) last_run, (expired? "" : "not "), (long long) now, unexpired_fail_count, rsc->failure_timeout, (long long) last_failure); last_failure += rsc->failure_timeout + 1; if (unexpired_fail_count && (now < last_failure)) { pe__update_recheck_time(last_failure, data_set); } } if (expired) { if (pe_get_failcount(node, rsc, NULL, pe_fc_default, xml_op, data_set)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { /* This operation is old, but there is an unexpired fail count. * In a properly functioning cluster, this should only be * possible if this operation is not a failure (otherwise the * fail count should be expired too), so this is really just a * failsafe. */ expired = FALSE; } } else if (is_last_failure && rsc->remote_reconnect_ms) { /* Clear any expired last failure when reconnect interval is set, * even if there is no fail count. */ clear_reason = "reconnect interval is set"; } } if (!expired && is_last_failure && should_clear_for_param_change(xml_op, task, rsc, node, data_set)) { clear_reason = "resource parameters have changed"; } if (clear_reason != NULL) { // Schedule clearing of the fail count pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason, data_set); if (is_set(data_set->flags, pe_flag_stonith_enabled) && rsc->remote_reconnect_ms) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing * completes. * * We could limit this to remote_node->details->unclean, but at * this point, that's always true (it won't be reliable until * after unpack_node_loop() is done). */ crm_info("Clearing %s failure will wait until any scheduled " "fencing of %s completes", task, rsc->id); order_after_remote_fencing(clear_op, rsc, data_set); } } if (expired && (interval_ms == 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { switch(rc) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: // Don't expire probes that return these values expired = FALSE; break; } } return expired; } int pe__target_rc_from_xml(xmlNode *xml_op) { int target_rc = 0; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, NULL, NULL, NULL, &target_rc); return target_rc; } static enum action_fail_response get_action_on_fail(pe_resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) { enum action_fail_response result = action_fail_recover; pe_action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); result = action->on_fail; pe_free_action(action); return result; } static void update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, const char * task, int rc, xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { gboolean clear_past_failure = FALSE; CRM_ASSERT(rsc); CRM_ASSERT(xml_op); if (rc == PCMK_OCF_NOT_RUNNING) { clear_past_failure = TRUE; } else if (rc == PCMK_OCF_NOT_INSTALLED) { rsc->role = RSC_ROLE_STOPPED; } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { if (last_failure) { const char *op_key = get_op_key(xml_op); const char *last_failure_key = get_op_key(last_failure); if (pcmk__str_eq(op_key, last_failure_key, pcmk__str_casei)) { clear_past_failure = TRUE; } } if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) { rsc->role = RSC_ROLE_STOPPED; clear_past_failure = TRUE; } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) { if (*on_fail == action_fail_demote) { // Demote clears an error only if on-fail=demote clear_past_failure = TRUE; } rsc->role = RSC_ROLE_SLAVE; } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { unpack_migrate_to_success(rsc, node, xml_op, data_set); } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_block: case action_fail_ignore: case action_fail_demote: case action_fail_recover: case action_fail_restart_container: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; break; case action_fail_reset_remote: if (rsc->remote_reconnect_ms == 0) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and * completely stopped. (With a reconnect interval, we wait * for the failure to be cleared entirely before attempting * to reconnect.) */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; } } } /*! * \internal * \brief Remap informational monitor results to usual values * * Certain OCF result codes are for providing extended information to the * user about services that aren't yet failed but not entirely healthy either. * These must be treated as the "normal" result by pacemaker. * * \param[in] rc Actual result of a monitor action * \param[in] xml_op Operation history XML * \param[in] node Node that operation happened on * \param[in] rsc Resource that operation happened to * \param[in] data_set Cluster working set * * \return Result code that pacemaker should use * * \note If the result is remapped, and the node is not shutting down or failed, * the operation will be recorded in the data set's list of failed * operations, to highlight it for the user. */ static int remap_monitor_rc(int rc, xmlNode *xml_op, const pe_node_t *node, const pe_resource_t *rsc, pe_working_set_t *data_set) { int remapped_rc = rc; switch (rc) { case PCMK_OCF_DEGRADED: remapped_rc = PCMK_OCF_OK; break; case PCMK_OCF_DEGRADED_MASTER: remapped_rc = PCMK_OCF_RUNNING_MASTER; break; default: break; } if (rc != remapped_rc) { crm_trace("Remapping monitor result %d to %d", rc, remapped_rc); if (!node->details->shutdown || node->details->online) { record_failed_op(xml_op, node, rsc, data_set); } } return remapped_rc; } static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *on_fail, pe_working_set_t *data_set) { int rc = 0; int task_id = 0; int target_rc = 0; int status = PCMK_LRM_OP_UNKNOWN; guint interval_ms = 0; const char *task = NULL; const char *task_key = NULL; const char *exit_reason = NULL; bool expired = FALSE; pe_resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; CRM_CHECK(rsc && node && xml_op, return); target_rc = pe__target_rc_from_xml(xml_op); task_key = get_op_key(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); if (exit_reason == NULL) { exit_reason = ""; } crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); CRM_CHECK(task != NULL, return); CRM_CHECK(status <= PCMK_LRM_OP_INVALID, return); CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return); if (!strcmp(task, CRMD_ACTION_NOTIFY) || !strcmp(task, CRMD_ACTION_METADATA)) { /* safe to ignore these */ return; } if (is_not_set(rsc->flags, pe_rsc_unique)) { parent = uber_parent(rsc); } pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role)); if (node->details->unclean) { pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribute", node->details->uname, rsc->id); } /* It should be possible to call remap_monitor_rc() first then call * check_operation_expiry() only if rc != target_rc, because there should * never be a fail count without at least one unexpected result in the * resource history. That would be more efficient by avoiding having to call * check_operation_expiry() for expected results. * * However, we do have such configurations in the scheduler regression * tests, even if it shouldn't be possible with the current code. It's * probably a good idea anyway, but that would require updating the test * inputs to something currently possible. */ if ((status != PCMK_LRM_OP_NOT_INSTALLED) && check_operation_expiry(rsc, node, rc, xml_op, data_set)) { expired = TRUE; } if (!strcmp(task, CRMD_ACTION_STATUS)) { rc = remap_monitor_rc(rc, xml_op, node, rsc, data_set); } if (expired && (rc != target_rc)) { const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); if (interval_ms == 0) { crm_notice("Ignoring expired %s failure on %s " CRM_XS " actual=%d expected=%d magic=%s", task_key, node->details->uname, rc, target_rc, magic); goto done; } else if(node->details->online && node->details->unclean == FALSE) { /* Reschedule the recurring monitor. CancelXmlOp() won't work at * this stage, so as a hacky workaround, forcibly change the restart * digest so check_action_definition() does what we want later. * * @TODO We should skip this if there is a newer successful monitor. * Also, this causes rescheduling only if the history entry * has an op-digest (which the expire-non-blocked-failure * scheduler regression test doesn't, but that may not be a * realistic scenario in production). */ crm_notice("Rescheduling %s after failure expired on %s " CRM_XS " actual=%d expected=%d magic=%s", task_key, node->details->uname, rc, target_rc, magic); crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); goto done; } } /* If the executor reported an operation status of anything but done or * error, consider that final. But for done or error, we know better whether * it should be treated as a failure or not, because we know the expected * result. */ if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) { status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set); pe_rsc_trace(rsc, "Remapped %s status to %d", task_key, status); } switch (status) { case PCMK_LRM_OP_CANCELLED: // Should never happen pe_err("Resource history contains cancellation '%s' " "(%s of %s on %s at %s)", ID(xml_op), task, rsc->id, node->details->uname, last_change_str(xml_op)); break; case PCMK_LRM_OP_PENDING: if (!strcmp(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (!strcmp(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { /* If a pending migrate_to action is out on a unclean node, * we have to force the stop action on the target. */ const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); pe_node_t *target = pe_find_node(data_set->nodes, migrate_target); if (target) { stop_action(rsc, target, FALSE); } } if (rsc->pending_task == NULL) { if ((interval_ms != 0) || strcmp(task, CRMD_ACTION_STATUS)) { rsc->pending_task = strdup(task); rsc->pending_node = node; } else { /* Pending probes are not printed, even if pending * operations are requested. If someone ever requests that * behavior, enable the below and the corresponding part of * native.c:native_pending_task(). */ #if 0 rsc->pending_task = strdup("probe"); rsc->pending_node = node; #endif } } break; case PCMK_LRM_OP_DONE: pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s", task, rsc->id, node->details->uname, last_change_str(xml_op), ID(xml_op)); update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_INSTALLED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if (failure_strategy == action_fail_ignore) { crm_warn("Cannot ignore failed %s of %s on %s: " "Resource agent doesn't exist " CRM_XS " status=%d rc=%d id=%s", task, rsc->id, node->details->uname, status, rc, ID(xml_op)); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } resource_location(parent, node, -INFINITY, "hard-error", data_set); unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_CONNECTED: if (pe__is_guest_or_remote_node(node) && is_set(node->details->remote_rsc->flags, pe_rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a * fail-safe in case a bug or unusual circumstances do lead to * that, ensure the remote connection is considered failed. */ set_bit(node->details->remote_rsc->flags, pe_rsc_failed); set_bit(node->details->remote_rsc->flags, pe_rsc_stop); } // fall through case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_ERROR_HARD: case PCMK_LRM_OP_ERROR_FATAL: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: case PCMK_LRM_OP_INVALID: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container && !strcmp(task, CRMD_ACTION_STOP))) { crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s " "succeeded " CRM_XS " rc=%d id=%s", task, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, rsc->id, node->details->uname, last_change_str(xml_op), rc, ID(xml_op)); update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); set_bit(rsc->flags, pe_rsc_failure_ignored); record_failed_op(xml_op, node, rsc, data_set); if ((failure_strategy == action_fail_restart_container) && cmp_on_fail(*on_fail, action_fail_recover) <= 0) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); if(status == PCMK_LRM_OP_ERROR_HARD) { do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, "Preventing %s from restarting on %s because " "of hard failure (%s%s%s)" CRM_XS " rc=%d id=%s", parent->id, node->details->uname, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, rc, ID(xml_op)); resource_location(parent, node, -INFINITY, "hard-error", data_set); } else if(status == PCMK_LRM_OP_ERROR_FATAL) { crm_err("Preventing %s from restarting anywhere because " "of fatal failure (%s%s%s) " CRM_XS " rc=%d id=%s", parent->id, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, rc, ID(xml_op)); resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); } } break; } done: pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role)); } static void add_node_attrs(xmlNode *xml_obj, pe_node_t *node, bool overwrite, pe_working_set_t *data_set) { const char *cluster_name = NULL; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_UNAME), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID), strdup(node->details->id)); if (pcmk__str_eq(node->details->id, data_set->dc_uuid, pcmk__str_casei)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME), strdup(cluster_name)); } pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data, node->details->attrs, NULL, overwrite, data_set); if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) { const char *site_name = pe_node_attribute_raw(node, "site-name"); if (site_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(cluster_name)); } } } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); pe_node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child_element(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) { continue; } this_node = pe_find_node(data_set->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pe__is_guest_or_remote_node(this_node)) { determine_remote_online_status(data_set, this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child_element(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next_element(lrm_rsc)) { if (pcmk__str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 2fbcb3bd92..a385302868 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,2844 +1,2843 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); static void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, pe_working_set_t * data_set, guint interval_ms); static xmlNode *find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key, gboolean include_disabled); #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t * pe_rsc_action_details(pe_action_t *action) { pe_rsc_action_details_t *details; CRM_CHECK(action != NULL, return NULL); if (action->action_details == NULL) { action->action_details = calloc(1, sizeof(pe_rsc_action_details_t)); CRM_CHECK(action->action_details != NULL, return NULL); } details = (pe_rsc_action_details_t *) action->action_details; if (details->versioned_parameters == NULL) { details->versioned_parameters = create_xml_node(NULL, XML_TAG_OP_VER_ATTRS); } if (details->versioned_meta == NULL) { details->versioned_meta = create_xml_node(NULL, XML_TAG_OP_VER_META); } return details; } static void pe_free_rsc_action_details(pe_action_t *action) { pe_rsc_action_details_t *details; if ((action == NULL) || (action->action_details == NULL)) { return; } details = (pe_rsc_action_details_t *) action->action_details; if (details->versioned_parameters) { free_xml(details->versioned_parameters); } if (details->versioned_meta) { free_xml(details->versioned_meta); } action->action_details = NULL; } #endif /*! * \internal * \brief Check whether we can fence a particular node * * \param[in] data_set Working set for cluster * \param[in] node Name of node to check * * \return true if node can be fenced, false otherwise */ bool pe_can_fence(pe_working_set_t *data_set, pe_node_t *node) { if (pe__is_guest_node(node)) { /* Guest nodes are fenced by stopping their container resource. We can * do that if the container's host is either online or fenceable. */ pe_resource_t *rsc = node->details->remote_rsc->container; for (GList *n = rsc->running_on; n != NULL; n = n->next) { pe_node_t *container_node = n->data; if (!container_node->details->online && !pe_can_fence(data_set, container_node)) { return false; } } return true; } else if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) { return false; /* Turned off */ } else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) { return false; /* No devices */ } else if (is_set(data_set->flags, pe_flag_have_quorum)) { return true; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return true; } else if(node == NULL) { return false; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname); return true; } crm_trace("Cannot fence %s", node->details->uname); return false; } /*! * \internal * \brief Copy a node object * * \param[in] this_node Node object to copy * * \return Newly allocated shallow copy of this_node * \note This function asserts on errors and is guaranteed to return non-NULL. */ pe_node_t * pe__copy_node(const pe_node_t *this_node) { pe_node_t *new_node = NULL; CRM_ASSERT(this_node != NULL); new_node = calloc(1, sizeof(pe_node_t)); CRM_ASSERT(new_node != NULL); new_node->rsc_discover_mode = this_node->rsc_discover_mode; new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores) { GHashTable *result = hash; pe_node_t *other_node = NULL; GListPtr gIter = list; GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = pe__add_scores(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { pe_node_t *new_node = pe__copy_node(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } /*! * \internal * \brief Create a node hash table from a node list * * \param[in] list Node list * * \return Hash table equivalent of node list */ GHashTable * pe__node_list2table(GList *list) { GHashTable *result = NULL; result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); for (GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *new_node = pe__copy_node((pe_node_t *) gIter->data); g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } return result; } gint sort_node_uname(gconstpointer a, gconstpointer b) { return pcmk_numeric_strcasecmp(((const pe_node_t *) a)->details->uname, ((const pe_node_t *) b)->details->uname); } /*! * \internal * \brief Output node weights to stdout * * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes If rsc is not specified, use these nodes */ static void pe__output_node_weights(pe_resource_t *rsc, const char *comment, GHashTable *nodes) { char score[128]; // Stack-allocated since this is called frequently // Sort the nodes so the output is consistent for regression tests GList *list = g_list_sort(g_hash_table_get_values(nodes), sort_node_uname); for (GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; score2char_stack(node->weight, score, sizeof(score)); if (rsc) { printf("%s: %s allocation score on %s: %s\n", comment, rsc->id, node->details->uname, score); } else { printf("%s: %s = %s\n", comment, node->details->uname, score); } } g_list_free(list); } /*! * \internal * \brief Log node weights at trace level * * \param[in] file Caller's filename * \param[in] function Caller's function name * \param[in] line Caller's line number * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes If rsc is not specified, use these nodes */ static void pe__log_node_weights(const char *file, const char *function, int line, pe_resource_t *rsc, const char *comment, GHashTable *nodes) { GHashTableIter iter; pe_node_t *node = NULL; char score[128]; // Stack-allocated since this is called frequently // Don't waste time if we're not tracing at this point pcmk__log_else(LOG_TRACE, return); g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { score2char_stack(node->weight, score, sizeof(score)); if (rsc) { qb_log_from_external_source(function, file, "%s: %s allocation score on %s: %s", LOG_TRACE, line, 0, comment, rsc->id, node->details->uname, score); } else { qb_log_from_external_source(function, file, "%s: %s = %s", LOG_TRACE, line, 0, comment, node->details->uname, score); } } } /*! * \internal * \brief Log or output node weights * * \param[in] file Caller's filename * \param[in] function Caller's function name * \param[in] line Caller's line number * \param[in] to_log Log if true, otherwise output * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes If rsc is not specified, use these nodes */ void pe__show_node_weights_as(const char *file, const char *function, int line, bool to_log, pe_resource_t *rsc, const char *comment, GHashTable *nodes) { if (rsc != NULL) { if (is_set(rsc->flags, pe_rsc_orphan)) { // Don't show allocation scores for orphans return; } nodes = rsc->allowed_nodes; } if (nodes == NULL) { // Nothing to show return; } if (to_log) { pe__log_node_weights(file, function, line, rsc, comment, nodes); } else { pe__output_node_weights(rsc, comment, nodes); } // If this resource has children, repeat recursively for each if (rsc && rsc->children) { for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; pe__show_node_weights_as(file, function, line, to_log, child, comment, nodes); } } } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; char *new_text = crm_strdup_printf("%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } void dump_node_capacity(int level, const char *comment, pe_node_t * node) { char *dump_text = crm_strdup_printf("%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); if (level == LOG_STDOUT) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } void dump_rsc_utilization(int level, const char *comment, pe_resource_t * rsc, pe_node_t * node) { char *dump_text = crm_strdup_printf("%s: %s utilization on %s:", comment, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); switch (level) { case LOG_STDOUT: fprintf(stdout, "%s\n", dump_text); break; case LOG_NEVER: break; default: crm_trace("%s", dump_text); } free(dump_text); } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->sort_index > resource2->sort_index) { return -1; } if (resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } static enum pe_quorum_policy effective_quorum_policy(pe_resource_t *rsc, pe_working_set_t *data_set) { enum pe_quorum_policy policy = data_set->no_quorum_policy; if (is_set(data_set->flags, pe_flag_have_quorum)) { policy = no_quorum_ignore; } else if (data_set->no_quorum_policy == no_quorum_demote) { switch (rsc->role) { case RSC_ROLE_MASTER: case RSC_ROLE_SLAVE: if (rsc->next_role > RSC_ROLE_SLAVE) { rsc->next_role = RSC_ROLE_SLAVE; } policy = no_quorum_ignore; break; default: policy = no_quorum_stop; break; } } return policy; } pe_action_t * custom_action(pe_resource_t * rsc, char *key, const char *task, pe_node_t * on_node, gboolean optional, gboolean save_action, pe_working_set_t * data_set) { pe_action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, free(key); return NULL); if (save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } else if(save_action) { #if 0 action = g_hash_table_lookup(data_set->singletons, key); #else /* More expensive but takes 'node' into account */ possible_matches = find_actions(data_set->actions, key, on_node); #endif } if(data_set->singletons == NULL) { data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); } if (possible_matches != NULL) { if (pcmk__list_of_multiple(possible_matches)) { pe_warn("Action %s for %s on %s exists %d times", task, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); pe_rsc_trace(rsc, "Found existing action %d (%s) for %s (%s) on %s", action->id, action->uuid, (rsc? rsc->id : "no resource"), task, (on_node? on_node->details->uname : "no node")); g_list_free(possible_matches); } if (action == NULL) { if (save_action) { pe_rsc_trace(rsc, "Creating %s action %d: %s for %s (%s) on %s", (optional? "optional" : "mandatory"), data_set->action_id, key, (rsc? rsc->id : "no resource"), task, (on_node? on_node->details->uname : "no node")); } action = calloc(1, sizeof(pe_action_t)); if (save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = strdup(task); if (on_node) { action->node = pe__copy_node(on_node); } action->uuid = strdup(key); if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { // Resource history deletion for a node can be done on the DC pe__set_action_flags(action, pe_action_dc); } pe__set_action_flags(action, pe_action_runnable); if (optional) { pe__set_action_flags(action, pe_action_optional); } else { pe__clear_action_flags(action, pe_action_optional); } action->extra = crm_str_table_new(); action->meta = crm_str_table_new(); if (save_action) { data_set->actions = g_list_prepend(data_set->actions, action); if(rsc == NULL) { g_hash_table_insert(data_set->singletons, action->uuid, action); } } if (rsc != NULL) { guint interval_ms = 0; action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); parse_op_key(key, NULL, NULL, &interval_ms); unpack_operation(action, action->op_entry, rsc->container, data_set, interval_ms); if (save_action) { rsc->actions = g_list_prepend(rsc->actions, action); } } if (save_action) { pe_rsc_trace(rsc, "Action %d created", action->id); } } if (!optional && is_set(action->flags, pe_action_optional)) { pe_rsc_trace(rsc, "Unset optional on action %d", action->id); pe__clear_action_flags(action, pe_action_optional); } if (rsc != NULL) { enum action_tasks a_task = text2task(action->task); enum pe_quorum_policy quorum_policy = effective_quorum_policy(rsc, data_set); int warn_level = LOG_TRACE; if (save_action) { warn_level = LOG_WARNING; } if (is_set(action->flags, pe_action_have_node_attrs) == FALSE && action->node != NULL && action->op_entry != NULL) { pe_rule_eval_data_t rule_data = { .node_hash = action->node->details->attrs, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; pe__set_action_flags(action, pe_action_have_node_attrs); pe__unpack_dataset_nvpairs(action->op_entry, XML_TAG_ATTR_SETS, &rule_data, action->extra, NULL, FALSE, data_set); } if (is_set(action->flags, pe_action_pseudo)) { /* leave untouched */ } else if (action->node == NULL) { pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid); pe__clear_action_flags(action, pe_action_runnable); } else if (is_not_set(rsc->flags, pe_rsc_managed) && g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS) == NULL) { crm_debug("Action %s (unmanaged)", action->uuid); pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe__set_action_flags(action, pe_action_optional); //pe__clear_action_flags(action, pe_action_runnable); } else if (is_not_set(action->flags, pe_action_dc) && !(action->node->details->online) && (!pe__is_guest_node(action->node) || action->node->details->remote_requires_reset)) { pe__clear_action_flags(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if (is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc && action->node->details->unclean == FALSE) { pe_fence_node(data_set, action->node, "resource actions are unrunnable", FALSE); } } else if (is_not_set(action->flags, pe_action_dc) && action->node->details->pending) { pe__clear_action_flags(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", action->uuid, action->node->details->uname); } else if (action->needs == rsc_req_nothing) { pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid); pe_action_set_reason(action, NULL, TRUE); if (pe__is_guest_node(action->node) && !pe_can_fence(data_set, action->node)) { /* An action that requires nothing usually does not require any * fencing in order to be runnable. However, there is an * exception: an action cannot be completed if it is on a guest * node whose host is unclean and cannot be fenced. */ pe__clear_action_flags(action, pe_action_runnable); crm_debug("%s\t%s (cancelled : host cannot be fenced)", action->node->details->uname, action->uuid); } else { pe__set_action_flags(action, pe_action_runnable); } #if 0 /* * No point checking this * - if we don't have quorum we can't stonith anyway */ } else if (action->needs == rsc_req_stonith) { crm_trace("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if (quorum_policy == no_quorum_stop) { pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "no quorum", pe_action_runnable, TRUE); crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if (quorum_policy == no_quorum_freeze) { pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role)); if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) { pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "quorum freeze", pe_action_runnable, TRUE); pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else if(is_not_set(action->flags, pe_action_runnable)) { pe_rsc_trace(rsc, "Action %s is runnable", action->uuid); //pe_action_set_reason(action, NULL, TRUE); pe__set_action_flags(action, pe_action_runnable); } if (save_action) { switch (a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if (is_set(action->flags, pe_action_runnable)) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } free(key); return action; } static bool valid_stop_on_fail(const char *value) { return !pcmk__strcase_any_of(value, "standby", "demote", "stop", NULL); } static const char * unpack_operation_on_fail(pe_action_t * action) { const char *name = NULL; const char *role = NULL; const char *on_fail = NULL; const char *interval_spec = NULL; const char *enabled = NULL; const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei) && !valid_stop_on_fail(value)) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s stop " "action to default value because '%s' is not " "allowed for stop", action->rsc->id, value); return NULL; } else if (pcmk__str_eq(action->task, CRMD_ACTION_DEMOTE, pcmk__str_casei) && !value) { /* demote on_fail defaults to master monitor value if present */ xmlNode *operation = NULL; CRM_CHECK(action->rsc != NULL, return NULL); for (operation = __xml_first_child_element(action->rsc->ops_xml); operation && !value; operation = __xml_next_element(operation)) { if (!pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { continue; } name = crm_element_value(operation, "name"); role = crm_element_value(operation, "role"); on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL); enabled = crm_element_value(operation, "enabled"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!on_fail) { continue; } else if (enabled && !crm_is_true(enabled)) { continue; } else if (!pcmk__str_eq(name, "monitor", pcmk__str_casei) || !pcmk__str_eq(role, "Master", pcmk__str_casei)) { continue; } else if (crm_parse_interval_spec(interval_spec) == 0) { continue; } else if (pcmk__str_eq(on_fail, "demote", pcmk__str_casei)) { continue; } value = on_fail; } } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { value = "ignore"; } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { name = crm_element_value(action->op_entry, "name"); role = crm_element_value(action->op_entry, "role"); on_fail = crm_element_value(action->op_entry, XML_OP_ATTR_ON_FAIL); interval_spec = crm_element_value(action->op_entry, XML_LRM_ATTR_INTERVAL); if (!pcmk__str_eq(name, CRMD_ACTION_PROMOTE, pcmk__str_casei) && (!pcmk__str_eq(name, CRMD_ACTION_STATUS, pcmk__str_casei) || !pcmk__str_eq(role, "Master", pcmk__str_casei) || (crm_parse_interval_spec(interval_spec) == 0))) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s %s " "action to default value because 'demote' is not " "allowed for it", action->rsc->id, name); return NULL; } } return value; } static xmlNode * find_min_interval_mon(pe_resource_t * rsc, gboolean include_disabled) { guint interval_ms = 0; guint min_interval_ms = G_MAXUINT; const char *name = NULL; const char *value = NULL; const char *interval_spec = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { name = crm_element_value(operation, "name"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } if (!pcmk__str_eq(name, RSC_STATUS, pcmk__str_casei)) { continue; } interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms && (interval_ms < min_interval_ms)) { min_interval_ms = interval_ms; op = operation; } } } return op; } static int unpack_start_delay(const char *value, GHashTable *meta) { int start_delay = 0; if (value != NULL) { start_delay = crm_get_msec(value); if (start_delay < 0) { start_delay = 0; } if (meta) { g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay)); } } return start_delay; } // true if value contains valid, non-NULL interval origin for recurring op static bool unpack_interval_origin(const char *value, xmlNode *xml_obj, guint interval_ms, crm_time_t *now, long long *start_delay) { long long result = 0; guint interval_sec = interval_ms / 1000; crm_time_t *origin = NULL; // Ignore unspecified values and non-recurring operations if ((value == NULL) || (interval_ms == 0) || (now == NULL)) { return false; } // Parse interval origin from text origin = crm_time_new(value); if (origin == NULL) { pcmk__config_err("Ignoring '" XML_OP_ATTR_ORIGIN "' for operation " "'%s' because '%s' is not valid", (ID(xml_obj)? ID(xml_obj) : "(missing ID)"), value); return false; } // Get seconds since origin (negative if origin is in the future) result = crm_time_get_seconds(now) - crm_time_get_seconds(origin); crm_time_free(origin); // Calculate seconds from closest interval to now result = result % interval_sec; // Calculate seconds remaining until next interval result = ((result <= 0)? 0 : interval_sec) - result; crm_info("Calculated a start delay of %llds for operation '%s'", result, (ID(xml_obj)? ID(xml_obj) : "(unspecified)")); if (start_delay != NULL) { *start_delay = result * 1000; // milliseconds } return true; } static int unpack_timeout(const char *value) { int timeout_ms = crm_get_msec(value); if (timeout_ms < 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } return timeout_ms; } int pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set) { xmlNode *child = NULL; GHashTable *action_meta = NULL; const char *timeout_spec = NULL; int timeout_ms = 0; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { if (pcmk__str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME), pcmk__str_casei)) { timeout_spec = crm_element_value(child, XML_ATTR_TIMEOUT); break; } } if (timeout_spec == NULL && data_set->op_defaults) { action_meta = crm_str_table_new(); pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data, action_meta, NULL, FALSE, data_set); timeout_spec = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT); } // @TODO check meta-attributes (including versioned meta-attributes) // @TODO maybe use min-interval monitor timeout as default for monitors timeout_ms = crm_get_msec(timeout_spec); if (timeout_ms < 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } if (action_meta != NULL) { g_hash_table_destroy(action_meta); } return timeout_ms; } #if ENABLE_VERSIONED_ATTRS static void unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj, guint interval_ms, crm_time_t *now) { xmlNode *attrs = NULL; xmlNode *attr = NULL; for (attrs = __xml_first_child_element(versioned_meta); attrs != NULL; attrs = __xml_next_element(attrs)) { for (attr = __xml_first_child_element(attrs); attr != NULL; attr = __xml_next_element(attr)) { const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); if (pcmk__str_eq(name, XML_OP_ATTR_START_DELAY, pcmk__str_casei)) { int start_delay = unpack_start_delay(value, NULL); crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay); } else if (pcmk__str_eq(name, XML_OP_ATTR_ORIGIN, pcmk__str_casei)) { long long start_delay = 0; if (unpack_interval_origin(value, xml_obj, interval_ms, now, &start_delay)) { crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_START_DELAY); crm_xml_add_ll(attr, XML_NVPAIR_ATTR_VALUE, start_delay); } } else if (pcmk__str_eq(name, XML_ATTR_TIMEOUT, pcmk__str_casei)) { int timeout_ms = unpack_timeout(value); crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, timeout_ms); } } } } #endif /*! * \brief Unpack operation XML into an action structure * * Unpack an operation's meta-attributes (normalizing the interval, timeout, * and start delay values as integer milliseconds), requirements, and * failure policy. * * \param[in,out] action Action to unpack into * \param[in] xml_obj Operation XML (or NULL if all defaults) * \param[in] container Resource that contains affected resource, if any * \param[in] data_set Cluster state * \param[in] interval_ms How frequently to perform the operation */ static void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, pe_working_set_t * data_set, guint interval_ms) { int timeout_ms = 0; const char *value = NULL; #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t *rsc_details = NULL; #endif pe_rsc_eval_data_t rsc_rule_data = { .standard = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_CLASS), .provider = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_PROVIDER), .agent = crm_element_value(action->rsc->xml, XML_EXPR_ATTR_TYPE) }; pe_op_eval_data_t op_rule_data = { .op_name = action->task, .interval = interval_ms }; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = &rsc_rule_data, .op_data = &op_rule_data }; CRM_CHECK(action && action->rsc, return); // Cluster-wide pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data, action->meta, NULL, FALSE, data_set); // Determine probe default timeout differently if (pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_casei) && (interval_ms == 0)) { xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); if (min_interval_mon) { value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); if (value) { crm_trace("\t%s: Setting default timeout to minimum-interval " "monitor's timeout '%s'", action->uuid, value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), strdup(value)); } } } if (xml_obj) { xmlAttrPtr xIter = NULL; // take precedence over defaults pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, &rule_data, action->meta, NULL, TRUE, data_set); #if ENABLE_VERSIONED_ATTRS rsc_details = pe_rsc_action_details(action); pe_eval_versioned_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, &rule_data, rsc_details->versioned_parameters, NULL); pe_eval_versioned_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, &rule_data, rsc_details->versioned_meta, NULL); #endif /* Anything set as an XML property has highest precedence. * This ensures we use the name and interval from the tag. */ for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } g_hash_table_remove(action->meta, "id"); // Normalize interval to milliseconds if (interval_ms > 0) { g_hash_table_replace(action->meta, strdup(XML_LRM_ATTR_INTERVAL), crm_strdup_printf("%u", interval_ms)); } else { g_hash_table_remove(action->meta, XML_LRM_ATTR_INTERVAL); } /* * Timeout order of precedence: * 1. pcmk_monitor_timeout (if rsc has pcmk_ra_cap_fence_params * and task is start or a probe; pcmk_monitor_timeout works * by default for a recurring monitor) * 2. explicit op timeout on the primitive * 3. default op timeout * a. if probe, then min-interval monitor's timeout * b. else, in XML_CIB_TAG_OPCONFIG * 4. CRM_DEFAULT_OP_TIMEOUT_S * * #1 overrides general rule of XML property having highest * precedence. */ if (is_set(pcmk_get_ra_caps(rsc_rule_data.standard), pcmk_ra_cap_fence_params) && (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei) || (pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_casei) && (interval_ms == 0))) && action->rsc->parameters) { value = g_hash_table_lookup(action->rsc->parameters, "pcmk_monitor_timeout"); if (value) { crm_trace("\t%s: Setting timeout to pcmk_monitor_timeout '%s', " "overriding default", action->uuid, value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), strdup(value)); } } // Normalize timeout to positive milliseconds value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT); timeout_ms = unpack_timeout(value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), crm_itoa(timeout_ms)); if (!pcmk__strcase_any_of(action->task, RSC_START, RSC_PROMOTE, NULL)) { action->needs = rsc_req_nothing; value = "nothing (not start/promote)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) { action->needs = rsc_req_stonith; value = "fencing (resource)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) { action->needs = rsc_req_quorum; value = "quorum (resource)"; } else { action->needs = rsc_req_nothing; value = "nothing (resource)"; } pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->uuid, value); value = unpack_operation_on_fail(action); if (value == NULL) { } else if (pcmk__str_eq(value, "block", pcmk__str_casei)) { action->on_fail = action_fail_block; g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block")); value = "block"; // The above could destroy the original string } else if (pcmk__str_eq(value, "fence", pcmk__str_casei)) { action->on_fail = action_fail_fence; value = "node fencing"; if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for " "operation '%s' to 'stop' because 'fence' is not " "valid when fencing is disabled", action->uuid); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (pcmk__str_eq(value, "standby", pcmk__str_casei)) { action->on_fail = action_fail_standby; value = "node standby"; } else if (pcmk__strcase_any_of(value, "ignore", "nothing", NULL)) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (pcmk__str_eq(value, "migrate", pcmk__str_casei)) { action->on_fail = action_fail_migrate; value = "force migration"; } else if (pcmk__str_eq(value, "stop", pcmk__str_casei)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (pcmk__str_eq(value, "restart", pcmk__str_casei)) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else if (pcmk__str_eq(value, "restart-container", pcmk__str_casei)) { if (container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate)"; } else { value = NULL; } } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { action->on_fail = action_fail_demote; value = "demote instance"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate) (default)"; /* For remote nodes, ensure that any failure that results in dropping an * active connection to the node results in fencing of the node. * * There are only two action failures that don't result in fencing. * 1. probes - probe failures are expected. * 2. start - a start failure indicates that an active connection does not already * exist. The user can set op on-fail=fence if they really want to fence start * failures. */ } else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) && (pe__resource_is_remote_conn(action->rsc, data_set) && !(pcmk__str_eq(action->task, CRMD_ACTION_STATUS, pcmk__str_casei) && (interval_ms == 0)) && (!pcmk__str_eq(action->task, CRMD_ACTION_START, pcmk__str_casei)))) { if (!is_set(action->rsc->flags, pe_rsc_managed)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop unmanaged remote node (enforcing default)"; } else { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { value = "fence remote node (default)"; } else { value = "recover remote node connection (default)"; } if (action->rsc->remote_reconnect_ms) { action->fail_role = RSC_ROLE_STOPPED; } action->on_fail = action_fail_reset_remote; } } else if (value == NULL && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); if (value) { pe_warn_once(pe_wo_role_after, "Support for role_after_failure is deprecated and will be removed in a future release"); } } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (pcmk__str_eq(action->task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task, role2text(action->fail_role)); value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY); if (value) { unpack_start_delay(value, action->meta); } else { long long start_delay = 0; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); if (unpack_interval_origin(value, xml_obj, interval_ms, data_set->now, &start_delay)) { g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY), crm_strdup_printf("%lld", start_delay)); } } #if ENABLE_VERSIONED_ATTRS unpack_versioned_meta(rsc_details->versioned_meta, xml_obj, interval_ms, data_set->now); #endif } static xmlNode * find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key, gboolean include_disabled) { guint interval_ms = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; const char *value = NULL; const char *interval_spec = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { name = crm_element_value(operation, "name"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } interval_ms = crm_parse_interval_spec(interval_spec); match_key = pcmk__op_key(rsc->id, name, interval_ms); if (pcmk__str_eq(key, match_key, pcmk__str_casei)) { op = operation; } free(match_key); if (rsc->clone_name) { match_key = pcmk__op_key(rsc->clone_name, name, interval_ms); if (pcmk__str_eq(key, match_key, pcmk__str_casei)) { op = operation; } free(match_key); } if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) { local_key = pcmk__op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = pcmk__op_key(rsc->id, "notify", 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(pe_resource_t * rsc, const char *key) { return find_rsc_op_entry_helper(rsc, key, FALSE); } void print_node(const char *pre_text, pe_node_t * node, gboolean details) { if (node == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } CRM_ASSERT(node->details); crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details) { int log_level = LOG_TRACE; char *pe_mutable = strdup("\t\t"); GListPtr gIter = node->details->running_rsc; crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; rsc->fns->print(rsc, "\t\t", pe_print_log|pe_print_pending, &log_level); } } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_trace("%s%s %s ==> %s", user_data == NULL ? "" : (char *)user_data, user_data == NULL ? "" : ": ", (char *)key, (char *)value); } void pe_free_action(pe_action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* pe_action_wrapper_t* */ g_list_free_full(action->actions_after, free); /* pe_action_wrapper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } #if ENABLE_VERSIONED_ATTRS if (action->rsc) { pe_free_rsc_action_details(action); } #endif free(action->cancel_task); free(action->reason); free(action->task); free(action->uuid); free(action->node); free(action); } GListPtr find_recurring_actions(GListPtr input, pe_node_t * not_on_node) { const char *value = NULL; GListPtr result = NULL; GListPtr gIter = input; CRM_CHECK(input != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS); if (value == NULL) { /* skip */ } else if (pcmk__str_eq(value, "0", pcmk__str_casei)) { /* skip */ } else if (pcmk__str_eq(CRMD_ACTION_CANCEL, action->task, pcmk__str_casei)) { /* skip */ } else if (not_on_node == NULL) { crm_trace("(null) Found: %s", action->uuid); result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ } else if (action->node->details != not_on_node->details) { crm_trace("Found: %s", action->uuid); result = g_list_prepend(result, action); } } return result; } enum action_tasks get_complex_task(pe_resource_t * rsc, const char *name, gboolean allow_non_atomic) { enum action_tasks task = text2task(name); if (rsc == NULL) { return task; } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); return task - 1; default: break; } } return task; } pe_action_t * find_first_action(GListPtr input, const char *uuid, const char *task, pe_node_t * on_node) { GListPtr gIter = NULL; CRM_CHECK(uuid || task, return NULL); for (gIter = input; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (uuid != NULL && !pcmk__str_eq(uuid, action->uuid, pcmk__str_casei)) { continue; } else if (task != NULL && !pcmk__str_eq(task, action->task, pcmk__str_casei)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == action->node->details) { return action; } } return NULL; } GListPtr find_actions(GListPtr input, const char *key, const pe_node_t *on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) { crm_trace("%s does not match action %s", key, action->uuid); continue; } else if (on_node == NULL) { crm_trace("Action %s matches (ignoring node)", key); result = g_list_prepend(result, action); } else if (action->node == NULL) { crm_trace("Action %s matches (unallocated, assigning to %s)", key, on_node->details->uname); action->node = pe__copy_node(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { crm_trace("Action %s on %s matches", key, on_node->details->uname); result = g_list_prepend(result, action); } else { crm_trace("Action %s on node %s does not match requested node %s", key, action->node->details->uname, on_node->details->uname); } } return result; } GList * find_actions_exact(GList *input, const char *key, const pe_node_t *on_node) { GList *result = NULL; CRM_CHECK(key != NULL, return NULL); if (on_node == NULL) { crm_trace("Not searching for action %s because node not specified", key); return NULL; } for (GList *gIter = input; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (action->node == NULL) { crm_trace("Skipping comparison of %s vs action %s without node", key, action->uuid); } else if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) { crm_trace("Desired action %s doesn't match %s", key, action->uuid); } else if (!pcmk__str_eq(on_node->details->id, action->node->details->id, pcmk__str_casei)) { crm_trace("Action %s desired node ID %s doesn't match %s", key, on_node->details->id, action->node->details->id); } else { crm_trace("Action %s matches", key); result = g_list_prepend(result, action); } } return result; } /*! * \brief Find all actions of given type for a resource * * \param[in] rsc Resource to search * \param[in] node Find only actions scheduled on this node * \param[in] task Action name to search for * \param[in] require_node If TRUE, NULL node or action node will not match * * \return List of actions found (or NULL if none) * \note If node is not NULL and require_node is FALSE, matching actions * without a node will be assigned to node. */ GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node) { GList *result = NULL; char *key = pcmk__op_key(rsc->id, task, 0); if (require_node) { result = find_actions_exact(rsc->actions, key, node); } else { result = find_actions(rsc->actions, key, node); } free(key); return result; } static void resource_node_score(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag) { pe_node_t *match = NULL; if ((rsc->exclusive_discover || (node->rsc_discover_mode == pe_discover_never)) && pcmk__str_eq(tag, "symmetric_default", pcmk__str_casei)) { /* This string comparision may be fragile, but exclusive resources and * exclusive nodes should not have the symmetric_default constraint * applied to them. */ return; } else if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = pe__copy_node(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = pe__add_scores(match->weight, score); } void resource_location(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag, pe_working_set_t * data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GListPtr gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node_iter = (pe_node_t *) gIter->data; resource_node_score(rsc, node_iter, score, tag); } } else { GHashTableIter iter; pe_node_t *node_iter = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) { resource_node_score(rsc, node_iter, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) gint sort_op_by_callid(gconstpointer a, gconstpointer b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID); if (pcmk__str_eq(a_xml_id, b_xml_id, pcmk__str_casei)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unlikely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why it's happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id); crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesn't matter since * stops are never pending */ sort_return(0, "pending"); } else if (a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (b_call_id >= 0 && a_call_id == b_call_id) { /* * The op and last_failed_op are the same * Order on last-rc-change */ time_t last_a = -1; time_t last_b = -1; crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); crm_trace("rc-change: %lld vs %lld", (long long) last_a, (long long) last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */ int a_id = -1; int b_id = -1; const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL, NULL)) { sort_return(0, "bad magic a"); } if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL, NULL)) { sort_return(0, "bad magic b"); } /* try to determine the relative age of the operation... * some pending operations (e.g. a start) may have been superseded * by a subsequent stop * * [a|b]_id == -1 means it's a shutdown operation and _always_ comes last */ if (!pcmk__str_eq(a_uuid, b_uuid, pcmk__str_casei) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesn't match then one better * be a pending operation. * pending operations don't survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } time_t get_effective_time(pe_working_set_t * data_set) { if(data_set) { if (data_set->now == NULL) { crm_trace("Recording a new 'now'"); data_set->now = crm_time_new(NULL); } return crm_time_get_seconds_since_epoch(data_set->now); } crm_trace("Defaulting to 'now'"); return time(NULL); } gboolean get_target_role(pe_resource_t * rsc, enum rsc_role_e * role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (pcmk__str_eq(value, "started", pcmk__str_null_matches | pcmk__str_casei) || pcmk__str_eq("default", value, pcmk__str_casei)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s " "because '%s' is not valid", rsc->id, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable)) { if (local_role > RSC_ROLE_SLAVE) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s " "because '%s' only makes sense for promotable " "clones", rsc->id, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order) { GListPtr gIter = NULL; pe_action_wrapper_t *wrapper = NULL; GListPtr list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); /* Ensure we never create a dependency on ourselves... it's happened */ CRM_ASSERT(lh_action != rh_action); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *after = (pe_action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(pe_action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = NULL; /* order |= pe_order_implies_then; */ /* order ^= pe_order_implies_then; */ wrapper = calloc(1, sizeof(pe_action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } pe_action_t * get_pseudo_op(const char *name, pe_working_set_t * data_set) { pe_action_t *op = NULL; if(data_set->singletons) { op = g_hash_table_lookup(data_set->singletons, name); } if (op == NULL) { op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set); - set_bit(op->flags, pe_action_pseudo); - set_bit(op->flags, pe_action_runnable); + pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); } return op; } void destroy_ticket(gpointer data) { pe_ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } pe_ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { pe_ticket_t *ticket = NULL; if (pcmk__str_empty(ticket_id)) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(pe_ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = crm_str_table_new(); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } static void filter_parameters(xmlNode * param_set, const char *param_string, bool need_present) { if (param_set && param_string) { xmlAttrPtr xIter = param_set->properties; while (xIter) { const char *prop_name = (const char *)xIter->name; char *name = crm_strdup_printf(" %s ", prop_name); char *match = strstr(param_string, name); free(name); // Do now, because current entry might get removed below xIter = xIter->next; if (need_present && match == NULL) { crm_trace("%s not found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } else if (need_present == FALSE && match) { crm_trace("%s found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } } } } #if ENABLE_VERSIONED_ATTRS static void append_versioned_params(xmlNode *versioned_params, const char *ra_version, xmlNode *params) { GHashTable *hash = pe_unpack_versioned_parameters(versioned_params, ra_version); char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { crm_xml_add(params, key, value); } g_hash_table_destroy(hash); } #endif /*! * \internal * \brief Calculate action digests and store in node's digest cache * * \param[in] rsc Resource that action was for * \param[in] task Name of action performed * \param[in] key Action's task key * \param[in] node Node action was performed on * \param[in] xml_op XML of operation in CIB status (if available) * \param[in] calc_secure Whether to calculate secure digest * \param[in] data_set Cluster working set * * \return Pointer to node's digest cache entry */ static op_digest_cache_t * rsc_action_digest(pe_resource_t *rsc, const char *task, const char *key, pe_node_t *node, xmlNode *xml_op, bool calc_secure, pe_working_set_t *data_set) { op_digest_cache_t *data = NULL; data = g_hash_table_lookup(node->details->digest_cache, key); if (data == NULL) { GHashTable *local_rsc_params = crm_str_table_new(); pe_action_t *action = custom_action(rsc, strdup(key), task, node, TRUE, FALSE, data_set); #if ENABLE_VERSIONED_ATTRS xmlNode *local_versioned_params = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS); const char *ra_version = NULL; #endif const char *op_version; const char *restart_list = NULL; const char *secure_list = " passwd password "; data = calloc(1, sizeof(op_digest_cache_t)); CRM_ASSERT(data != NULL); get_rsc_attributes(local_rsc_params, rsc, node, data_set); #if ENABLE_VERSIONED_ATTRS pe_get_versioned_attributes(local_versioned_params, rsc, node, data_set); #endif data->params_all = create_xml_node(NULL, XML_TAG_PARAMS); // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside if (pe__add_bundle_remote_name(rsc, data->params_all, XML_RSC_ATTR_REMOTE_RA_ADDR)) { crm_trace("Set address for bundle connection %s (on %s)", rsc->id, node->details->uname); } g_hash_table_foreach(local_rsc_params, hash2field, data->params_all); g_hash_table_foreach(action->extra, hash2field, data->params_all); g_hash_table_foreach(rsc->parameters, hash2field, data->params_all); g_hash_table_foreach(action->meta, hash2metafield, data->params_all); if(xml_op) { secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); #if ENABLE_VERSIONED_ATTRS ra_version = crm_element_value(xml_op, XML_ATTR_RA_VERSION); #endif } else { op_version = CRM_FEATURE_SET; } #if ENABLE_VERSIONED_ATTRS append_versioned_params(local_versioned_params, ra_version, data->params_all); append_versioned_params(rsc->versioned_parameters, ra_version, data->params_all); { pe_rsc_action_details_t *details = pe_rsc_action_details(action); append_versioned_params(details->versioned_parameters, ra_version, data->params_all); } #endif pcmk__filter_op_for_digest(data->params_all); g_hash_table_destroy(local_rsc_params); pe_free_action(action); data->digest_all_calc = calculate_operation_digest(data->params_all, op_version); if (calc_secure) { data->params_secure = copy_xml(data->params_all); if(secure_list) { filter_parameters(data->params_secure, secure_list, FALSE); } data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version); } if(xml_op && crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST) != NULL) { data->params_restart = copy_xml(data->params_all); if (restart_list) { filter_parameters(data->params_restart, restart_list, TRUE); } data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version); } g_hash_table_insert(node->details->digest_cache, strdup(key), data); } return data; } op_digest_cache_t * rsc_action_digest_cmp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * node, pe_working_set_t * data_set) { op_digest_cache_t *data = NULL; char *key = NULL; guint interval_ms = 0; const char *op_version; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_all; const char *digest_restart; CRM_ASSERT(node != NULL); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); key = pcmk__op_key(rsc->id, task, interval_ms); data = rsc_action_digest(rsc, task, key, node, xml_op, is_set(data_set->flags, pe_flag_sanitized), data_set); data->rc = RSC_DIGEST_MATCH; if (digest_restart && data->digest_restart_calc && strcmp(data->digest_restart_calc, digest_restart) != 0) { pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", key, node->details->uname, crm_str(digest_restart), data->digest_restart_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); data->rc = RSC_DIGEST_RESTART; } else if (digest_all == NULL) { /* it is unknown what the previous op digest was */ data->rc = RSC_DIGEST_UNKNOWN; } else if (strcmp(digest_all, data->digest_all_calc) != 0) { pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (%s:%s) %s", key, node->details->uname, crm_str(digest_all), data->digest_all_calc, (interval_ms > 0)? "reschedule" : "reload", op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); data->rc = RSC_DIGEST_ALL; } free(key); return data; } /*! * \internal * \brief Create an unfencing summary for use in special node attribute * * Create a string combining a fence device's resource ID, agent type, and * parameter digest (whether for all parameters or just non-private parameters). * This can be stored in a special node attribute, allowing us to detect changes * in either the agent type or parameters, to know whether unfencing must be * redone or can be safely skipped when the device's history is cleaned. * * \param[in] rsc_id Fence device resource ID * \param[in] agent_type Fence device agent * \param[in] param_digest Fence device parameter digest * * \return Newly allocated string with unfencing digest * \note The caller is responsible for freeing the result. */ static inline char * create_unfencing_summary(const char *rsc_id, const char *agent_type, const char *param_digest) { return crm_strdup_printf("%s:%s:%s", rsc_id, agent_type, param_digest); } /*! * \internal * \brief Check whether a node can skip unfencing * * Check whether a fence device's current definition matches a node's * stored summary of when it was last unfenced by the device. * * \param[in] rsc_id Fence device's resource ID * \param[in] agent Fence device's agent type * \param[in] digest_calc Fence device's current parameter digest * \param[in] node_summary Value of node's special unfencing node attribute * (a comma-separated list of unfencing summaries for * all devices that have unfenced this node) * * \return TRUE if digest matches, FALSE otherwise */ static bool unfencing_digest_matches(const char *rsc_id, const char *agent, const char *digest_calc, const char *node_summary) { bool matches = FALSE; if (rsc_id && agent && digest_calc && node_summary) { char *search_secure = create_unfencing_summary(rsc_id, agent, digest_calc); /* The digest was calculated including the device ID and agent, * so there is no risk of collision using strstr(). */ matches = (strstr(node_summary, search_secure) != NULL); crm_trace("Calculated unfencing digest '%s' %sfound in '%s'", search_secure, matches? "" : "not ", node_summary); free(search_secure); } return matches; } /* Magic string to use as action name for digest cache entries used for * unfencing checks. This is not a real action name (i.e. "on"), so * check_action_definition() won't confuse these entries with real actions. */ #define STONITH_DIGEST_TASK "stonith-on" /*! * \internal * \brief Calculate fence device digests and digest comparison result * * \param[in] rsc Fence device resource * \param[in] agent Fence device's agent type * \param[in] node Node with digest cache to use * \param[in] data_set Cluster working set * * \return Node's digest cache entry */ static op_digest_cache_t * fencing_action_digest_cmp(pe_resource_t *rsc, const char *agent, pe_node_t *node, pe_working_set_t *data_set) { const char *node_summary = NULL; // Calculate device's current parameter digests char *key = pcmk__op_key(rsc->id, STONITH_DIGEST_TASK, 0); op_digest_cache_t *data = rsc_action_digest(rsc, STONITH_DIGEST_TASK, key, node, NULL, TRUE, data_set); free(key); // Check whether node has special unfencing summary node attribute node_summary = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_ALL); if (node_summary == NULL) { data->rc = RSC_DIGEST_UNKNOWN; return data; } // Check whether full parameter digest matches if (unfencing_digest_matches(rsc->id, agent, data->digest_all_calc, node_summary)) { data->rc = RSC_DIGEST_MATCH; return data; } // Check whether secure parameter digest matches node_summary = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_SECURE); if (unfencing_digest_matches(rsc->id, agent, data->digest_secure_calc, node_summary)) { data->rc = RSC_DIGEST_MATCH; if (is_set(data_set->flags, pe_flag_stdout)) { printf("Only 'private' parameters to %s for unfencing %s changed\n", rsc->id, node->details->uname); } return data; } // Parameters don't match data->rc = RSC_DIGEST_ALL; if (is_set(data_set->flags, (pe_flag_sanitized|pe_flag_stdout)) && data->digest_secure_calc) { char *digest = create_unfencing_summary(rsc->id, agent, data->digest_secure_calc); printf("Parameters to %s for unfencing %s changed, try '%s'\n", rsc->id, node->details->uname, digest); free(digest); } return data; } const char *rsc_printable_id(pe_resource_t *rsc) { if (is_not_set(rsc->flags, pe_rsc_unique)) { return ID(rsc->xml); } return rsc->id; } void clear_bit_recursive(pe_resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; clear_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; clear_bit_recursive(child_rsc, flag); } } void set_bit_recursive(pe_resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; set_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; set_bit_recursive(child_rsc, flag); } } static GListPtr find_unfencing_devices(GListPtr candidates, GListPtr matches) { for (GListPtr gIter = candidates; gIter != NULL; gIter = gIter->next) { pe_resource_t *candidate = gIter->data; const char *provides = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_PROVIDES); const char *requires = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_REQUIRES); if(candidate->children) { matches = find_unfencing_devices(candidate->children, matches); } else if (is_not_set(candidate->flags, pe_rsc_fence_device)) { continue; } else if (pcmk__str_eq(provides, "unfencing", pcmk__str_casei) || pcmk__str_eq(requires, "unfencing", pcmk__str_casei)) { matches = g_list_prepend(matches, candidate); } } return matches; } static int node_priority_fencing_delay(pe_node_t * node, pe_working_set_t * data_set) { int member_count = 0; int online_count = 0; int top_priority = 0; int lowest_priority = 0; GListPtr gIter = NULL; // `priority-fencing-delay` is disabled if (data_set->priority_fencing_delay <= 0) { return 0; } /* No need to request a delay if the fencing target is not a normal cluster * member, for example if it's a remote node or a guest node. */ if (node->details->type != node_member) { return 0; } // No need to request a delay if the fencing target is in our partition if (node->details->online) { return 0; } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *n = gIter->data; if (n->details->type != node_member) { continue; } member_count ++; if (n->details->online) { online_count++; } if (member_count == 1 || n->details->priority > top_priority) { top_priority = n->details->priority; } if (member_count == 1 || n->details->priority < lowest_priority) { lowest_priority = n->details->priority; } } // No need to delay if we have more than half of the cluster members if (online_count > member_count / 2) { return 0; } /* All the nodes have equal priority. * Any configured corresponding `pcmk_delay_base/max` will be applied. */ if (lowest_priority == top_priority) { return 0; } if (node->details->priority < top_priority) { return 0; } return data_set->priority_fencing_delay; } pe_action_t * pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t * data_set) { char *op_key = NULL; pe_action_t *stonith_op = NULL; if(op == NULL) { op = data_set->stonith_action; } op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op); if(data_set->singletons) { stonith_op = g_hash_table_lookup(data_set->singletons, op_key); } if(stonith_op == NULL) { stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", op); if (pe__is_guest_or_remote_node(node) && is_set(data_set->flags, pe_flag_enable_unfencing)) { /* Extra work to detect device changes on remotes * * We may do this for all nodes in the future, but for now * the check_action_definition() based stuff works fine. */ long max = 1024; long digests_all_offset = 0; long digests_secure_offset = 0; char *digests_all = calloc(max, sizeof(char)); char *digests_secure = calloc(max, sizeof(char)); GListPtr matches = find_unfencing_devices(data_set->resources, NULL); for (GListPtr gIter = matches; gIter != NULL; gIter = gIter->next) { pe_resource_t *match = gIter->data; const char *agent = g_hash_table_lookup(match->meta, XML_ATTR_TYPE); op_digest_cache_t *data = NULL; data = fencing_action_digest_cmp(match, agent, node, data_set); if(data->rc == RSC_DIGEST_ALL) { optional = FALSE; crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id); if (is_set(data_set->flags, pe_flag_stdout)) { fprintf(stdout, " notice: Unfencing %s (remote): because the definition of %s changed\n", node->details->uname, match->id); } } digests_all_offset += snprintf( digests_all+digests_all_offset, max-digests_all_offset, "%s:%s:%s,", match->id, agent, data->digest_all_calc); digests_secure_offset += snprintf( digests_secure+digests_secure_offset, max-digests_secure_offset, "%s:%s:%s,", match->id, agent, data->digest_secure_calc); } g_hash_table_insert(stonith_op->meta, strdup(XML_OP_ATTR_DIGESTS_ALL), digests_all); g_hash_table_insert(stonith_op->meta, strdup(XML_OP_ATTR_DIGESTS_SECURE), digests_secure); } } else { free(op_key); } if (data_set->priority_fencing_delay > 0 /* It's a suitable case where `priority-fencing-delay` applies. * At least add `priority-fencing-delay` field as an indicator. */ && (priority_delay /* Re-calculate priority delay for the suitable case when * pe_fence_op() is called again by stage6() after node priority has * been actually calculated with native_add_running() */ || g_hash_table_lookup(stonith_op->meta, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY) != NULL)) { /* Add `priority-fencing-delay` to the fencing op even if it's 0 for * the targeting node. So that it takes precedence over any possible * `pcmk_delay_base/max`. */ char *delay_s = crm_itoa(node_priority_fencing_delay(node, data_set)); g_hash_table_insert(stonith_op->meta, strdup(XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY), delay_s); } if(optional == FALSE && pe_can_fence(data_set, node)) { pe_action_required(stonith_op, NULL, reason); } else if(reason && stonith_op->reason == NULL) { stonith_op->reason = strdup(reason); } return stonith_op; } void trigger_unfencing( pe_resource_t * rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t * data_set) { if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { /* No resources require it */ return; } else if (rsc != NULL && is_not_set(rsc->flags, pe_rsc_fence_device)) { /* Wasn't a stonith device */ return; } else if(node && node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { pe_action_t *unfence = pe_fence_op(node, "on", FALSE, reason, FALSE, data_set); if(dependency) { order_actions(unfence, dependency, pe_order_optional); } } else if(rsc) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { trigger_unfencing(rsc, node, reason, dependency, data_set); } } } } gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref) { pe_tag_t *tag = NULL; GListPtr gIter = NULL; gboolean is_existing = FALSE; CRM_CHECK(tags && tag_name && obj_ref, return FALSE); tag = g_hash_table_lookup(tags, tag_name); if (tag == NULL) { tag = calloc(1, sizeof(pe_tag_t)); if (tag == NULL) { return FALSE; } tag->id = strdup(tag_name); tag->refs = NULL; g_hash_table_insert(tags, strdup(tag_name), tag); } for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *existing_ref = (const char *) gIter->data; if (pcmk__str_eq(existing_ref, obj_ref, pcmk__str_none)){ is_existing = TRUE; break; } } if (is_existing == FALSE) { tag->refs = g_list_append(tag->refs, strdup(obj_ref)); crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref); } return TRUE; } void pe_action_set_flag_reason(const char *function, long line, pe_action_t *action, pe_action_t *reason, const char *text, enum pe_action_flags flags, bool overwrite) { bool unset = FALSE; bool update = FALSE; const char *change = NULL; if(is_set(flags, pe_action_runnable)) { unset = TRUE; change = "unrunnable"; } else if(is_set(flags, pe_action_optional)) { unset = TRUE; change = "required"; } else if(is_set(flags, pe_action_migrate_runnable)) { unset = TRUE; overwrite = TRUE; change = "unrunnable"; } else if(is_set(flags, pe_action_dangle)) { change = "dangling"; } else if(is_set(flags, pe_action_requires_any)) { change = "required"; } else { crm_err("Unknown flag change to %x by %s: 0x%s", flags, action->uuid, (reason? reason->uuid : "0")); } if(unset) { if(is_set(action->flags, flags)) { pe__clear_action_flags_as(function, line, action, flags); update = TRUE; } } else { if(is_not_set(action->flags, flags)) { pe__set_action_flags_as(function, line, action, flags); update = TRUE; } } if((change && update) || text) { char *reason_text = NULL; if(reason == NULL) { pe_action_set_reason(action, text, overwrite); } else if(reason->rsc == NULL) { reason_text = crm_strdup_printf("%s %s%c %s", change, reason->task, text?':':0, text?text:""); } else { reason_text = crm_strdup_printf("%s %s %s%c %s", change, reason->rsc->id, reason->task, text?':':0, text?text:"NA"); } if(reason_text && action->rsc != reason->rsc) { pe_action_set_reason(action, reason_text, overwrite); } free(reason_text); } } void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite) { if (action->reason != NULL && overwrite) { pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'", action->uuid, action->reason, crm_str(reason)); free(action->reason); } else if (action->reason == NULL) { pe_rsc_trace(action->rsc, "Set %s reason to '%s'", action->uuid, crm_str(reason)); } else { // crm_assert(action->reason != NULL && !overwrite); return; } if (reason != NULL) { action->reason = strdup(reason); } else { action->reason = NULL; } } /*! * \internal * \brief Check whether shutdown has been requested for a node * * \param[in] node Node to check * * \return TRUE if node has shutdown attribute set and nonzero, FALSE otherwise * \note This differs from simply using node->details->shutdown in that it can * be used before that has been determined (and in fact to determine it), * and it can also be used to distinguish requested shutdown from implicit * shutdown of remote nodes by virtue of their connection stopping. */ bool pe__shutdown_requested(pe_node_t *node) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); return !pcmk__str_eq(shutdown, "0", pcmk__str_null_matches); } /*! * \internal * \brief Update a data set's "recheck by" time * * \param[in] recheck Epoch time when recheck should happen * \param[in,out] data_set Current working set */ void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set) { if ((recheck > get_effective_time(data_set)) && ((data_set->recheck_by == 0) || (data_set->recheck_by > recheck))) { data_set->recheck_by = recheck; } } /*! * \internal * \brief Wrapper for pe_unpack_nvpairs() using a cluster working set */ void pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name, pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pe_working_set_t *data_set) { crm_time_t *next_change = crm_time_new_undefined(); pe_eval_nvpairs(data_set->input, xml_obj, set_name, rule_data, hash, always_first, overwrite, next_change); if (crm_time_is_defined(next_change)) { time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change); pe__update_recheck_time(recheck, data_set); } crm_time_free(next_change); } bool pe__resource_is_disabled(pe_resource_t *rsc) { const char *target_role = NULL; CRM_CHECK(rsc != NULL, return false); target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if (target_role) { enum rsc_role_e target_role_e = text2role(target_role); if ((target_role_e == RSC_ROLE_STOPPED) || ((target_role_e == RSC_ROLE_SLAVE) && is_set(uber_parent(rsc)->flags, pe_rsc_promotable))) { return true; } } return false; } /*! * \internal * \brief Create an action to clear a resource's history from CIB * * \param[in] rsc Resource to clear * \param[in] node Node to clear history on * * \return New action to clear resource history */ pe_action_t * pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { char *key = NULL; CRM_ASSERT(rsc && node); key = pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0); return custom_action(rsc, key, CRM_OP_LRM_DELETE, node, FALSE, TRUE, data_set); } bool pe__rsc_running_on_any_node_in_list(pe_resource_t *rsc, GListPtr node_list) { for (GListPtr ele = rsc->running_on; ele; ele = ele->next) { pe_node_t *node = (pe_node_t *) ele->data; if (pcmk__str_in_list(node_list, node->details->uname)) { return true; } } return false; } bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GListPtr only_node) { return (rsc->fns->active(rsc, FALSE) && !pe__rsc_running_on_any_node_in_list(rsc, only_node)); } GListPtr pe__filter_rsc_list(GListPtr rscs, GListPtr filter) { GListPtr retval = NULL; for (GListPtr gIter = rscs; gIter; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* I think the second condition is safe here for all callers of this * function. If not, it needs to move into pe__node_text. */ if (pcmk__str_in_list(filter, rsc_printable_id(rsc)) || (rsc->parent && pcmk__str_in_list(filter, rsc_printable_id(rsc->parent)))) { retval = g_list_prepend(retval, rsc); } } return retval; } diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c index ed68a19744..99a1bebb4d 100644 --- a/tools/crm_simulate.c +++ b/tools/crm_simulate.c @@ -1,1134 +1,1134 @@ /* * Copyright 2009-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "crm_simulate - simulate a Pacemaker cluster's response to events" /* show_scores and show_utilization can't be added to this struct. They * actually come from include/pcmki/pcmki_scheduler.h where they are * defined as extern. */ struct { gboolean all_actions; char *dot_file; char *graph_file; gchar *input_file; guint modified; GListPtr node_up; GListPtr node_down; GListPtr node_fail; GListPtr op_fail; GListPtr op_inject; gchar *output_file; gboolean print_pending; gboolean process; char *quorum; long long repeat; gboolean simulate; gboolean store; gchar *test_dir; GListPtr ticket_grant; GListPtr ticket_revoke; GListPtr ticket_standby; GListPtr ticket_activate; char *use_date; char *watchdog; char *xml_file; } options = { .print_pending = TRUE, .repeat = 1 }; cib_t *global_cib = NULL; bool action_numbers = FALSE; gboolean quiet = FALSE; char *temp_shadow = NULL; extern gboolean bringing_nodes_online; #define quiet_log(fmt, args...) do { \ if(quiet == FALSE) { \ printf(fmt , ##args); \ } \ } while(0) #define INDENT " " static gboolean in_place_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.store = TRUE; options.process = TRUE; options.simulate = TRUE; return TRUE; } static gboolean live_check_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.xml_file) { free(options.xml_file); } options.xml_file = NULL; return TRUE; } static gboolean node_down_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.node_down = g_list_append(options.node_down, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean node_fail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.node_fail = g_list_append(options.node_fail, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean node_up_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; bringing_nodes_online = TRUE; options.node_up = g_list_append(options.node_up, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean op_fail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.process = TRUE; options.simulate = TRUE; options.op_fail = g_list_append(options.op_fail, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean op_inject_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.op_inject = g_list_append(options.op_inject, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean quorum_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.quorum) { free(options.quorum); } options.modified++; options.quorum = strdup(optarg); return TRUE; } static gboolean save_dotfile_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.dot_file) { free(options.dot_file); } options.process = TRUE; options.dot_file = strdup(optarg); return TRUE; } static gboolean save_graph_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.graph_file) { free(options.graph_file); } options.process = TRUE; options.graph_file = strdup(optarg); return TRUE; } static gboolean show_scores_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.process = TRUE; show_scores = TRUE; return TRUE; } static gboolean simulate_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.process = TRUE; options.simulate = TRUE; return TRUE; } static gboolean ticket_activate_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.ticket_activate = g_list_append(options.ticket_activate, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean ticket_grant_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.ticket_grant = g_list_append(options.ticket_grant, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean ticket_revoke_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.ticket_revoke = g_list_append(options.ticket_revoke, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean ticket_standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.modified++; options.ticket_standby = g_list_append(options.ticket_standby, (gchar *) g_strdup(optarg)); return TRUE; } static gboolean utilization_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.process = TRUE; show_utilization = TRUE; return TRUE; } static gboolean watchdog_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.watchdog) { free(options.watchdog); } options.modified++; options.watchdog = strdup(optarg); return TRUE; } static gboolean xml_pipe_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.xml_file) { free(options.xml_file); } options.xml_file = strdup("-"); return TRUE; } static GOptionEntry operation_entries[] = { { "run", 'R', 0, G_OPTION_ARG_NONE, &options.process, "Determine cluster's response to the given configuration and status", NULL }, { "simulate", 'S', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, simulate_cb, "Simulate transition's execution and display resulting cluster status", NULL }, { "in-place", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, in_place_cb, "Simulate transition's execution and store result back to input file", NULL }, { "show-scores", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_scores_cb, "Show allocation scores", NULL }, { "show-utilization", 'U', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, utilization_cb, "Show utilization information", NULL }, { "profile", 'P', 0, G_OPTION_ARG_FILENAME, &options.test_dir, "Run all tests in the named directory to create profiling data", NULL }, { "repeat", 'N', 0, G_OPTION_ARG_INT, &options.repeat, "With --profile, repeat each test N times and print timings", "N" }, { "pending", 'j', 0, G_OPTION_ARG_NONE, &options.print_pending, "Display pending state if 'record-pending' is enabled", NULL }, { NULL } }; static GOptionEntry synthetic_entries[] = { { "node-up", 'u', 0, G_OPTION_ARG_CALLBACK, node_up_cb, "Bring a node online", "NODE" }, { "node-down", 'd', 0, G_OPTION_ARG_CALLBACK, node_down_cb, "Take a node offline", "NODE" }, { "node-fail", 'f', 0, G_OPTION_ARG_CALLBACK, node_fail_cb, "Mark a node as failed", "NODE" }, { "op-inject", 'i', 0, G_OPTION_ARG_CALLBACK, op_inject_cb, "Generate a failure for the cluster to react to in the simulation.\n" INDENT "See `Operation Specification` help for more information.", "OPSPEC" }, { "op-fail", 'F', 0, G_OPTION_ARG_CALLBACK, op_fail_cb, "If the specified task occurs during the simulation, have it fail with return code ${rc}.\n" INDENT "The transition will normally stop at the failed action.\n" INDENT "Save the result with --save-output and re-run with --xml-file.\n" INDENT "See `Operation Specification` help for more information.", "OPSPEC" }, { "set-datetime", 't', 0, G_OPTION_ARG_STRING, &options.use_date, "Set date/time (ISO 8601 format, see https://en.wikipedia.org/wiki/ISO_8601)", "DATETIME" }, { "quorum", 'q', 0, G_OPTION_ARG_CALLBACK, quorum_cb, "Specify a value for quorum", "QUORUM" }, { "watchdog", 'w', 0, G_OPTION_ARG_CALLBACK, watchdog_cb, "Assume a watchdog device is active", "DEVICE" }, { "ticket-grant", 'g', 0, G_OPTION_ARG_CALLBACK, ticket_grant_cb, "Grant a ticket", "TICKET" }, { "ticket-revoke", 'r', 0, G_OPTION_ARG_CALLBACK, ticket_revoke_cb, "Revoke a ticket", "TICKET" }, { "ticket-standby", 'b', 0, G_OPTION_ARG_CALLBACK, ticket_standby_cb, "Make a ticket standby", "TICKET" }, { "ticket-activate", 'e', 0, G_OPTION_ARG_CALLBACK, ticket_activate_cb, "Activate a ticket", "TICKET" }, { NULL } }; static GOptionEntry output_entries[] = { { "save-input", 'I', 0, G_OPTION_ARG_FILENAME, &options.input_file, "Save the input configuration to the named file", "FILE" }, { "save-output", 'O', 0, G_OPTION_ARG_FILENAME, &options.output_file, "Save the output configuration to the named file", "FILE" }, { "save-graph", 'G', 0, G_OPTION_ARG_CALLBACK, save_graph_cb, "Save the transition graph (XML format) to the named file", "FILE" }, { "save-dotfile", 'D', 0, G_OPTION_ARG_CALLBACK, save_dotfile_cb, "Save the transition graph (DOT format) to the named file", "FILE" }, { "all-actions", 'a', 0, G_OPTION_ARG_NONE, &options.all_actions, "Display all possible actions in DOT graph (even if not part of transition)", NULL }, { NULL } }; static GOptionEntry source_entries[] = { { "live-check", 'L', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, live_check_cb, "Connect to CIB mamager and use the current CIB contents as input", NULL }, { "xml-file", 'x', 0, G_OPTION_ARG_FILENAME, &options.xml_file, "Retrieve XML from the named file", "FILE" }, { "xml-pipe", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, xml_pipe_cb, "Retrieve XML from stdin", NULL }, { NULL } }; static void get_date(pe_working_set_t *data_set, bool print_original, char *use_date) { time_t original_date = 0; crm_element_value_epoch(data_set->input, "execution-date", &original_date); if (use_date) { data_set->now = crm_time_new(use_date); quiet_log(" + Setting effective cluster time: %s", use_date); crm_time_log(LOG_NOTICE, "Pretending 'now' is", data_set->now, crm_time_log_date | crm_time_log_timeofday); } else if (original_date) { data_set->now = crm_time_new(NULL); crm_time_set_timet(data_set->now, &original_date); if (print_original) { char *when = crm_time_as_string(data_set->now, crm_time_log_date|crm_time_log_timeofday); printf("Using the original execution date of: %s\n", when); free(when); } } } static void print_cluster_status(pe_working_set_t * data_set, long options) { char *online_nodes = NULL; char *online_remote_nodes = NULL; char *online_guest_nodes = NULL; char *offline_nodes = NULL; char *offline_remote_nodes = NULL; GListPtr gIter = NULL; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; const char *node_mode = NULL; char *node_name = NULL; if (pe__is_guest_node(node)) { node_name = crm_strdup_printf("%s:%s", node->details->uname, node->details->remote_rsc->container->id); } else { node_name = crm_strdup_printf("%s", node->details->uname); } if (node->details->unclean) { if (node->details->online && node->details->unclean) { node_mode = "UNCLEAN (online)"; } else if (node->details->pending) { node_mode = "UNCLEAN (pending)"; } else { node_mode = "UNCLEAN (offline)"; } } else if (node->details->pending) { node_mode = "pending"; } else if (node->details->standby_onfail && node->details->online) { node_mode = "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { node_mode = "standby"; } else { node_mode = "OFFLINE (standby)"; } } else if (node->details->maintenance) { if (node->details->online) { node_mode = "maintenance"; } else { node_mode = "OFFLINE (maintenance)"; } } else if (node->details->online) { if (pe__is_guest_node(node)) { online_guest_nodes = pcmk__add_word(online_guest_nodes, node_name); } else if (pe__is_remote_node(node)) { online_remote_nodes = pcmk__add_word(online_remote_nodes, node_name); } else { online_nodes = pcmk__add_word(online_nodes, node_name); } free(node_name); continue; } else { if (pe__is_remote_node(node)) { offline_remote_nodes = pcmk__add_word(offline_remote_nodes, node_name); } else if (pe__is_guest_node(node)) { /* ignore offline container nodes */ } else { offline_nodes = pcmk__add_word(offline_nodes, node_name); } free(node_name); continue; } if (pe__is_guest_node(node)) { printf("GuestNode %s: %s\n", node_name, node_mode); } else if (pe__is_remote_node(node)) { printf("RemoteNode %s: %s\n", node_name, node_mode); } else if (pcmk__str_eq(node->details->uname, node->details->id, pcmk__str_casei)) { printf("Node %s: %s\n", node_name, node_mode); } else { printf("Node %s (%s): %s\n", node_name, node->details->id, node_mode); } free(node_name); } if (online_nodes) { printf("Online: [%s ]\n", online_nodes); free(online_nodes); } if (offline_nodes) { printf("OFFLINE: [%s ]\n", offline_nodes); free(offline_nodes); } if (online_remote_nodes) { printf("RemoteOnline: [%s ]\n", online_remote_nodes); free(online_remote_nodes); } if (offline_remote_nodes) { printf("RemoteOFFLINE: [%s ]\n", offline_remote_nodes); free(offline_remote_nodes); } if (online_guest_nodes) { printf("GuestOnline: [%s ]\n", online_guest_nodes); free(online_guest_nodes); } fprintf(stdout, "\n"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; if (is_set(rsc->flags, pe_rsc_orphan) && rsc->role == RSC_ROLE_STOPPED) { continue; } rsc->fns->print(rsc, NULL, pe_print_printf | options, stdout); } fprintf(stdout, "\n"); } static char * create_action_name(pe_action_t *action) { char *action_name = NULL; const char *prefix = ""; const char *action_host = NULL; const char *clone_name = NULL; const char *task = action->task; if (action->node) { action_host = action->node->details->uname; } else if (is_not_set(action->flags, pe_action_pseudo)) { action_host = ""; } if (pcmk__str_eq(action->task, RSC_CANCEL, pcmk__str_casei)) { prefix = "Cancel "; task = action->cancel_task; } if (action->rsc && action->rsc->clone_name) { clone_name = action->rsc->clone_name; } if (clone_name) { char *key = NULL; guint interval_ms = 0; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } if (pcmk__strcase_any_of(action->task, RSC_NOTIFY, RSC_NOTIFIED, NULL)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_key_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_key_operation"); CRM_ASSERT(n_type != NULL); CRM_ASSERT(n_task != NULL); key = pcmk__notify_key(clone_name, n_type, n_task); } else { key = pcmk__op_key(clone_name, task, interval_ms); } if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, key, action_host); } else { action_name = crm_strdup_printf("%s%s", prefix, key); } free(key); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { const char *op = g_hash_table_lookup(action->meta, "stonith_action"); action_name = crm_strdup_printf("%s%s '%s' %s", prefix, action->task, op, action_host); } else if (action->rsc && action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->uuid, action_host); } else if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->task, action_host); } else { action_name = crm_strdup_printf("%s", action->uuid); } if (action_numbers) { // i.e. verbose char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id); free(action_name); action_name = with_id; } return action_name; } static bool create_dotfile(pe_working_set_t * data_set, const char *dot_file, gboolean all_actions, GError **error) { GListPtr gIter = NULL; FILE *dot_strm = fopen(dot_file, "w"); if (dot_strm == NULL) { g_set_error(error, PCMK__RC_ERROR, errno, "Could not open %s for writing: %s", dot_file, pcmk_rc_str(errno)); return false; } fprintf(dot_strm, " digraph \"g\" {\n"); for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; const char *style = "dashed"; const char *font = "black"; const char *color = "black"; char *action_name = create_action_name(action); crm_trace("Action %d: %s %s %p", action->id, action_name, action->uuid, action); if (is_set(action->flags, pe_action_pseudo)) { font = "orange"; } if (is_set(action->flags, pe_action_dumped)) { style = "bold"; color = "green"; } else if (action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) { color = "red"; font = "purple"; if (all_actions == FALSE) { goto do_not_write; } } else if (is_set(action->flags, pe_action_optional)) { color = "blue"; if (all_actions == FALSE) { goto do_not_write; } } else { color = "red"; CRM_CHECK(is_set(action->flags, pe_action_runnable) == FALSE,; ); } - set_bit(action->flags, pe_action_dumped); + pe__set_action_flags(action, pe_action_dumped); crm_trace("\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]", action_name, style, color, font); fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n", action_name, style, color, font); do_not_write: free(action_name); } for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; GListPtr gIter2 = NULL; for (gIter2 = action->actions_before; gIter2 != NULL; gIter2 = gIter2->next) { pe_action_wrapper_t *before = (pe_action_wrapper_t *) gIter2->data; char *before_name = NULL; char *after_name = NULL; const char *style = "dashed"; gboolean optional = TRUE; if (before->state == pe_link_dumped) { optional = FALSE; style = "bold"; } else if (is_set(action->flags, pe_action_pseudo) && (before->type & pe_order_stonith_stop)) { continue; } else if (before->type == pe_order_none) { continue; } else if (is_set(before->action->flags, pe_action_dumped) && is_set(action->flags, pe_action_dumped) && before->type != pe_order_load) { optional = FALSE; } if (all_actions || optional == FALSE) { before_name = create_action_name(before->action); after_name = create_action_name(action); crm_trace("\"%s\" -> \"%s\" [ style = %s]", before_name, after_name, style); fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n", before_name, after_name, style); free(before_name); free(after_name); } } } fprintf(dot_strm, "}\n"); fflush(dot_strm); fclose(dot_strm); return true; } static int setup_input(const char *input, const char *output, GError **error) { int rc = pcmk_rc_ok; cib_t *cib_conn = NULL; xmlNode *cib_object = NULL; char *local_output = NULL; if (input == NULL) { /* Use live CIB */ cib_conn = cib_new(); rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { rc = cib_conn->cmds->query(cib_conn, NULL, &cib_object, cib_scope_local | cib_sync_call); } cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); cib_conn = NULL; if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); g_set_error(error, PCMK__RC_ERROR, rc, "Live CIB query failed: %s (%d)", pcmk_rc_str(rc), rc); return rc; } else if (cib_object == NULL) { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_NOINPUT, "Live CIB query failed: empty result"); return pcmk_rc_no_input; } } else if (pcmk__str_eq(input, "-", pcmk__str_casei)) { cib_object = filename2xml(NULL); } else { cib_object = filename2xml(input); } if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); return pcmk_rc_transform_failed; } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); return pcmk_rc_schema_validation; } if (output == NULL) { char *pid = pcmk__getpid_s(); local_output = get_shadow_file(pid); temp_shadow = strdup(local_output); output = local_output; free(pid); } rc = write_xml_file(cib_object, output, FALSE); free_xml(cib_object); cib_object = NULL; if (rc < 0) { rc = pcmk_legacy2rc(rc); g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_CANTCREAT, "Could not create '%s': %s", output, pcmk_rc_str(rc)); return rc; } else { setenv("CIB_file", output, 1); free(local_output); return pcmk_rc_ok; } } static void profile_one(const char *xml_file, long long repeat, pe_working_set_t *data_set, char *use_date) { xmlNode *cib_object = NULL; clock_t start = 0; printf("* Testing %s ...", xml_file); fflush(stdout); cib_object = filename2xml(xml_file); start = clock(); if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); return; } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); return; } for (int i = 0; i < repeat; ++i) { xmlNode *input = (repeat == 1)? cib_object : copy_xml(cib_object); data_set->input = input; get_date(data_set, false, use_date); pcmk__schedule_actions(data_set, input, NULL); pe_reset_working_set(data_set); } printf(" %.2f secs\n", (clock() - start) / (float) CLOCKS_PER_SEC); } #ifndef FILENAME_MAX # define FILENAME_MAX 512 #endif static void profile_all(const char *dir, long long repeat, pe_working_set_t *data_set, char *use_date) { struct dirent **namelist; int file_num = scandir(dir, &namelist, 0, alphasort); if (file_num > 0) { struct stat prop; char buffer[FILENAME_MAX]; while (file_num--) { if ('.' == namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (!pcmk__ends_with_ext(namelist[file_num]->d_name, ".xml")) { free(namelist[file_num]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", dir, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { profile_one(buffer, repeat, data_set, use_date); } free(namelist[file_num]); } free(namelist); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Display only essential output", NULL }, { NULL } }; const char *description = "Operation Specification:\n\n" "The OPSPEC in any command line option is of the form ${resource}_${task}_${interval_in_ms}@${node}=${rc} " "memcached_monitor_20000@bart.example.com=7, for example). ${rc} is an OCF return code. For more " "information on these return codes, refer to " "https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/2.0/html/Pacemaker_Administration/s-ocf-return-codes.html\n\n" "Examples:\n\n" "Pretend a recurring monitor action found memcached stopped on node " "fred.example.com and, during recovery, that the memcached stop " "action failed:\n\n" "\tcrm_simulate -LS --op-inject memcached:0_monitor_20000@bart.example.com=7 " "--op-fail memcached:0_stop_0@fred.example.com=1 --save-output /tmp/memcached-test.xml\n\n" "Now see what the reaction to the stop failed would be:\n\n" "\tcrm_simulate -S --xml-file /tmp/memcached-test.xml\n\n"; context = pcmk__build_arg_context(args, NULL, group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, description); pcmk__add_arg_group(context, "operations", "Operations:", "Show operations options", operation_entries); pcmk__add_arg_group(context, "synthetic", "Synthetic Cluster Events:", "Show synthetic cluster event options", synthetic_entries); pcmk__add_arg_group(context, "output", "Output Options:", "Show output options", output_entries); pcmk__add_arg_group(context, "source", "Data Source:", "Show data source options", source_entries); return context; } int main(int argc, char **argv) { GError *error = NULL; GOptionGroup *output_group = NULL; gchar **processed_args = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); GOptionContext *context = build_arg_context(args, &output_group); int rc = pcmk_rc_ok; pe_working_set_t *data_set = NULL; xmlNode *input = NULL; options.xml_file = strdup("-"); crm_log_cli_init("crm_simulate"); processed_args = pcmk__cmdline_preproc(argv, "bdefgiqrtuwxDFGINO"); if (!g_option_context_parse_strv(context, &processed_args, &error)) { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); goto done; } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (args->version) { /* FIXME: When crm_simulate is converted to use formatted output, this can go. */ pcmk__cli_help('v', CRM_EX_USAGE); goto done; } if (optind > argc) { gchar *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); goto done; } if (args->verbosity > 0) { /* Redirect stderr to stdout so we can grep the output */ close(STDERR_FILENO); dup2(STDOUT_FILENO, STDERR_FILENO); action_numbers = TRUE; } if (args->quiet) { quiet = TRUE; } data_set = pe_new_working_set(); if (data_set == NULL) { rc = ENOMEM; g_set_error(&error, PCMK__RC_ERROR, rc, "Could not allocate working set"); goto done; } set_bit(data_set->flags, pe_flag_no_compat); if (options.test_dir != NULL) { profile_all(options.test_dir, options.repeat, data_set, options.use_date); rc = pcmk_rc_ok; goto done; } rc = setup_input(options.xml_file, options.store ? options.xml_file : options.output_file, &error); if (rc != pcmk_rc_ok) { goto done; } global_cib = cib_new(); rc = global_cib->cmds->signon(global_cib, crm_system_name, cib_command); if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); g_set_error(&error, PCMK__RC_ERROR, rc, "Could not connect to the CIB: %s", pcmk_rc_str(rc)); goto done; } rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call | cib_scope_local); if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); g_set_error(&error, PCMK__RC_ERROR, rc, "Could not get local CIB: %s", pcmk_rc_str(rc)); goto done; } data_set->input = input; get_date(data_set, true, options.use_date); if(options.xml_file) { set_bit(data_set->flags, pe_flag_sanitized); } set_bit(data_set->flags, pe_flag_stdout); cluster_status(data_set); if (quiet == FALSE) { int opts = options.print_pending ? pe_print_pending : 0; if (is_set(data_set->flags, pe_flag_maintenance_mode)) { quiet_log("\n *** Resource management is DISABLED ***"); quiet_log("\n The cluster will not attempt to start, stop or recover services"); quiet_log("\n"); } if (data_set->disabled_resources || data_set->blocked_resources) { quiet_log("%d of %d resource instances DISABLED and %d BLOCKED " "from further action due to failure\n", data_set->disabled_resources, data_set->ninstances, data_set->blocked_resources); } quiet_log("\nCurrent cluster status:\n"); print_cluster_status(data_set, opts); } if (options.modified) { quiet_log("Performing requested modifications\n"); modify_configuration(data_set, global_cib, options.quorum, options.watchdog, options.node_up, options.node_down, options.node_fail, options.op_inject, options.ticket_grant, options.ticket_revoke, options.ticket_standby, options.ticket_activate); rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call); if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); g_set_error(&error, PCMK__RC_ERROR, rc, "Could not get modified CIB: %s", pcmk_rc_str(rc)); goto done; } cleanup_calculations(data_set); data_set->input = input; get_date(data_set, true, options.use_date); if(options.xml_file) { set_bit(data_set->flags, pe_flag_sanitized); } set_bit(data_set->flags, pe_flag_stdout); cluster_status(data_set); } if (options.input_file != NULL) { rc = write_xml_file(input, options.input_file, FALSE); if (rc < 0) { rc = pcmk_legacy2rc(rc); g_set_error(&error, PCMK__RC_ERROR, rc, "Could not create '%s': %s", options.input_file, pcmk_rc_str(rc)); goto done; } } if (options.process || options.simulate) { crm_time_t *local_date = NULL; if (show_scores && show_utilization) { printf("Allocation scores and utilization information:\n"); } else if (show_scores) { fprintf(stdout, "Allocation scores:\n"); } else if (show_utilization) { printf("Utilization information:\n"); } pcmk__schedule_actions(data_set, input, local_date); input = NULL; /* Don't try and free it twice */ if (options.graph_file != NULL) { write_xml_file(data_set->graph, options.graph_file, FALSE); } if (options.dot_file != NULL) { if (!create_dotfile(data_set, options.dot_file, options.all_actions, &error)) { goto done; } } if (quiet == FALSE) { GListPtr gIter = NULL; quiet_log("%sTransition Summary:\n", show_scores || show_utilization || options.modified ? "\n" : ""); fflush(stdout); LogNodeActions(data_set, TRUE); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; LogActions(rsc, data_set, TRUE); } } } rc = pcmk_rc_ok; if (options.simulate) { if (run_simulation(data_set, global_cib, options.op_fail, quiet) != pcmk_rc_ok) { rc = pcmk_rc_error; } if(quiet == FALSE) { get_date(data_set, true, options.use_date); quiet_log("\nRevised cluster status:\n"); set_bit(data_set->flags, pe_flag_stdout); cluster_status(data_set); print_cluster_status(data_set, 0); } } done: if (error != NULL) { fprintf(stderr, "%s\n", error->message); g_clear_error(&error); } /* There sure is a lot to free in options. */ free(options.dot_file); free(options.graph_file); g_free(options.input_file); g_list_free_full(options.node_up, g_free); g_list_free_full(options.node_down, g_free); g_list_free_full(options.node_fail, g_free); g_list_free_full(options.op_fail, g_free); g_list_free_full(options.op_inject, g_free); g_free(options.output_file); free(options.quorum); g_free(options.test_dir); g_list_free_full(options.ticket_grant, g_free); g_list_free_full(options.ticket_revoke, g_free); g_list_free_full(options.ticket_standby, g_free); g_list_free_full(options.ticket_activate, g_free); free(options.use_date); free(options.watchdog); free(options.xml_file); pcmk__free_arg_context(context); g_strfreev(processed_args); if (data_set) { pe_free_working_set(data_set); } if (global_cib) { global_cib->cmds->signoff(global_cib); cib_delete(global_cib); } fflush(stderr); if (temp_shadow) { unlink(temp_shadow); free(temp_shadow); } crm_exit(pcmk_rc2exitc(rc)); }