diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index 8aba23a9ad..9c71a1a921 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -1,706 +1,707 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PE_INTERNAL__H # define PE_INTERNAL__H # include # include # include # include # include # include # include enum pe__clone_flags { // Whether instances should be started sequentially pe__clone_ordered = (1 << 0), // Whether promotion scores have been added pe__clone_promotion_added = (1 << 1), // Whether promotion constraints have been added pe__clone_promotion_constrained = (1 << 2), }; bool pe__clone_is_ordered(const pe_resource_t *clone); int pe__set_clone_flag(pe_resource_t *clone, enum pe__clone_flags flag); enum pe__group_flags { pe__group_ordered = (1 << 0), // Members start sequentially pe__group_colocated = (1 << 1), // Members must be on same node }; bool pe__group_flag_is_set(const pe_resource_t *group, uint32_t flags); pe_resource_t *pe__last_group_member(const pe_resource_t *group); # define pe_rsc_info(rsc, fmt, args...) crm_log_tag(LOG_INFO, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_debug(rsc, fmt, args...) crm_log_tag(LOG_DEBUG, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_trace(rsc, fmt, args...) crm_log_tag(LOG_TRACE, rsc ? rsc->id : "", fmt, ##args) # define pe_err(fmt...) do { \ was_processing_error = TRUE; \ pcmk__config_err(fmt); \ } while (0) # define pe_warn(fmt...) do { \ was_processing_warning = TRUE; \ pcmk__config_warn(fmt); \ } while (0) # define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); } # define pe_proc_warn(fmt...) 
{ was_processing_warning = TRUE; crm_warn(fmt); } #define pe__set_working_set_flags(working_set, flags_to_set) do { \ (working_set)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Working set", crm_system_name, \ (working_set)->flags, (flags_to_set), #flags_to_set); \ } while (0) #define pe__clear_working_set_flags(working_set, flags_to_clear) do { \ (working_set)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Working set", crm_system_name, \ (working_set)->flags, (flags_to_clear), #flags_to_clear); \ } while (0) #define pe__set_resource_flags(resource, flags_to_set) do { \ (resource)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Resource", (resource)->id, (resource)->flags, \ (flags_to_set), #flags_to_set); \ } while (0) #define pe__clear_resource_flags(resource, flags_to_clear) do { \ (resource)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Resource", (resource)->id, (resource)->flags, \ (flags_to_clear), #flags_to_clear); \ } while (0) #define pe__set_action_flags(action, flags_to_set) do { \ (action)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_action_flags(action, flags_to_clear) do { \ (action)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define pe__set_raw_action_flags(action_flags, action_name, flags_to_set) do { \ action_flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Action", action_name, \ (action_flags), \ (flags_to_set), #flags_to_set); \ } while (0) #define pe__clear_raw_action_flags(action_flags, action_name, flags_to_clear) do { \ action_flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", action_name, \ (action_flags), \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define 
pe__set_action_flags_as(function, line, action, flags_to_set) do { \ (action)->flags = pcmk__set_flags_as((function), (line), \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_action_flags_as(function, line, action, flags_to_clear) do { \ (action)->flags = pcmk__clear_flags_as((function), (line), \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define pe__set_order_flags(order_flags, flags_to_set) do { \ order_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Ordering", "constraint", \ order_flags, (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_order_flags(order_flags, flags_to_clear) do { \ order_flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Ordering", "constraint", \ order_flags, (flags_to_clear), \ #flags_to_clear); \ } while (0) // Some warnings we don't want to print every transition enum pe_warn_once_e { pe_wo_blind = (1 << 0), pe_wo_restart_type = (1 << 1), pe_wo_role_after = (1 << 2), pe_wo_poweroff = (1 << 3), pe_wo_require_all = (1 << 4), pe_wo_order_score = (1 << 5), pe_wo_neg_threshold = (1 << 6), pe_wo_remove_after = (1 << 7), pe_wo_ping_node = (1 << 8), pe_wo_order_inst = (1 << 9), pe_wo_coloc_inst = (1 << 10), pe_wo_group_order = (1 << 11), pe_wo_group_coloc = (1 << 12), }; extern uint32_t pe_wo; #define pe_warn_once(pe_wo_bit, fmt...) 
do { \ if (!pcmk_is_set(pe_wo, pe_wo_bit)) { \ if (pe_wo_bit == pe_wo_blind) { \ crm_warn(fmt); \ } else { \ pe_warn(fmt); \ } \ pe_wo = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Warn-once", "logging", pe_wo, \ (pe_wo_bit), #pe_wo_bit); \ } \ } while (0); typedef struct pe__location_constraint_s { char *id; // Constraint XML ID pe_resource_t *rsc_lh; // Resource being located enum rsc_role_e role_filter; // Role to locate enum pe_discover_e discover_mode; // Resource discovery GList *node_list_rh; // List of pe_node_t* } pe__location_t; typedef struct pe__order_constraint_s { int id; uint32_t flags; // Group of enum pe_ordering flags void *lh_opaque; pe_resource_t *lh_rsc; pe_action_t *lh_action; char *lh_action_task; void *rh_opaque; pe_resource_t *rh_rsc; pe_action_t *rh_action; char *rh_action_task; } pe__ordering_t; typedef struct notify_data_s { GSList *keys; // Environment variable name/value pairs const char *action; pe_action_t *pre; pe_action_t *post; pe_action_t *pre_done; pe_action_t *post_done; GList *active; /* notify_entry_t* */ GList *inactive; /* notify_entry_t* */ GList *start; /* notify_entry_t* */ GList *stop; /* notify_entry_t* */ GList *demote; /* notify_entry_t* */ GList *promote; /* notify_entry_t* */ GList *promoted; /* notify_entry_t* */ GList *unpromoted; /* notify_entry_t* */ GHashTable *allowed_nodes; } notify_data_t; const pe_resource_t *pe__const_top_resource(const pe_resource_t *rsc, bool include_bundle); +int pe__clone_max(const pe_resource_t *clone); int pe__clone_promoted_max(const pe_resource_t *clone); int pe__clone_promoted_node_max(const pe_resource_t *clone); pe_action_t *pe__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task, bool optional, bool runnable); void pe__create_promotable_pseudo_ops(pe_resource_t *clone, bool any_promoting, bool any_demoting); bool pe_can_fence(const pe_working_set_t *data_set, const pe_node_t *node); void add_hash_param(GHashTable * hash, const char *name, const char *value); 
char *native_parameter(pe_resource_t * rsc, pe_node_t * node, gboolean create, const char *name, pe_working_set_t * data_set); pe_node_t *native_location(const pe_resource_t *rsc, GList **list, int current); void pe_metadata(pcmk__output_t *out); void verify_pe_options(GHashTable * options); void common_update_score(pe_resource_t * rsc, const char *id, int score); void native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set, gboolean failed); gboolean native_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean group_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean clone_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set); pe_resource_t *native_find_rsc(pe_resource_t *rsc, const char *id, const pe_node_t *node, int flags); gboolean native_active(pe_resource_t * rsc, gboolean all); gboolean group_active(pe_resource_t * rsc, gboolean all); gboolean clone_active(pe_resource_t * rsc, gboolean all); gboolean pe__bundle_active(pe_resource_t *rsc, gboolean all); //! \deprecated This function will be removed in a future release void native_print(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); //! \deprecated This function will be removed in a future release void group_print(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); //! \deprecated This function will be removed in a future release void clone_print(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); //! 
\deprecated This function will be removed in a future release void pe__print_bundle(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); gchar *pcmk__native_output_string(const pe_resource_t *rsc, const char *name, const pe_node_t *node, uint32_t show_opts, const char *target_role, bool show_nodes); int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name , size_t pairs_count, ...); char *pe__node_display_name(pe_node_t *node, bool print_detail); // Clone notifications (pe_notif.c) void pe__create_notifications(pe_resource_t *rsc, notify_data_t *n_data); notify_data_t *pe__clone_notif_pseudo_ops(pe_resource_t *rsc, const char *task, pe_action_t *action, pe_action_t *complete); void pe__free_notification_data(notify_data_t *n_data); void pe__order_notifs_after_fencing(const pe_action_t *action, pe_resource_t *rsc, pe_action_t *stonith_op); static inline const char * pe__rsc_bool_str(const pe_resource_t *rsc, uint64_t rsc_flag) { return pcmk__btoa(pcmk_is_set(rsc->flags, rsc_flag)); } int pe__clone_xml(pcmk__output_t *out, va_list args); int pe__clone_default(pcmk__output_t *out, va_list args); int pe__group_xml(pcmk__output_t *out, va_list args); int pe__group_default(pcmk__output_t *out, va_list args); int pe__bundle_xml(pcmk__output_t *out, va_list args); int pe__bundle_html(pcmk__output_t *out, va_list args); int pe__bundle_text(pcmk__output_t *out, va_list args); int pe__node_html(pcmk__output_t *out, va_list args); int pe__node_text(pcmk__output_t *out, va_list args); int pe__node_xml(pcmk__output_t *out, va_list args); int pe__resource_xml(pcmk__output_t *out, va_list args); int pe__resource_html(pcmk__output_t *out, va_list args); int pe__resource_text(pcmk__output_t *out, va_list args); void native_free(pe_resource_t * rsc); void group_free(pe_resource_t * rsc); void clone_free(pe_resource_t * rsc); void pe__free_bundle(pe_resource_t *rsc); enum rsc_role_e native_resource_state(const pe_resource_t * rsc, 
gboolean current); enum rsc_role_e group_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e clone_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e pe__bundle_resource_state(const pe_resource_t *rsc, gboolean current); void pe__count_common(pe_resource_t *rsc); void pe__count_bundle(pe_resource_t *rsc); void common_free(pe_resource_t * rsc); pe_node_t *pe__copy_node(const pe_node_t *this_node); extern time_t get_effective_time(pe_working_set_t * data_set); /* Failure handling utilities (from failcounts.c) */ // bit flags for fail count handling options enum pe_fc_flags_e { pe_fc_default = (1 << 0), pe_fc_effective = (1 << 1), // don't count expired failures pe_fc_fillers = (1 << 2), // if container, include filler failures in count }; int pe_get_failcount(const pe_node_t *node, pe_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op); pe_action_t *pe__clear_failcount(pe_resource_t *rsc, const pe_node_t *node, const char *reason, pe_working_set_t *data_set); /* Functions for finding/counting a resource's active nodes */ pe_node_t *pe__find_active_on(const pe_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean); pe_node_t *pe__find_active_requires(const pe_resource_t *rsc, unsigned int *count); static inline pe_node_t * pe__current_node(const pe_resource_t *rsc) { return pe__find_active_on(rsc, NULL, NULL); } /* Binary like operators for lists of nodes */ extern void node_list_exclude(GHashTable * list, GList *list2, gboolean merge_scores); GHashTable *pe__node_list2table(const GList *list); static inline gpointer pe_hash_table_lookup(GHashTable * hash, gconstpointer key) { if (hash) { return g_hash_table_lookup(hash, key); } return NULL; } extern pe_action_t *get_pseudo_op(const char *name, pe_working_set_t * data_set); extern gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order); void pe__show_node_weights_as(const char *file, 
const char *function, int line, bool to_log, const pe_resource_t *rsc, const char *comment, GHashTable *nodes, pe_working_set_t *data_set); #define pe__show_node_weights(level, rsc, text, nodes, data_set) \ pe__show_node_weights_as(__FILE__, __func__, __LINE__, \ (level), (rsc), (text), (nodes), (data_set)) xmlNode *find_rsc_op_entry(const pe_resource_t *rsc, const char *key); pe_action_t *custom_action(pe_resource_t *rsc, char *key, const char *task, const pe_node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set); # define delete_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DELETE, 0) # define delete_action(rsc, node, optional) custom_action( \ rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \ optional, TRUE, rsc->cluster); # define stopped_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOPPED, 0) # define stopped_action(rsc, node, optional) custom_action( \ rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \ optional, TRUE, rsc->cluster); # define stop_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOP, 0) # define stop_action(rsc, node, optional) custom_action( \ rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \ optional, TRUE, rsc->cluster); # define reload_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_RELOAD_AGENT, 0) # define start_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_START, 0) # define start_action(rsc, node, optional) custom_action( \ rsc, start_key(rsc), CRMD_ACTION_START, node, \ optional, TRUE, rsc->cluster) # define started_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STARTED, 0) # define started_action(rsc, node, optional) custom_action( \ rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \ optional, TRUE, rsc->cluster) # define promote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTE, 0) # define promote_action(rsc, node, optional) custom_action( \ rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \ optional, TRUE, rsc->cluster) # define promoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTED, 0) # define promoted_action(rsc, node, 
optional) custom_action( \ rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \ optional, TRUE, rsc->cluster) # define demote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTE, 0) # define demote_action(rsc, node, optional) custom_action( \ rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \ optional, TRUE, rsc->cluster) # define demoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTED, 0) # define demoted_action(rsc, node, optional) custom_action( \ rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \ optional, TRUE, rsc->cluster) extern int pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set); pe_action_t *find_first_action(const GList *input, const char *uuid, const char *task, const pe_node_t *on_node); extern enum action_tasks get_complex_task(pe_resource_t * rsc, const char *name, gboolean allow_non_atomic); extern GList *find_actions(GList *input, const char *key, const pe_node_t *on_node); GList *find_actions_exact(GList *input, const char *key, const pe_node_t *on_node); GList *pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node); extern void pe_free_action(pe_action_t * action); void resource_location(pe_resource_t *rsc, const pe_node_t *node, int score, const char *tag, pe_working_set_t *data_set); extern int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default); extern gint sort_op_by_callid(gconstpointer a, gconstpointer b); gboolean get_target_role(const pe_resource_t *rsc, enum rsc_role_e *role); void pe__set_next_role(pe_resource_t *rsc, enum rsc_role_e role, const char *why); pe_resource_t *find_clone_instance(const pe_resource_t *rsc, const char *sub_id); extern void destroy_ticket(gpointer data); extern pe_ticket_t *ticket_new(const char *ticket_id, pe_working_set_t * data_set); // Resources for manipulating resource names const char *pe_base_name_end(const char *id); char *clone_strip(const char *last_rsc_id); char 
*clone_zero(const char *last_rsc_id); static inline bool pe_base_name_eq(const pe_resource_t *rsc, const char *id) { if (id && rsc && rsc->id) { // Number of characters in rsc->id before any clone suffix size_t base_len = pe_base_name_end(rsc->id) - rsc->id + 1; return (strlen(id) == base_len) && !strncmp(id, rsc->id, base_len); } return false; } int pe__target_rc_from_xml(const xmlNode *xml_op); gint pe__cmp_node_name(gconstpointer a, gconstpointer b); bool is_set_recursive(const pe_resource_t *rsc, long long flag, bool any); enum rsc_digest_cmp_val { /*! Digests are the same */ RSC_DIGEST_MATCH = 0, /*! Params that require a restart changed */ RSC_DIGEST_RESTART, /*! Some parameter changed. */ RSC_DIGEST_ALL, /*! rsc op didn't have a digest associated with it, so * it is unknown if parameters changed or not. */ RSC_DIGEST_UNKNOWN, }; typedef struct op_digest_cache_s { enum rsc_digest_cmp_val rc; xmlNode *params_all; xmlNode *params_secure; xmlNode *params_restart; char *digest_all_calc; char *digest_secure_calc; char *digest_restart_calc; } op_digest_cache_t; op_digest_cache_t *pe__calculate_digests(pe_resource_t *rsc, const char *task, guint *interval_ms, const pe_node_t *node, const xmlNode *xml_op, GHashTable *overrides, bool calc_secure, pe_working_set_t *data_set); void pe__free_digests(gpointer ptr); op_digest_cache_t *rsc_action_digest_cmp(pe_resource_t *rsc, const xmlNode *xml_op, pe_node_t *node, pe_working_set_t *data_set); pe_action_t *pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set); void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set); char *pe__action2reason(const pe_action_t *action, enum pe_action_flags flag); void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite); void pe__add_action_expected_result(pe_action_t *action, int expected_result); void 
pe__set_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags); void pe__clear_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags); void pe__clear_resource_flags_on_all(pe_working_set_t *data_set, uint64_t flag); gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref); //! \deprecated This function will be removed in a future release void print_rscs_brief(GList *rsc_list, const char * pre_text, long options, void * print_data, gboolean print_all); int pe__rscs_brief_output(pcmk__output_t *out, GList *rsc_list, unsigned int options); void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay); pe_node_t *pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); //! \deprecated This function will be removed in a future release void common_print(pe_resource_t *rsc, const char *pre_text, const char *name, const pe_node_t *node, long options, void *print_data); int pe__common_output_text(pcmk__output_t *out, const pe_resource_t *rsc, const char *name, const pe_node_t *node, unsigned int options); int pe__common_output_html(pcmk__output_t *out, const pe_resource_t *rsc, const char *name, const pe_node_t *node, unsigned int options); pe_resource_t *pe__find_bundle_replica(const pe_resource_t *bundle, const pe_node_t *node); bool pe__bundle_needs_remote_name(pe_resource_t *rsc); const char *pe__add_bundle_remote_name(pe_resource_t *rsc, pe_working_set_t *data_set, xmlNode *xml, const char *field); const char *pe_node_attribute_calculated(const pe_node_t *node, const char *name, const pe_resource_t *rsc); const char *pe_node_attribute_raw(const pe_node_t *node, const char *name); bool pe__is_universal_clone(const pe_resource_t *rsc, const pe_working_set_t *data_set); void pe__add_param_check(const xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set); void 
pe__foreach_param_check(pe_working_set_t *data_set, void (*cb)(pe_resource_t*, pe_node_t*, const xmlNode*, enum pe_check_parameters)); void pe__free_param_checks(pe_working_set_t *data_set); bool pe__shutdown_requested(const pe_node_t *node); void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set); /*! * \internal * \brief Register xml formatting message functions. * * \param[in,out] out Output object to register messages with */ void pe__register_messages(pcmk__output_t *out); void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pe_working_set_t *data_set); bool pe__resource_is_disabled(const pe_resource_t *rsc); pe_action_t *pe__clear_resource_history(pe_resource_t *rsc, const pe_node_t *node, pe_working_set_t *data_set); GList *pe__rscs_with_tag(pe_working_set_t *data_set, const char *tag_name); GList *pe__unames_with_tag(pe_working_set_t *data_set, const char *tag_name); bool pe__rsc_has_tag(pe_working_set_t *data_set, const char *rsc, const char *tag); bool pe__uname_has_tag(pe_working_set_t *data_set, const char *node, const char *tag); bool pe__rsc_running_on_any(pe_resource_t *rsc, GList *node_list); GList *pe__filter_rsc_list(GList *rscs, GList *filter); GList * pe__build_node_name_list(pe_working_set_t *data_set, const char *s); GList * pe__build_rsc_list(pe_working_set_t *data_set, const char *s); bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GList *only_node); gboolean pe__bundle_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); gboolean pe__clone_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); gboolean pe__group_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); gboolean pe__native_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); xmlNode *pe__failed_probe_for_rsc(const pe_resource_t *rsc, 
const char *name); const char *pe__clone_child_id(const pe_resource_t *rsc); int pe__sum_node_health_scores(const pe_node_t *node, int base_health); int pe__node_health(pe_node_t *node); static inline enum pcmk__health_strategy pe__health_strategy(pe_working_set_t *data_set) { return pcmk__parse_health_strategy(pe_pref(data_set->config_hash, PCMK__OPT_NODE_HEALTH_STRATEGY)); } static inline int pe__health_score(const char *option, pe_working_set_t *data_set) { return char2score(pe_pref(data_set->config_hash, option)); } /*! * \internal * \brief Return a string suitable for logging as a node name * * \param[in] node Node to return a node name string for * * \return Node name if available, otherwise node ID if available, * otherwise "unspecified node" if node is NULL or "unidentified node" * if node has neither a name nor ID. */ static inline const char * pe__node_name(const pe_node_t *node) { if (node == NULL) { return "unspecified node"; } else if (node->details->uname != NULL) { return node->details->uname; } else if (node->details->id != NULL) { return node->details->id; } else { return "unidentified node"; } } /*! * \internal * \brief Check whether two node objects refer to the same node * * \param[in] node1 First node object to compare * \param[in] node2 Second node object to compare * * \return true if \p node1 and \p node2 refer to the same node */ static inline bool pe__same_node(const pe_node_t *node1, const pe_node_t *node2) { return (node1 != NULL) && (node2 != NULL) && (node1->details == node2->details); } #endif diff --git a/lib/pacemaker/Makefile.am b/lib/pacemaker/Makefile.am index 0e199059be..6f13c46185 100644 --- a/lib/pacemaker/Makefile.am +++ b/lib/pacemaker/Makefile.am @@ -1,68 +1,69 @@ # -# Copyright 2004-2022 the Pacemaker project contributors +# Copyright 2004-2023 the Pacemaker project contributors # # The version control history for this file may have further details. 
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#

include $(top_srcdir)/mk/common.mk

AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)

noinst_HEADERS = libpacemaker_private.h

## libraries
lib_LTLIBRARIES = libpacemaker.la

## SOURCES

libpacemaker_la_LDFLAGS = -version-info 6:0:5

libpacemaker_la_CFLAGS = $(CFLAGS_HARDENED_LIB)
libpacemaker_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB)

libpacemaker_la_LIBADD = $(top_builddir)/lib/pengine/libpe_status.la \
 $(top_builddir)/lib/cib/libcib.la \
 $(top_builddir)/lib/lrmd/liblrmd.la \
 $(top_builddir)/lib/fencing/libstonithd.la \
 $(top_builddir)/lib/services/libcrmservice.la \
 $(top_builddir)/lib/common/libcrmcommon.la

# -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version

# Use += rather than backslashed continuation lines for parsing by bumplibs
libpacemaker_la_SOURCES =
libpacemaker_la_SOURCES += pcmk_acl.c
libpacemaker_la_SOURCES += pcmk_cluster_queries.c
libpacemaker_la_SOURCES += pcmk_fence.c
libpacemaker_la_SOURCES += pcmk_graph_consumer.c
libpacemaker_la_SOURCES += pcmk_graph_logging.c
libpacemaker_la_SOURCES += pcmk_graph_producer.c
libpacemaker_la_SOURCES += pcmk_injections.c
libpacemaker_la_SOURCES += pcmk_output.c
libpacemaker_la_SOURCES += pcmk_resource.c
libpacemaker_la_SOURCES += pcmk_result_code.c
libpacemaker_la_SOURCES += pcmk_rule.c
libpacemaker_la_SOURCES += pcmk_sched_actions.c
-libpacemaker_la_SOURCES += pcmk_sched_allocate.c
libpacemaker_la_SOURCES += pcmk_sched_bundle.c
libpacemaker_la_SOURCES += pcmk_sched_clone.c
libpacemaker_la_SOURCES += pcmk_sched_colocation.c
libpacemaker_la_SOURCES += pcmk_sched_constraints.c
libpacemaker_la_SOURCES += pcmk_sched_fencing.c
libpacemaker_la_SOURCES += pcmk_sched_group.c
+libpacemaker_la_SOURCES += pcmk_sched_instances.c
libpacemaker_la_SOURCES += pcmk_sched_location.c
libpacemaker_la_SOURCES += pcmk_sched_migration.c
libpacemaker_la_SOURCES += pcmk_sched_nodes.c
libpacemaker_la_SOURCES += pcmk_sched_ordering.c
libpacemaker_la_SOURCES += pcmk_sched_primitive.c
libpacemaker_la_SOURCES += pcmk_sched_probes.c
libpacemaker_la_SOURCES += pcmk_sched_promotable.c
libpacemaker_la_SOURCES += pcmk_sched_recurring.c
libpacemaker_la_SOURCES += pcmk_sched_remote.c
libpacemaker_la_SOURCES += pcmk_sched_resource.c
libpacemaker_la_SOURCES += pcmk_sched_tickets.c
libpacemaker_la_SOURCES += pcmk_sched_utilization.c
+libpacemaker_la_SOURCES += pcmk_scheduler.c
libpacemaker_la_SOURCES += pcmk_simulate.c
libpacemaker_la_SOURCES += pcmk_status.c
diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h
index 801cc92cf6..6f38f58cd2 100644
--- a/lib/pacemaker/libpacemaker_private.h
+++ b/lib/pacemaker/libpacemaker_private.h
@@ -1,830 +1,837 @@
/*
 * Copyright 2021-2023 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#ifndef PCMK__LIBPACEMAKER_PRIVATE__H
# define PCMK__LIBPACEMAKER_PRIVATE__H

/* This header is for the sole use of libpacemaker, so that functions can be
 * declared with G_GNUC_INTERNAL for efficiency.
 */

/* NOTE(review): the header name of the include below is missing from this
 * copy (presumably lost in extraction) -- restore it from upstream.
 */
#include // pe_action_t, pe_node_t, pe_working_set_t

// Flags to modify the behavior of pcmk__add_colocated_node_scores()
enum pcmk__coloc_select {
    // With no other flags, apply all "with this" colocations
    pcmk__coloc_select_default = 0,

    // Apply "this with" colocations instead of "with this" colocations
    pcmk__coloc_select_this_with = (1 << 0),

    // Apply only colocations with non-negative scores
    pcmk__coloc_select_nonnegative = (1 << 1),

    // Apply only colocations with at least one matching node
    pcmk__coloc_select_active = (1 << 2),
};

// Flags the update_ordered_actions() method can return
enum pcmk__updated {
    pcmk__updated_none = 0, // Nothing changed
    pcmk__updated_first = (1 << 0), // First action was updated
    pcmk__updated_then = (1 << 1), // Then action was updated
};

/* Set flags in a group of enum pcmk__updated flags via pcmk__set_flags_as(),
 * using the action's uuid as the target name. au_flags is read and then
 * assigned, so it must be a modifiable lvalue.
 */
#define pcmk__set_updated_flags(au_flags, action, flags_to_set) do { \
        au_flags = pcmk__set_flags_as(__func__, __LINE__, \
                                      LOG_TRACE, "Action update", \
                                      (action)->uuid, au_flags, \
                                      (flags_to_set), #flags_to_set); \
    } while (0)

/* Clear flags in a group of enum pcmk__updated flags via
 * pcmk__clear_flags_as(), using the action's uuid as the target name.
 * au_flags is read and then assigned, so it must be a modifiable lvalue.
 */
#define pcmk__clear_updated_flags(au_flags, action, flags_to_clear) do { \
        au_flags = pcmk__clear_flags_as(__func__, __LINE__, \
                                        LOG_TRACE, "Action update", \
                                        (action)->uuid, au_flags, \
                                        (flags_to_clear), #flags_to_clear); \
    } while (0)

// Resource allocation methods
struct resource_alloc_functions_s {
    /*!
     * \internal
     * \brief Assign a resource to a node
     *
     * \param[in,out] rsc     Resource to assign to a node
     * \param[in]     prefer  Node to prefer, if all else is equal
     *
     * \return Node that \p rsc is assigned to, if assigned entirely to one node
     */
    pe_node_t *(*assign)(pe_resource_t *rsc, const pe_node_t *prefer);

    /*!
     * \internal
     * \brief Create all actions needed for a given resource
     *
     * \param[in,out] rsc  Resource to create actions for
     */
    void (*create_actions)(pe_resource_t *rsc);

    /*!
     * \internal
     * \brief Schedule any probes needed for a resource on a node
     *
     * \param[in,out] rsc   Resource to create probe for
     * \param[in,out] node  Node to create probe on
     *
     * \return true if any probe was created, otherwise false
     */
    bool (*create_probe)(pe_resource_t *rsc, pe_node_t *node);

    /*!
     * \internal
     * \brief Create implicit constraints needed for a resource
     *
     * \param[in,out] rsc  Resource to create implicit constraints for
     */
    void (*internal_constraints)(pe_resource_t *rsc);

    /*!
     * \internal
     * \brief Apply a colocation's score to node weights or resource priority
     *
     * Given a colocation constraint, apply its score to the dependent's
     * allowed node weights (if we are still placing resources) or priority (if
     * we are choosing promotable clone instance roles).
     *
     * \param[in,out] dependent      Dependent resource in colocation
     * \param[in]     primary        Primary resource in colocation
     * \param[in]     colocation     Colocation constraint to apply
     * \param[in]     for_dependent  true if called on behalf of dependent
     */
    void (*apply_coloc_score) (pe_resource_t *dependent,
                               const pe_resource_t *primary,
                               const pcmk__colocation_t *colocation,
                               bool for_dependent);

    /*!
     * \internal
     * \brief Create list of all resources in colocations with a given resource
     *
     * Given a resource, create a list of all resources involved in mandatory
     * colocations with it, whether directly or indirectly via chained colocations.
     *
     * \param[in]     rsc             Resource to add to colocated list
     * \param[in]     orig_rsc        Resource originally requested
     * \param[in,out] colocated_rscs  Existing list
     *
     * \return List of given resource and all resources involved in colocations
     *
     * \note This function is recursive; top-level callers should pass NULL as
     *       \p colocated_rscs and \p orig_rsc, and the desired resource as
     *       \p rsc. The recursive calls will use other values.
     */
    GList *(*colocated_resources)(const pe_resource_t *rsc,
                                  const pe_resource_t *orig_rsc,
                                  GList *colocated_rscs);

    /*!
     * \internal
     * \brief Apply a location constraint to a resource's allowed node scores
     *
     * \param[in,out] rsc       Resource to apply constraint to
     * \param[in,out] location  Location constraint to apply
     */
    void (*apply_location)(pe_resource_t *rsc, pe__location_t *location);

    /*!
     * \internal
     * \brief Return action flags for a given resource action
     *
     * \param[in,out] action  Action to get flags for
     * \param[in]     node    If not NULL, limit effects to this node
     *
     * \return Flags appropriate to \p action on \p node
     * \note For primitives, this will be the same as action->flags regardless
     *       of node. For collective resources, the flags can differ due to
     *       multiple instances possibly being involved.
     */
    enum pe_action_flags (*action_flags)(pe_action_t *action,
                                         const pe_node_t *node);

    /*!
     * \internal
     * \brief Update two actions according to an ordering between them
     *
     * Given information about an ordering of two actions, update the actions'
     * flags (and runnable_before members if appropriate) as appropriate for the
     * ordering. In some cases, the ordering could be disabled as well.
     *
     * \param[in,out] first     'First' action in an ordering
     * \param[in,out] then      'Then' action in an ordering
     * \param[in]     node      If not NULL, limit scope of ordering to this
     *                          node (only used when interleaving instances)
     * \param[in]     flags     Action flags for \p first for ordering purposes
     * \param[in]     filter    Action flags to limit scope of certain updates
     *                          (may include pe_action_optional to affect only
     *                          mandatory actions, and pe_action_runnable to
     *                          affect only runnable actions)
     * \param[in]     type      Group of enum pe_ordering flags to apply
     * \param[in,out] data_set  Cluster working set
     *
     * \return Group of enum pcmk__updated flags indicating what was updated
     */
    uint32_t (*update_ordered_actions)(pe_action_t *first, pe_action_t *then,
                                       const pe_node_t *node, uint32_t flags,
                                       uint32_t filter, uint32_t type,
                                       pe_working_set_t *data_set);

    /* NOTE(review): undocumented in the original; presumably outputs the
     * actions scheduled for \p rsc -- confirm against the implementations.
     */
    void (*output_actions)(pe_resource_t *rsc);

    /*!
     * \internal
     * \brief Add a resource's actions to the transition graph
     *
     * \param[in,out] rsc  Resource whose actions should be added
     */
    void (*add_actions_to_graph)(pe_resource_t *rsc);

    /*!
     * \internal
     * \brief Add meta-attributes relevant to transition graph actions to XML
     *
     * If a given resource supports variant-specific meta-attributes that are
     * needed for transition graph actions, add them to a given XML element.
     *
     * \param[in]     rsc  Resource whose meta-attributes should be added
     * \param[in,out] xml  Transition graph action attributes XML to add to
     */
    void (*add_graph_meta)(const pe_resource_t *rsc, xmlNode *xml);

    /*!
     * \internal
     * \brief Add a resource's utilization to a table of utilization values
     *
     * This function is used when summing the utilization of a resource and all
     * resources colocated with it, to determine whether a node has sufficient
     * capacity. Given a resource and a table of utilization values, it will add
     * the resource's utilization to the existing values, if the resource has
     * not yet been allocated to a node.
     *
     * \param[in]     rsc          Resource with utilization to add
     * \param[in]     orig_rsc     Resource being allocated (for logging only)
     * \param[in]     all_rscs     List of all resources that will be summed
     * \param[in,out] utilization  Table of utilization values to add to
     */
    void (*add_utilization)(const pe_resource_t *rsc,
                            const pe_resource_t *orig_rsc, GList *all_rscs,
                            GHashTable *utilization);

    /*!
     * \internal
     * \brief Apply a shutdown lock for a resource, if appropriate
     *
     * \param[in,out] rsc  Resource to check for shutdown lock
     */
    void (*shutdown_lock)(pe_resource_t *rsc);
};

// Actions (pcmk_sched_actions.c)

G_GNUC_INTERNAL
void pcmk__update_action_for_orderings(pe_action_t *action,
                                       pe_working_set_t *data_set);

G_GNUC_INTERNAL
uint32_t pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then,
                                      const pe_node_t *node, uint32_t flags,
                                      uint32_t filter, uint32_t type,
                                      pe_working_set_t *data_set);

G_GNUC_INTERNAL
void pcmk__log_action(const char *pre_text, const pe_action_t *action,
                      bool details);

G_GNUC_INTERNAL
pe_action_t *pcmk__new_cancel_action(pe_resource_t *rsc, const char *name,
                                     guint interval_ms, const pe_node_t *node);

G_GNUC_INTERNAL
pe_action_t *pcmk__new_shutdown_action(pe_node_t *node);

G_GNUC_INTERNAL
bool pcmk__action_locks_rsc_to_node(const pe_action_t *action);

G_GNUC_INTERNAL
void pcmk__deduplicate_action_inputs(pe_action_t *action);

G_GNUC_INTERNAL
void pcmk__output_actions(pe_working_set_t *data_set);

G_GNUC_INTERNAL
bool pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node,
                               const xmlNode *xml_op);

G_GNUC_INTERNAL
void pcmk__handle_rsc_config_changes(pe_working_set_t *data_set);

// Recurring actions (pcmk_sched_recurring.c)

G_GNUC_INTERNAL
void pcmk__create_recurring_actions(pe_resource_t *rsc);

G_GNUC_INTERNAL
void pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id,
                           const char *task, guint interval_ms,
                           const pe_node_t *node, const char *reason);

G_GNUC_INTERNAL
void pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task,
                                guint interval_ms, pe_node_t *node);

G_GNUC_INTERNAL
bool pcmk__action_is_recurring(const pe_action_t *action);

// Producing transition graphs (pcmk_graph_producer.c)

G_GNUC_INTERNAL
bool pcmk__graph_has_loop(const pe_action_t *init_action,
                          const pe_action_t *action,
                          pe_action_wrapper_t *input);

G_GNUC_INTERNAL
void pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc);

G_GNUC_INTERNAL
void
pcmk__create_graph(pe_working_set_t *data_set); // Fencing (pcmk_sched_fencing.c) G_GNUC_INTERNAL void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order); G_GNUC_INTERNAL void pcmk__fence_guest(pe_node_t *node); G_GNUC_INTERNAL bool pcmk__node_unfenced(const pe_node_t *node); G_GNUC_INTERNAL void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data); // Injected scheduler inputs (pcmk_sched_injections.c) void pcmk__inject_scheduler_input(pe_working_set_t *data_set, cib_t *cib, const pcmk_injections_t *injections); // Constraints of any type (pcmk_sched_constraints.c) G_GNUC_INTERNAL pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id); G_GNUC_INTERNAL xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj, const pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__valid_resource_or_tag(const pe_working_set_t *data_set, const char *id, pe_resource_t **rsc, pe_tag_t **tag); G_GNUC_INTERNAL bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr, bool convert_rsc, const pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__create_internal_constraints(pe_working_set_t *data_set); // Location constraints G_GNUC_INTERNAL void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc, int node_weight, const char *discover_mode, pe_node_t *foo_node, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_locations(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__apply_location(pe_resource_t *rsc, pe__location_t *constraint); // Colocation constraints (pcmk_sched_colocation.c) enum pcmk__coloc_affects { pcmk__coloc_affects_nothing = 0, pcmk__coloc_affects_location, pcmk__coloc_affects_role, }; G_GNUC_INTERNAL enum pcmk__coloc_affects 
pcmk__colocation_affects(const pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool preview); G_GNUC_INTERNAL void pcmk__apply_coloc_to_weights(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__apply_coloc_to_priority(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__add_colocated_node_scores(pe_resource_t *rsc, const char *log_id, GHashTable **nodes, const char *attr, float factor, uint32_t flags); G_GNUC_INTERNAL void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__add_this_with(pe_resource_t *rsc, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__add_with_this(pe_resource_t *rsc, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__new_colocation(const char *id, const char *node_attr, int score, pe_resource_t *dependent, pe_resource_t *primary, const char *dependent_role, const char *primary_role, bool influence, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__block_colocation_dependents(pe_action_t *action, pe_working_set_t *data_set); /*! * \internal * \brief Check whether colocation's dependent preferences should be considered * * \param[in] colocation Colocation constraint * \param[in] rsc Primary instance (normally this will be * colocation->primary, which NULL will be treated as, * but for clones or bundles with multiple instances * this can be a particular instance) * * \return true if colocation influence should be effective, otherwise false */ static inline bool pcmk__colocation_has_influence(const pcmk__colocation_t *colocation, const pe_resource_t *rsc) { if (rsc == NULL) { rsc = colocation->primary; } /* A bundle replica colocates its remote connection with its container, * using a finite score so that the container can run on Pacemaker Remote * nodes. 
* * Moving a connection is lightweight and does not interrupt the service, * while moving a container is heavyweight and does interrupt the service, * so don't move a clean, active container based solely on the preferences * of its connection. * * This also avoids problematic scenarios where two containers want to * perpetually swap places. */ if (pcmk_is_set(colocation->dependent->flags, pe_rsc_allow_remote_remotes) && !pcmk_is_set(rsc->flags, pe_rsc_failed) && pcmk__list_of_1(rsc->running_on)) { return false; } /* The dependent in a colocation influences the primary's location * if the influence option is true or the primary is not yet active. */ return colocation->influence || (rsc->running_on == NULL); } // Ordering constraints (pcmk_sched_ordering.c) G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task, pe_action_t *first_action, pe_resource_t *then_rsc, char *then_task, pe_action_t *then_action, uint32_t flags, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__disable_invalid_orderings(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_stops_before_shutdown(pe_node_t *node, pe_action_t *shutdown_op); G_GNUC_INTERNAL void pcmk__apply_orderings(pe_working_set_t *data_set); G_GNUC_INTERNAL void pcmk__order_after_each(pe_action_t *after, GList *list); /*! 
* \internal * \brief Create a new ordering between two resource actions * * \param[in,out] first_rsc Resource for 'first' action * \param[in,out] first_task Action key for 'first' action * \param[in] then_rsc Resource for 'then' action * \param[in,out] then_task Action key for 'then' action * \param[in] flags Bitmask of enum pe_ordering flags */ #define pcmk__order_resource_actions(first_rsc, first_task, \ then_rsc, then_task, flags) \ pcmk__new_ordering((first_rsc), \ pcmk__op_key((first_rsc)->id, (first_task), 0), \ NULL, \ (then_rsc), \ pcmk__op_key((then_rsc)->id, (then_task), 0), \ NULL, (flags), (first_rsc)->cluster) #define pcmk__order_starts(rsc1, rsc2, flags) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \ (rsc2), CRMD_ACTION_START, (flags)) #define pcmk__order_stops(rsc1, rsc2, flags) \ pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \ (rsc2), CRMD_ACTION_STOP, (flags)) // Ticket constraints (pcmk_sched_tickets.c) G_GNUC_INTERNAL void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set); // Promotable clone resources (pcmk_sched_promotable.c) G_GNUC_INTERNAL void pcmk__add_promotion_scores(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__require_promotion_tickets(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__set_instance_roles(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__create_promotable_actions(pe_resource_t *clone); G_GNUC_INTERNAL void pcmk__promotable_restart_ordering(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__order_promotable_instances(pe_resource_t *clone); G_GNUC_INTERNAL void pcmk__update_dependent_with_promotable(const pe_resource_t *primary, pe_resource_t *dependent, const pcmk__colocation_t *colocation); G_GNUC_INTERNAL void pcmk__update_promotable_dependent_priority(const pe_resource_t *primary, pe_resource_t *dependent, const pcmk__colocation_t *colocation); // Pacemaker Remote nodes (pcmk_sched_remote.c) G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(const pe_node_t *node); G_GNUC_INTERNAL void 
pcmk__order_remote_connection_actions(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_corresponds_to_guest(const pe_resource_t *rsc, const pe_node_t *node); G_GNUC_INTERNAL pe_node_t *pcmk__connection_host_for_action(const pe_action_t *action); G_GNUC_INTERNAL void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params); G_GNUC_INTERNAL void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, const pe_action_t *action); // Primitives (pcmk_sched_primitive.c) G_GNUC_INTERNAL pe_node_t *pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer); G_GNUC_INTERNAL void pcmk__primitive_create_actions(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__primitive_internal_constraints(pe_resource_t *rsc); G_GNUC_INTERNAL enum pe_action_flags pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node); G_GNUC_INTERNAL void pcmk__primitive_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); G_GNUC_INTERNAL void pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional); G_GNUC_INTERNAL void pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml); G_GNUC_INTERNAL void pcmk__primitive_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization); G_GNUC_INTERNAL void pcmk__primitive_shutdown_lock(pe_resource_t *rsc); // Groups (pcmk_sched_group.c) G_GNUC_INTERNAL pe_node_t *pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer); G_GNUC_INTERNAL void pcmk__group_create_actions(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__group_internal_constraints(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__group_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); G_GNUC_INTERNAL void pcmk__group_apply_location(pe_resource_t *rsc, pe__location_t *location); G_GNUC_INTERNAL enum pe_action_flags 
pcmk__group_action_flags(pe_action_t *action, const pe_node_t *node); G_GNUC_INTERNAL uint32_t pcmk__group_update_ordered_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set); G_GNUC_INTERNAL GList *pcmk__group_colocated_resources(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *colocated_rscs); G_GNUC_INTERNAL void pcmk__group_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization); G_GNUC_INTERNAL void pcmk__group_shutdown_lock(pe_resource_t *rsc); // Clones (pcmk_sched_clone.c) G_GNUC_INTERNAL void pcmk__clone_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); // Bundles (pcmk_sched_bundle.c) G_GNUC_INTERNAL void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent); G_GNUC_INTERNAL void pcmk__output_bundle_actions(pe_resource_t *rsc); +// Clone instances or bundle replica containers (pcmk_sched_instances.c) + +G_GNUC_INTERNAL +void distribute_children(pe_resource_t *rsc, GList *children, int max, + int per_host_max, pe_working_set_t *data_set); + + // Injections (pcmk_injections.c) G_GNUC_INTERNAL xmlNode *pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid); G_GNUC_INTERNAL xmlNode *pcmk__inject_node_state_change(cib_t *cib_conn, const char *node, bool up); G_GNUC_INTERNAL xmlNode *pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *lrm_name, const char *rclass, const char *rtype, const char *rprovider); G_GNUC_INTERNAL void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *task, guint interval_ms, int rc); G_GNUC_INTERNAL xmlNode *pcmk__inject_action_result(xmlNode *cib_resource, lrmd_event_data_t *op, int 
target_rc); // Nodes (pcmk_sched_nodes.c) G_GNUC_INTERNAL bool pcmk__node_available(const pe_node_t *node, bool consider_score, bool consider_guest); G_GNUC_INTERNAL bool pcmk__any_node_available(GHashTable *nodes); G_GNUC_INTERNAL GHashTable *pcmk__copy_node_table(GHashTable *nodes); G_GNUC_INTERNAL GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node); G_GNUC_INTERNAL void pcmk__apply_node_health(pe_working_set_t *data_set); G_GNUC_INTERNAL pe_node_t *pcmk__top_allowed_node(const pe_resource_t *rsc, const pe_node_t *node); // Functions applying to more than one variant (pcmk_sched_resource.c) G_GNUC_INTERNAL void pcmk__set_allocation_methods(pe_working_set_t *data_set); G_GNUC_INTERNAL bool pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node, const xmlNode *rsc_entry, bool active_on_node); G_GNUC_INTERNAL GList *pcmk__rscs_matching_id(const char *id, const pe_working_set_t *data_set); G_GNUC_INTERNAL GList *pcmk__colocated_resources(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *colocated_rscs); G_GNUC_INTERNAL void pcmk__noop_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml); G_GNUC_INTERNAL void pcmk__output_resource_actions(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen, bool force); G_GNUC_INTERNAL bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force); G_GNUC_INTERNAL void pcmk__unassign_resource(pe_resource_t *rsc); G_GNUC_INTERNAL bool pcmk__threshold_reached(pe_resource_t *rsc, const pe_node_t *node, pe_resource_t **failed); G_GNUC_INTERNAL void pcmk__sort_resources(pe_working_set_t *data_set); G_GNUC_INTERNAL gint pcmk__cmp_instance(gconstpointer a, gconstpointer b); G_GNUC_INTERNAL gint pcmk__cmp_instance_number(gconstpointer a, gconstpointer b); // Functions related to probes (pcmk_sched_probes.c) G_GNUC_INTERNAL bool pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node); G_GNUC_INTERNAL void pcmk__order_probes(pe_working_set_t 
*data_set); G_GNUC_INTERNAL bool pcmk__probe_resource_list(GList *rscs, pe_node_t *node); G_GNUC_INTERNAL void pcmk__schedule_probes(pe_working_set_t *data_set); // Functions related to live migration (pcmk_sched_migration.c) void pcmk__create_migration_actions(pe_resource_t *rsc, const pe_node_t *current); void pcmk__abort_dangling_migration(void *data, void *user_data); bool pcmk__rsc_can_migrate(const pe_resource_t *rsc, const pe_node_t *current); void pcmk__order_migration_equivalents(pe__ordering_t *order); // Functions related to node utilization (pcmk_sched_utilization.c) G_GNUC_INTERNAL int pcmk__compare_node_capacities(const pe_node_t *node1, const pe_node_t *node2); G_GNUC_INTERNAL void pcmk__consume_node_capacity(GHashTable *current_utilization, const pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__release_node_capacity(GHashTable *current_utilization, const pe_resource_t *rsc); G_GNUC_INTERNAL const pe_node_t *pcmk__ban_insufficient_capacity(pe_resource_t *rsc); G_GNUC_INTERNAL void pcmk__create_utilization_constraints(pe_resource_t *rsc, const GList *allowed_nodes); G_GNUC_INTERNAL void pcmk__show_node_capacities(const char *desc, pe_working_set_t *data_set); #endif // PCMK__LIBPACEMAKER_PRIVATE__H diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c index e5b9be6a78..cb04ad4f1c 100644 --- a/lib/pacemaker/pcmk_sched_bundle.c +++ b/lib/pacemaker/pcmk_sched_bundle.c @@ -1,1156 +1,1149 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include "libpacemaker_private.h" #define PE__VARIANT_BUNDLE 1 #include static bool is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node) { for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (node->details == replica->node->details) { return TRUE; } } return FALSE; } -void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, - int max, int per_host_max, pe_working_set_t * data_set); - static GList * get_container_list(const pe_resource_t *rsc) { GList *containers = NULL; if (rsc->variant == pe_container) { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; containers = g_list_append(containers, replica->container); } } return containers; } static inline GList * get_containers_or_children(const pe_resource_t *rsc) { return (rsc->variant == pe_container)? get_container_list(rsc) : rsc->children; } /*! 
* \internal * \brief Assign a bundle resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * * \return Node that \p rsc is assigned to, if assigned entirely to one node */ pe_node_t * pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) { GList *containers = NULL; - GList *nodes = NULL; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return NULL); get_bundle_variant_data(bundle_data, rsc); pe__set_resource_flags(rsc, pe_rsc_allocating); containers = get_container_list(rsc); pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, rsc->cluster); - nodes = g_hash_table_get_values(rsc->allowed_nodes); - nodes = pcmk__sort_nodes(nodes, NULL); containers = g_list_sort(containers, pcmk__cmp_instance); - distribute_children(rsc, containers, nodes, bundle_data->nreplicas, + distribute_children(rsc, containers, bundle_data->nreplicas, bundle_data->nreplicas_per_host, rsc->cluster); - g_list_free(nodes); g_list_free(containers); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; pe_node_t *container_host = NULL; CRM_ASSERT(replica); if (replica->ip) { pe_rsc_trace(rsc, "Allocating bundle %s IP %s", rsc->id, replica->ip->id); replica->ip->cmds->assign(replica->ip, prefer); } container_host = replica->container->allocated_to; if (replica->remote && pe__is_guest_or_remote_node(container_host)) { /* We need 'nested' connection resources to be on the same * host because pacemaker-remoted only supports a single * active connection */ pcmk__new_colocation("child-remote-with-docker-remote", NULL, INFINITY, replica->remote, container_host->details->remote_rsc, NULL, NULL, true, rsc->cluster); } if (replica->remote) { pe_rsc_trace(rsc, "Allocating bundle %s connection %s", rsc->id, replica->remote->id); 
replica->remote->cmds->assign(replica->remote, prefer); } // Explicitly allocate replicas' children before bundle child if (replica->child) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, replica->child->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (node->details != replica->node->details) { node->weight = -INFINITY; } else if (!pcmk__threshold_reached(replica->child, node, NULL)) { node->weight = INFINITY; } } pe__set_resource_flags(replica->child->parent, pe_rsc_allocating); pe_rsc_trace(rsc, "Allocating bundle %s replica child %s", rsc->id, replica->child->id); replica->child->cmds->assign(replica->child, replica->node); pe__clear_resource_flags(replica->child->parent, pe_rsc_allocating); } } if (bundle_data->child) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, bundle_data->child->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (is_bundle_node(bundle_data, node)) { node->weight = 0; } else { node->weight = -INFINITY; } } pe_rsc_trace(rsc, "Allocating bundle %s child %s", rsc->id, bundle_data->child->id); bundle_data->child->cmds->assign(bundle_data->child, prefer); } pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional); return NULL; } void pcmk__bundle_create_actions(pe_resource_t *rsc) { pe_action_t *action = NULL; GList *containers = NULL; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); containers = get_container_list(rsc); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip) { replica->ip->cmds->create_actions(replica->ip); } if (replica->container) { replica->container->cmds->create_actions(replica->container); } if (replica->remote) { replica->remote->cmds->create_actions(replica->remote); } } 
clone_create_pseudo_actions(rsc, containers, NULL, NULL); if (bundle_data->child) { bundle_data->child->cmds->create_actions(bundle_data->child); if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) { /* promote */ pe__new_rsc_pseudo_action(rsc, RSC_PROMOTE, true, true); action = pe__new_rsc_pseudo_action(rsc, RSC_PROMOTED, true, true); action->priority = INFINITY; /* demote */ pe__new_rsc_pseudo_action(rsc, RSC_DEMOTE, true, true); action = pe__new_rsc_pseudo_action(rsc, RSC_DEMOTED, true, true); action->priority = INFINITY; } } g_list_free(containers); } void pcmk__bundle_internal_constraints(pe_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); if (bundle_data->child) { pcmk__order_resource_actions(rsc, RSC_START, bundle_data->child, RSC_START, pe_order_implies_first_printed); pcmk__order_resource_actions(rsc, RSC_STOP, bundle_data->child, RSC_STOP, pe_order_implies_first_printed); if (bundle_data->child->children) { pcmk__order_resource_actions(bundle_data->child, RSC_STARTED, rsc, RSC_STARTED, pe_order_implies_then_printed); pcmk__order_resource_actions(bundle_data->child, RSC_STOPPED, rsc, RSC_STOPPED, pe_order_implies_then_printed); } else { pcmk__order_resource_actions(bundle_data->child, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed); pcmk__order_resource_actions(bundle_data->child, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed); } } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); CRM_ASSERT(replica->container); replica->container->cmds->internal_constraints(replica->container); pcmk__order_starts(rsc, replica->container, pe_order_runnable_left|pe_order_implies_first_printed); if (replica->child) { pcmk__order_stops(rsc, replica->child, pe_order_implies_first_printed); } pcmk__order_stops(rsc, replica->container, 
pe_order_implies_first_printed); pcmk__order_resource_actions(replica->container, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed); pcmk__order_resource_actions(replica->container, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed); if (replica->ip) { replica->ip->cmds->internal_constraints(replica->ip); // Start IP then container pcmk__order_starts(replica->ip, replica->container, pe_order_runnable_left|pe_order_preserve); pcmk__order_stops(replica->container, replica->ip, pe_order_implies_first|pe_order_preserve); pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip, replica->container, NULL, NULL, true, rsc->cluster); } if (replica->remote) { /* This handles ordering and colocating remote relative to container * (via "resource-with-container"). Since IP is also ordered and * colocated relative to the container, we don't need to do anything * explicit here with IP. */ replica->remote->cmds->internal_constraints(replica->remote); } if (replica->child) { CRM_ASSERT(replica->remote); // "Start remote then child" is implicit in scheduler's remote logic } } if (bundle_data->child) { bundle_data->child->cmds->internal_constraints(bundle_data->child); if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) { pcmk__promotable_restart_ordering(rsc); /* child demoted before global demoted */ pcmk__order_resource_actions(bundle_data->child, RSC_DEMOTED, rsc, RSC_DEMOTED, pe_order_implies_then_printed); /* global demote before child demote */ pcmk__order_resource_actions(rsc, RSC_DEMOTE, bundle_data->child, RSC_DEMOTE, pe_order_implies_first_printed); /* child promoted before global promoted */ pcmk__order_resource_actions(bundle_data->child, RSC_PROMOTED, rsc, RSC_PROMOTED, pe_order_implies_then_printed); /* global promote before child promote */ pcmk__order_resource_actions(rsc, RSC_PROMOTE, bundle_data->child, RSC_PROMOTE, pe_order_implies_first_printed); } } } static pe_resource_t * compatible_replica_for_node(const pe_resource_t 
*rsc_lh, const pe_node_t *candidate, const pe_resource_t *rsc, enum rsc_role_e filter, gboolean current) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(candidate != NULL, return NULL); get_bundle_variant_data(bundle_data, rsc); crm_trace("Looking for compatible child from %s for %s on %s", rsc_lh->id, rsc->id, pe__node_name(candidate)); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (is_child_compatible(replica->container, candidate, filter, current)) { crm_trace("Pairing %s with %s on %s", rsc_lh->id, replica->container->id, pe__node_name(candidate)); return replica->container; } } crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id); return NULL; } static pe_resource_t * compatible_replica(const pe_resource_t *rsc_lh, const pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { GList *scratch = NULL; pe_resource_t *pair = NULL; pe_node_t *active_node_lh = NULL; active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current); if (active_node_lh) { return compatible_replica_for_node(rsc_lh, active_node_lh, rsc, filter, current); } scratch = g_hash_table_get_values(rsc_lh->allowed_nodes); scratch = pcmk__sort_nodes(scratch, NULL); for (GList *gIter = scratch; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = compatible_replica_for_node(rsc_lh, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? rsc->id : "none")); done: g_list_free(scratch); return pair; } int copies_per_node(pe_resource_t * rsc) { /* Strictly speaking, there should be a 'copies_per_node' addition * to the resource function table and each case would be a * function. However that would be serious overkill to return an * int. 
In fact, it seems to me that both function tables * could/should be replaced by resources.{c,h} full of * rsc_{some_operation} functions containing a switch as below * which calls out to functions named {variant}_{some_operation} * as needed. */ switch(rsc->variant) { case pe_unknown: return 0; case pe_native: case pe_group: return 1; case pe_clone: { const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); if (max_clones_node == NULL) { return 1; } else { int max_i; pcmk__scan_min_int(max_clones_node, &max_i, 0); return max_i; } } case pe_container: { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); return data->nreplicas_per_host; } } return 0; } /*! * \internal * \brief Apply a colocation's score to node weights or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node weights (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void pcmk__bundle_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { GList *allocated_primaries = NULL; pe__bundle_variant_data_t *bundle_data = NULL; /* This should never be called for the bundle itself as a dependent. * Instead, we add its colocation constraints to its replicas and call the * apply_coloc_score() for the replicas as dependents. 
*/ CRM_ASSERT(!for_dependent); CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL), return); CRM_ASSERT(dependent->variant == pe_native); if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (colocation->dependent->variant > pe_group) { pe_resource_t *primary_replica = compatible_replica(dependent, primary, RSC_ROLE_UNKNOWN, FALSE, dependent->cluster); if (primary_replica) { pe_rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_replica->id); dependent->cmds->apply_coloc_score(dependent, primary_replica, colocation, true); } else if (colocation->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true); } else { pe_rsc_debug(primary, "Cannot pair %s with instance of %s", dependent->id, primary->id); } return; } get_bundle_variant_data(bundle_data, primary); pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d", colocation->id, dependent->id, primary->id, colocation->score); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (colocation->score < INFINITY) { replica->container->cmds->apply_coloc_score(dependent, replica->container, colocation, false); } else { pe_node_t *chosen = replica->container->fns->location(replica->container, NULL, FALSE); if ((chosen == NULL) || is_set_recursive(replica->container, pe_rsc_block, TRUE)) { continue; } if ((colocation->primary_role >= RSC_ROLE_PROMOTED) && (replica->child == NULL)) { continue; } if ((colocation->primary_role >= RSC_ROLE_PROMOTED) && (replica->child->next_role < RSC_ROLE_PROMOTED)) { continue; } pe_rsc_trace(primary, "Allowing %s: %s %d", colocation->id, pe__node_name(chosen), chosen->weight); allocated_primaries = g_list_prepend(allocated_primaries, chosen); } } if (colocation->score >= INFINITY) { 
node_list_exclude(dependent->allowed_nodes, allocated_primaries, FALSE); } g_list_free(allocated_primaries); } enum pe_action_flags pcmk__bundle_action_flags(pe_action_t *action, const pe_node_t *node) { GList *containers = NULL; enum pe_action_flags flags = 0; pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, action->rsc); if(data->child) { enum action_tasks task = get_complex_task(data->child, action->task, TRUE); switch(task) { case no_action: case action_notify: case action_notified: case action_promote: case action_promoted: case action_demote: case action_demoted: return summary_action_flags(action, data->child->children, node); default: break; } } containers = get_container_list(action->rsc); flags = summary_action_flags(action, containers, node); g_list_free(containers); return flags; } pe_resource_t * find_compatible_child_by_node(const pe_resource_t *local_child, const pe_node_t *local_node, const pe_resource_t *rsc, enum rsc_role_e filter, gboolean current) { GList *gIter = NULL; GList *children = NULL; if (local_node == NULL) { crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id); return NULL; } crm_trace("Looking for compatible child from %s for %s on %s", local_child->id, rsc->id, pe__node_name(local_node)); children = get_containers_or_children(rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if(is_child_compatible(child_rsc, local_node, filter, current)) { crm_trace("Pairing %s with %s on %s", local_child->id, child_rsc->id, pe__node_name(local_node)); return child_rsc; } } crm_trace("Can't pair %s with %s", local_child->id, rsc->id); if(children != rsc->children) { g_list_free(children); } return NULL; } static pe__bundle_replica_t * replica_for_container(const pe_resource_t *rsc, const pe_resource_t *container, const pe_node_t *node) { if (rsc->variant == pe_container) { const pe__bundle_variant_data_t *data = NULL; 
get_bundle_variant_data(data, rsc); for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (replica->child && (container == replica->container) && pe__same_node(node, replica->node)) { return replica; } } } return NULL; } static uint32_t multi_update_interleave_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t filter, uint32_t type, pe_working_set_t *data_set) { GList *gIter = NULL; GList *children = NULL; gboolean current = FALSE; uint32_t changed = pcmk__updated_none; /* Fix this - lazy */ if (pcmk__ends_with(first->uuid, "_stopped_0") || pcmk__ends_with(first->uuid, "_demoted_0")) { current = TRUE; } children = get_containers_or_children(then->rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *then_child = gIter->data; pe_resource_t *first_child = find_compatible_child(then_child, first->rsc, RSC_ROLE_UNKNOWN, current); if (first_child == NULL && current) { crm_trace("Ignore"); } else if (first_child == NULL) { crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid, then->uuid); /* Me no like this hack - but what else can we do? 
* * If there is no-one active or about to be active * on the same node as then_child, then they must * not be allowed to start */ if (pcmk_any_flags_set(type, pe_order_runnable_left|pe_order_implies_then) /* Mandatory */ ) { pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id); if (pcmk__assign_resource(then_child, NULL, true)) { pcmk__set_updated_flags(changed, first, pcmk__updated_then); } } } else { pe_action_t *first_action = NULL; pe_action_t *then_action = NULL; enum action_tasks task = clone_child_action(first); const char *first_task = task2text(task); pe__bundle_replica_t *first_replica = NULL; pe__bundle_replica_t *then_replica = NULL; first_replica = replica_for_container(first->rsc, first_child, node); if (strstr(first->task, "stop") && first_replica && first_replica->child) { /* Except for 'stopped' we should be looking at the * in-container resource, actions for the child will * happen later and are therefor more likely to align * with the user's intent. 
*/ first_action = find_first_action(first_replica->child->actions, NULL, task2text(task), node); } else { first_action = find_first_action(first_child->actions, NULL, task2text(task), node); } then_replica = replica_for_container(then->rsc, then_child, node); if (strstr(then->task, "mote") && then_replica && then_replica->child) { /* Promote/demote actions will never be found for the * container resource, look in the child instead * * Alternatively treat: * 'XXXX then promote YYYY' as 'XXXX then start container for YYYY', and * 'demote XXXX then stop YYYY' as 'stop container for XXXX then stop YYYY' */ then_action = find_first_action(then_replica->child->actions, NULL, then->task, node); } else { then_action = find_first_action(then_child->actions, NULL, then->task, node); } if (first_action == NULL) { if (!pcmk_is_set(first_child->flags, pe_rsc_orphan) && !pcmk__str_any_of(first_task, RSC_STOP, RSC_DEMOTE, NULL)) { crm_err("Internal error: No action found for %s in %s (first)", first_task, first_child->id); } else { crm_trace("No action found for %s in %s%s (first)", first_task, first_child->id, pcmk_is_set(first_child->flags, pe_rsc_orphan)? " (ORPHAN)" : ""); } continue; } /* We're only interested if 'then' is neither stopping nor being demoted */ if (then_action == NULL) { if (!pcmk_is_set(then_child->flags, pe_rsc_orphan) && !pcmk__str_any_of(then->task, RSC_STOP, RSC_DEMOTE, NULL)) { crm_err("Internal error: No action found for %s in %s (then)", then->task, then_child->id); } else { crm_trace("No action found for %s in %s%s (then)", then->task, then_child->id, pcmk_is_set(then_child->flags, pe_rsc_orphan)? 
" (ORPHAN)" : ""); } continue; } if (order_actions(first_action, then_action, type)) { crm_debug("Created constraint for %s (%d) -> %s (%d) %.6x", first_action->uuid, pcmk_is_set(first_action->flags, pe_action_optional), then_action->uuid, pcmk_is_set(then_action->flags, pe_action_optional), type); pcmk__set_updated_flags(changed, first, pcmk__updated_first|pcmk__updated_then); } if(first_action && then_action) { changed |= then_child->cmds->update_ordered_actions(first_action, then_action, node, first_child->cmds->action_flags(first_action, node), filter, type, data_set); } else { crm_err("Nothing found either for %s (%p) or %s (%p) %s", first_child->id, first_action, then_child->id, then_action, task2text(task)); } } } if(children != then->rsc->children) { g_list_free(children); } return changed; } static bool can_interleave_actions(pe_action_t *first, pe_action_t *then) { bool interleave = FALSE; pe_resource_t *rsc = NULL; const char *interleave_s = NULL; if(first->rsc == NULL || then->rsc == NULL) { crm_trace("Not interleaving %s with %s (both must be resources)", first->uuid, then->uuid); return FALSE; } else if(first->rsc == then->rsc) { crm_trace("Not interleaving %s with %s (must belong to different resources)", first->uuid, then->uuid); return FALSE; } else if(first->rsc->variant < pe_clone || then->rsc->variant < pe_clone) { crm_trace("Not interleaving %s with %s (both sides must be clones or bundles)", first->uuid, then->uuid); return FALSE; } if (pcmk__ends_with(then->uuid, "_stop_0") || pcmk__ends_with(then->uuid, "_demote_0")) { rsc = first->rsc; } else { rsc = then->rsc; } interleave_s = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE); interleave = crm_is_true(interleave_s); crm_trace("Interleave %s -> %s: %s (based on %s)", first->uuid, then->uuid, interleave ? "yes" : "no", rsc->id); return interleave; } /*! 
* \internal * \brief Update two actions according to an ordering between them * * Given information about an ordering of two actions, update the actions' * flags (and runnable_before members if appropriate) as appropriate for the * ordering. In some cases, the ordering could be disabled as well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this node * (only used when interleaving instances) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates (may * include pe_action_optional to affect only mandatory * actions, and pe_action_runnable to affect only * runnable actions) * \param[in] type Group of enum pe_ordering flags to apply * \param[in,out] data_set Cluster working set * * \return Group of enum pcmk__updated flags indicating what was updated */ uint32_t pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then, const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pe_working_set_t *data_set) { uint32_t changed = pcmk__updated_none; crm_trace("%s -> %s", first->uuid, then->uuid); if(can_interleave_actions(first, then)) { changed = multi_update_interleave_actions(first, then, node, filter, type, data_set); } else if(then->rsc) { GList *gIter = NULL; GList *children = NULL; // Handle the 'primitive' ordering case changed |= pcmk__update_ordered_actions(first, then, node, flags, filter, type, data_set); // Now any children (or containers in the case of a bundle) children = get_containers_or_children(then->rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *then_child = (pe_resource_t *) gIter->data; uint32_t then_child_changed = pcmk__updated_none; pe_action_t *then_child_action = find_first_action(then_child->actions, NULL, then->task, node); if (then_child_action) { uint32_t then_child_flags = 
then_child->cmds->action_flags(then_child_action, node); if (pcmk_is_set(then_child_flags, pe_action_runnable)) { then_child_changed |= then_child->cmds->update_ordered_actions(first, then_child_action, node, flags, filter, type, data_set); } changed |= then_child_changed; if (pcmk_is_set(then_child_changed, pcmk__updated_then)) { for (GList *lpc = then_child_action->actions_after; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *next = (pe_action_wrapper_t *) lpc->data; pcmk__update_action_for_orderings(next->action, data_set); } } } } if(children != then->rsc->children) { g_list_free(children); } } return changed; } void pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); pcmk__apply_location(rsc, constraint); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (replica->container) { replica->container->cmds->apply_location(replica->container, constraint); } if (replica->ip) { replica->ip->cmds->apply_location(replica->ip, constraint); } } if (bundle_data->child && ((constraint->role_filter == RSC_ROLE_UNPROMOTED) || (constraint->role_filter == RSC_ROLE_PROMOTED))) { bundle_data->child->cmds->apply_location(bundle_data->child, constraint); bundle_data->child->rsc_location = g_list_prepend(bundle_data->child->rsc_location, constraint); } } /*! 
* \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__bundle_expand(pe_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); if (bundle_data->child) { bundle_data->child->cmds->add_actions_to_graph(bundle_data->child); } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->remote && replica->container && pe__bundle_needs_remote_name(replica->remote)) { /* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that * run pacemaker-remoted inside, without needing a separate IP for * the container. This is done by configuring the inner remote's * connection host as the magic string "#uname", then * replacing it with the underlying host when needed. */ xmlNode *nvpair = get_xpath_object("//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']", replica->remote->xml, LOG_ERR); const char *calculated_addr = NULL; // Replace the value in replica->remote->xml (if appropriate) calculated_addr = pe__add_bundle_remote_name(replica->remote, rsc->cluster, nvpair, "value"); if (calculated_addr) { /* Since this is for the bundle as a resource, and not any * particular action, replace the value in the default * parameters (not evaluated for node). create_graph_action() * will grab it from there to replace it in node-evaluated * parameters. */ GHashTable *params = pe_rsc_params(replica->remote, NULL, rsc->cluster); g_hash_table_replace(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR), strdup(calculated_addr)); } else { /* The only way to get here is if the remote connection is * neither currently running nor scheduled to run. 
That means we * won't be doing any operations that require addr (only start * requires it; we additionally use it to compare digests when * unpacking status, promote, and migrate_from history, but * that's already happened by this point). */ crm_info("Unable to determine address for bundle %s remote connection", rsc->id); } } if (replica->ip) { replica->ip->cmds->add_actions_to_graph(replica->ip); } if (replica->container) { replica->container->cmds->add_actions_to_graph(replica->container); } if (replica->remote) { replica->remote->cmds->add_actions_to_graph(replica->remote); } } } /*! * \internal * * \brief Schedule any probes needed for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node) { bool any_created = false; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return false); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if ((replica->ip != NULL) && replica->ip->cmds->create_probe(replica->ip, node)) { any_created = true; } if ((replica->child != NULL) && (node->details == replica->node->details) && replica->child->cmds->create_probe(replica->child, node)) { any_created = true; } if ((replica->container != NULL) && replica->container->cmds->create_probe(replica->container, node)) { any_created = true; /* If we're limited to one replica per host (due to * the lack of an IP range probably), then we don't * want any of our peer containers starting until * we've established that no other copies are already * running. 
* * Partly this is to ensure that nreplicas_per_host is * observed, but also to ensure that the containers * don't fail to start because the necessary port * mappings (which won't include an IP for uniqueness) * are already taken */ for (GList *tIter = bundle_data->replicas; tIter && (bundle_data->nreplicas_per_host == 1); tIter = tIter->next) { pe__bundle_replica_t *other = tIter->data; if ((other != replica) && (other != NULL) && (other->container != NULL)) { pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, RSC_STATUS, 0), NULL, other->container, pcmk__op_key(other->container->id, RSC_START, 0), NULL, pe_order_optional|pe_order_same_node, rsc->cluster); } } } if ((replica->container != NULL) && (replica->remote != NULL) && replica->remote->cmds->create_probe(replica->remote, node)) { /* Do not probe the remote resource until we know where the * container is running. This is required for REMOTE_CONTAINER_HACK * to correctly probe remote resources. */ char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS, 0); pe_action_t *probe = find_first_action(replica->remote->actions, probe_uuid, NULL, node); free(probe_uuid); if (probe != NULL) { any_created = true; crm_trace("Ordering %s probe on %s", replica->remote->id, pe__node_name(node)); pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, RSC_START, 0), NULL, replica->remote, NULL, probe, pe_order_probe, rsc->cluster); } } } return any_created; } void pcmk__output_bundle_actions(pe_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip != NULL) { replica->ip->cmds->output_actions(replica->ip); } if (replica->container != NULL) { replica->container->cmds->output_actions(replica->container); } if (replica->remote != 
NULL) { replica->remote->cmds->output_actions(replica->remote); } if (replica->child != NULL) { replica->child->cmds->output_actions(replica->child); } } } // Bundle implementation of resource_alloc_functions_t:add_utilization() void pcmk__bundle_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { pe__bundle_variant_data_t *bundle_data = NULL; pe__bundle_replica_t *replica = NULL; if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } get_bundle_variant_data(bundle_data, rsc); if (bundle_data->replicas == NULL) { return; } /* All bundle replicas are identical, so using the utilization of the first * is sufficient for any. Only the implicit container resource can have * utilization values. */ replica = (pe__bundle_replica_t *) bundle_data->replicas->data; if (replica->container != NULL) { replica->container->cmds->add_utilization(replica->container, orig_rsc, all_rscs, utilization); } } // Bundle implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__bundle_shutdown_lock(pe_resource_t *rsc) { return; // Bundles currently don't support shutdown locks } diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c index a887f7d6d6..fb2705e0f3 100644 --- a/lib/pacemaker/pcmk_sched_clone.c +++ b/lib/pacemaker/pcmk_sched_clone.c @@ -1,1195 +1,645 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include "libpacemaker_private.h" #define VARIANT_CLONE 1 #include -static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all); - -static pe_node_t * -can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit) -{ - pe_node_t *local_node = NULL; - - if (node == NULL && rsc->allowed_nodes) { - GHashTableIter iter; - g_hash_table_iter_init(&iter, rsc->allowed_nodes); - while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) { - can_run_instance(rsc, local_node, limit); - } - return NULL; - } - - if (!node) { - /* make clang analyzer happy */ - goto bail; - - } else if (!pcmk__node_available(node, false, false)) { - goto bail; - - } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { - goto bail; - } - - local_node = pcmk__top_allowed_node(rsc, node); - - if (local_node == NULL) { - crm_warn("%s cannot run on %s: node not allowed", - rsc->id, pe__node_name(node)); - goto bail; - - } else if (local_node->weight < 0) { - common_update_score(rsc, node->details->id, local_node->weight); - pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.", - rsc->id, pe__node_name(node)); - - } else if (local_node->count < limit) { - pe_rsc_trace(rsc, "%s can run on %s (already running %d)", - rsc->id, pe__node_name(node), local_node->count); - return local_node; - - } else { - pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)", - rsc->id, pe__node_name(node), local_node->count, limit); - } - - bail: - if (node) { - common_update_score(rsc, node->details->id, -INFINITY); - } - return NULL; -} - -static pe_node_t * -allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc, - int limit, pe_working_set_t *data_set) -{ - pe_node_t *chosen = NULL; - GHashTable *backup = NULL; - - CRM_ASSERT(rsc); - pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)", - rsc->id, (prefer? prefer->details->uname: "none"), - (all_coloc? 
"all" : "some")); - - if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { - return rsc->fns->location(rsc, NULL, FALSE); - - } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { - pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); - return NULL; - } - - /* Only include positive colocation preferences of dependent resources - * if not every node will get a copy of the clone - */ - append_parent_colocation(rsc->parent, rsc, all_coloc); - - if (prefer) { - pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); - - if (local_prefer == NULL || local_prefer->weight < 0) { - pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id, - pe__node_name(prefer)); - return NULL; - } - } - - can_run_instance(rsc, NULL, limit); - - backup = pcmk__copy_node_table(rsc->allowed_nodes); - pe_rsc_trace(rsc, "Allocating instance %s", rsc->id); - chosen = rsc->cmds->assign(rsc, prefer); - if (chosen && prefer && (chosen->details != prefer->details)) { - crm_info("Not pre-allocating %s to %s because %s is better", - rsc->id, pe__node_name(prefer), pe__node_name(chosen)); - g_hash_table_destroy(rsc->allowed_nodes); - rsc->allowed_nodes = backup; - pcmk__unassign_resource(rsc); - chosen = NULL; - backup = NULL; - } - if (chosen) { - pe_node_t *local_node = pcmk__top_allowed_node(rsc, chosen); - - if (local_node) { - local_node->count++; - - } else if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { - /* what to do? 
we can't enforce per-node limits in this case */ - pcmk__config_err("%s not found in %s (list of %d)", - chosen->details->id, rsc->parent->id, - g_hash_table_size(rsc->parent->allowed_nodes)); - } - } - - if(backup) { - g_hash_table_destroy(backup); - } - return chosen; -} - -static void -append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all) -{ - - GList *gIter = NULL; - - gIter = rsc->rsc_cons; - for (; gIter != NULL; gIter = gIter->next) { - pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; - - if (all || cons->score < 0 || cons->score == INFINITY) { - pcmk__add_this_with(child, cons); - } - } - - gIter = rsc->rsc_cons_lhs; - for (; gIter != NULL; gIter = gIter->next) { - pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; - - if (!pcmk__colocation_has_influence(cons, child)) { - continue; - } - if (all || cons->score < 0) { - pcmk__add_with_this(child, cons); - } - } -} - - -void -distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, - int max, int per_host_max, pe_working_set_t * data_set); - -void -distribute_children(pe_resource_t *rsc, GList *children, GList *nodes, - int max, int per_host_max, pe_working_set_t * data_set) -{ - int loop_max = 0; - int allocated = 0; - int available_nodes = 0; - bool all_coloc = false; - - /* count now tracks the number of clones currently allocated */ - for(GList *nIter = nodes; nIter != NULL; nIter = nIter->next) { - pe_node_t *node = nIter->data; - - node->count = 0; - if (pcmk__node_available(node, false, false)) { - available_nodes++; - } - } - - all_coloc = (max < available_nodes) ? 
true : false; - - if(available_nodes) { - loop_max = max / available_nodes; - } - if (loop_max < 1) { - loop_max = 1; - } - - pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)", - max, rsc->id, available_nodes, per_host_max, loop_max); - - /* Pre-allocate as many instances as we can to their current location */ - for (GList *gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) { - pe_resource_t *child = (pe_resource_t *) gIter->data; - pe_node_t *child_node = NULL; - pe_node_t *local_node = NULL; - - if ((child->running_on == NULL) - || !pcmk_is_set(child->flags, pe_rsc_provisional) - || pcmk_is_set(child->flags, pe_rsc_failed)) { - - continue; - } - - child_node = pe__current_node(child); - local_node = pcmk__top_allowed_node(child, child_node); - - pe_rsc_trace(rsc, - "Checking pre-allocation of %s to %s (%d remaining of %d)", - child->id, pe__node_name(child_node), max - allocated, - max); - - if (!pcmk__node_available(child_node, true, false)) { - pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s", - pe__node_name(child_node), child->id); - continue; - } - - if ((local_node != NULL) && (local_node->count >= loop_max)) { - pe_rsc_trace(rsc, - "Not pre-allocating because %s already allocated " - "optimal instances", pe__node_name(child_node)); - continue; - } - - if (allocate_instance(child, child_node, all_coloc, per_host_max, - data_set)) { - pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id, - pe__node_name(child_node)); - allocated++; - } - } - - pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max); - - for (GList *gIter = children; gIter != NULL; gIter = gIter->next) { - pe_resource_t *child = (pe_resource_t *) gIter->data; - - if (child->running_on != NULL) { - pe_node_t *child_node = pe__current_node(child); - pe_node_t *local_node = pcmk__top_allowed_node(child, child_node); - - if (local_node == NULL) { - crm_err("%s is running on %s which 
isn't allowed", - child->id, pe__node_name(child_node)); - } - } - - if (!pcmk_is_set(child->flags, pe_rsc_provisional)) { - } else if (allocated >= max) { - pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max); - resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set); - } else { - if (allocate_instance(child, NULL, all_coloc, per_host_max, - data_set)) { - allocated++; - } - } - } - - pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d", - allocated, rsc->id, max); -} - /*! * \internal * \brief Assign a clone resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * * \return Node that \p rsc is assigned to, if assigned entirely to one node */ pe_node_t * pcmk__clone_allocate(pe_resource_t *rsc, const pe_node_t *prefer) { - GList *nodes = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return NULL; } else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__add_promotion_scores(rsc); } pe__set_resource_flags(rsc, pe_rsc_allocating); /* This information is used by pcmk__cmp_instance() when deciding the order * in which to assign clone instances to nodes. 
*/ for (GList *gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; pe_rsc_trace(rsc, "%s: Allocating %s first", rsc->id, constraint->primary->id); constraint->primary->cmds->assign(constraint->primary, prefer); } for (GList *gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data; if (pcmk__colocation_has_influence(constraint, NULL)) { pe_resource_t *dependent = constraint->dependent; const char *attr = constraint->node_attribute; const float factor = constraint->score / (float) INFINITY; const uint32_t flags = pcmk__coloc_select_active |pcmk__coloc_select_nonnegative; pcmk__add_colocated_node_scores(dependent, rsc->id, &rsc->allowed_nodes, attr, factor, flags); } } pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, rsc->cluster); - nodes = g_hash_table_get_values(rsc->allowed_nodes); - nodes = pcmk__sort_nodes(nodes, NULL); rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance); - distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, + distribute_children(rsc, rsc->children, clone_data->clone_max, clone_data->clone_node_max, rsc->cluster); - g_list_free(nodes); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__set_instance_roles(rsc); } pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating); pe_rsc_trace(rsc, "Done allocating %s", rsc->id); return NULL; } -static void -clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting, - gboolean * active) -{ - GList *gIter = NULL; - - if (rsc->children) { - - gIter = rsc->children; - for (; gIter != NULL; gIter = gIter->next) { - pe_resource_t *child = (pe_resource_t *) gIter->data; - - clone_update_pseudo_status(child, stopping, starting, active); - } - - return; - } - - CRM_ASSERT(active != NULL); - CRM_ASSERT(starting != NULL); - 
CRM_ASSERT(stopping != NULL); - - if (rsc->running_on) { - *active = TRUE; - } - - gIter = rsc->actions; - for (; gIter != NULL; gIter = gIter->next) { - pe_action_t *action = (pe_action_t *) gIter->data; - - if (*starting && *stopping) { - return; - - } else if (pcmk_is_set(action->flags, pe_action_optional)) { - pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid); - continue; - - } else if (!pcmk_any_flags_set(action->flags, - pe_action_pseudo|pe_action_runnable)) { - pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid); - continue; - - } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) { - pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid); - *stopping = TRUE; - - } else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) { - if (!pcmk_is_set(action->flags, pe_action_runnable)) { - pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d", - action->uuid, - pcmk_is_set(action->flags, pe_action_runnable), - pcmk_is_set(action->flags, pe_action_pseudo)); - } else { - pe_rsc_trace(rsc, "Starting due to: %s", action->uuid); - pe_rsc_trace(rsc, "%s run=%d, pseudo=%d", - action->uuid, - pcmk_is_set(action->flags, pe_action_runnable), - pcmk_is_set(action->flags, pe_action_pseudo)); - *starting = TRUE; - } - } - } -} - static pe_action_t * find_rsc_action(pe_resource_t *rsc, const char *task) { pe_action_t *match = NULL; GList *actions = pe__resource_actions(rsc, NULL, task, FALSE); for (GList *item = actions; item != NULL; item = item->next) { pe_action_t *op = (pe_action_t *) item->data; if (!pcmk_is_set(op->flags, pe_action_optional)) { if (match != NULL) { // More than one match, don't return any match = NULL; break; } match = op; } } g_list_free(actions); return match; } static void child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *stop = NULL; pe_action_t *start = NULL; pe_action_t *last_stop = NULL; pe_action_t *last_start = NULL; GList *gIter = NULL; if 
(!pe__clone_is_ordered(rsc)) { return; } /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; stop = find_rsc_action(child, RSC_STOP); if (stop) { if (last_stop) { /* child/child relative stop */ order_actions(stop, last_stop, pe_order_optional); } last_stop = stop; } start = find_rsc_action(child, RSC_START); if (start) { if (last_start) { /* child/child relative start */ order_actions(last_start, start, pe_order_optional); } last_start = start; } } } void clone_create_actions(pe_resource_t *rsc) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_debug(rsc, "Creating actions for clone %s", rsc->id); clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify); child_ordering_constraints(rsc, rsc->cluster); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__create_promotable_actions(rsc); } } -void -clone_create_pseudo_actions(pe_resource_t *rsc, GList *children, - notify_data_t **start_notify, - notify_data_t **stop_notify) -{ - gboolean child_active = FALSE; - gboolean child_starting = FALSE; - gboolean child_stopping = FALSE; - gboolean allow_dependent_migrations = TRUE; - - pe_action_t *stop = NULL; - pe_action_t *stopped = NULL; - - pe_action_t *start = NULL; - pe_action_t *started = NULL; - - pe_rsc_trace(rsc, "Creating actions for %s", rsc->id); - - for (GList *gIter = children; gIter != NULL; gIter = gIter->next) { - pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; - gboolean starting = FALSE; - gboolean stopping = FALSE; - - child_rsc->cmds->create_actions(child_rsc); - clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active); - if (stopping && starting) { - allow_dependent_migrations = FALSE; - } - - child_stopping |= 
stopping; - child_starting |= starting; - } - - /* start */ - start = pe__new_rsc_pseudo_action(rsc, RSC_START, !child_starting, true); - started = pe__new_rsc_pseudo_action(rsc, RSC_STARTED, !child_starting, - false); - started->priority = INFINITY; - - if (child_active || child_starting) { - pe__set_action_flags(started, pe_action_runnable); - } - - if (start_notify != NULL && *start_notify == NULL) { - *start_notify = pe__clone_notif_pseudo_ops(rsc, RSC_START, start, - started); - } - - /* stop */ - stop = pe__new_rsc_pseudo_action(rsc, RSC_STOP, !child_stopping, true); - stopped = pe__new_rsc_pseudo_action(rsc, RSC_STOPPED, !child_stopping, - true); - stopped->priority = INFINITY; - if (allow_dependent_migrations) { - pe__set_action_flags(stop, pe_action_migrate_runnable); - } - - if (stop_notify != NULL && *stop_notify == NULL) { - *stop_notify = pe__clone_notif_pseudo_ops(rsc, RSC_STOP, stop, stopped); - - if (start_notify && *start_notify && *stop_notify) { - order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional); - } - } -} - void clone_internal_constraints(pe_resource_t *rsc) { pe_resource_t *last_rsc = NULL; GList *gIter; bool ordered = pe__clone_is_ordered(rsc); pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id); pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional); pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left); pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional); pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left); } if (ordered) { /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); } for (gIter = rsc->children; gIter != NULL; gIter = 
gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->internal_constraints(child_rsc); pcmk__order_starts(rsc, child_rsc, pe_order_runnable_left|pe_order_implies_first_printed); pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed); if (ordered && (last_rsc != NULL)) { pcmk__order_starts(last_rsc, child_rsc, pe_order_optional); } pcmk__order_stops(rsc, child_rsc, pe_order_implies_first_printed); pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed); if (ordered && (last_rsc != NULL)) { pcmk__order_stops(child_rsc, last_rsc, pe_order_optional); } last_rsc = child_rsc; } if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__order_promotable_instances(rsc); } } -gboolean -is_child_compatible(const pe_resource_t *child_rsc, const pe_node_t *local_node, - enum rsc_role_e filter, gboolean current) -{ - pe_node_t *node = NULL; - enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); - - CRM_CHECK(child_rsc && local_node, return FALSE); - if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { - /* We only want instances that haven't failed */ - node = child_rsc->fns->location(child_rsc, NULL, current); - } - - if (filter != RSC_ROLE_UNKNOWN && next_role != filter) { - crm_trace("Filtered %s", child_rsc->id); - return FALSE; - } - - if (node && (node->details == local_node->details)) { - return TRUE; - - } else if (node) { - crm_trace("%s - %s vs %s", child_rsc->id, pe__node_name(node), - pe__node_name(local_node)); - - } else { - crm_trace("%s - not allocated %d", child_rsc->id, current); - } - return FALSE; -} - -pe_resource_t * -find_compatible_child(const pe_resource_t *local_child, - const pe_resource_t *rsc, enum rsc_role_e filter, - gboolean current) -{ - pe_resource_t *pair = NULL; - GList *gIter = NULL; - GList *scratch = NULL; - pe_node_t *local_node = NULL; - - local_node = 
local_child->fns->location(local_child, NULL, current); - if (local_node) { - return find_compatible_child_by_node(local_child, local_node, rsc, filter, current); - } - - scratch = g_hash_table_get_values(local_child->allowed_nodes); - scratch = pcmk__sort_nodes(scratch, NULL); - - gIter = scratch; - for (; gIter != NULL; gIter = gIter->next) { - pe_node_t *node = (pe_node_t *) gIter->data; - - pair = find_compatible_child_by_node(local_child, node, rsc, filter, current); - if (pair) { - goto done; - } - } - - pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id); - done: - g_list_free(scratch); - return pair; -} - /*! * \internal * \brief Apply a colocation's score to node weights or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node weights (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void pcmk__clone_apply_coloc_score(pe_resource_t *dependent, const pe_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { GList *gIter = NULL; gboolean do_interleave = FALSE; const char *interleave_s = NULL; /* This should never be called for the clone itself as a dependent. Instead, * we add its colocation constraints to its instances and call the * apply_coloc_score() for the instances as dependents. 
*/ CRM_ASSERT(!for_dependent); CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL), return); CRM_CHECK(dependent->variant == pe_native, return); pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d", colocation->id, dependent->id, primary->id, colocation->score); if (pcmk_is_set(primary->flags, pe_rsc_promotable)) { if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { // We haven't placed the primary yet, so we can't apply colocation pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (colocation->primary_role == RSC_ROLE_UNKNOWN) { // This isn't a role-specfic colocation, so handle normally pe_rsc_trace(primary, "Handling %s as a clone colocation", colocation->id); } else if (pcmk_is_set(dependent->flags, pe_rsc_provisional)) { // We're placing the dependent pcmk__update_dependent_with_promotable(primary, dependent, colocation); return; } else if (colocation->dependent_role == RSC_ROLE_PROMOTED) { // We're choosing roles for the dependent pcmk__update_promotable_dependent_priority(primary, dependent, colocation); return; } } // Only the dependent needs to be marked for interleave interleave_s = g_hash_table_lookup(colocation->dependent->meta, XML_RSC_ATTR_INTERLEAVE); if (crm_is_true(interleave_s) && (colocation->dependent->variant > pe_group)) { /* @TODO Do we actually care about multiple primary copies sharing a * dependent copy anymore? 
*/ if (copies_per_node(colocation->dependent) != copies_per_node(colocation->primary)) { pcmk__config_err("Cannot interleave %s and %s because they do not " "support the same number of instances per node", colocation->dependent->id, colocation->primary->id); } else { do_interleave = TRUE; } } if (pcmk_is_set(primary->flags, pe_rsc_provisional)) { pe_rsc_trace(primary, "%s is still provisional", primary->id); return; } else if (do_interleave) { pe_resource_t *primary_instance = NULL; primary_instance = find_compatible_child(dependent, primary, RSC_ROLE_UNKNOWN, FALSE); if (primary_instance != NULL) { pe_rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_instance->id); dependent->cmds->apply_coloc_score(dependent, primary_instance, colocation, true); } else if (colocation->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true); } else { pe_rsc_debug(primary, "Cannot pair %s with instance of %s", dependent->id, primary->id); } return; } else if (colocation->score >= INFINITY) { GList *affected_nodes = NULL; gIter = primary->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { pe_rsc_trace(primary, "Allowing %s: %s %d", colocation->id, pe__node_name(chosen), chosen->weight); affected_nodes = g_list_prepend(affected_nodes, chosen); } } node_list_exclude(dependent->allowed_nodes, affected_nodes, FALSE); g_list_free(affected_nodes); return; } gIter = primary->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->apply_coloc_score(dependent, child_rsc, colocation, false); } } -enum action_tasks -clone_child_action(pe_action_t * action) -{ - enum action_tasks result = no_action; - 
pe_resource_t *child = (pe_resource_t *) action->rsc->children->data; - - if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) { - - /* Find the action we're notifying about instead */ - - int stop = 0; - char *key = action->uuid; - int lpc = strlen(key); - - for (; lpc > 0; lpc--) { - if (key[lpc] == '_' && stop == 0) { - stop = lpc; - - } else if (key[lpc] == '_') { - char *task_mutable = NULL; - - lpc++; - task_mutable = strdup(key + lpc); - task_mutable[stop - lpc] = 0; - - crm_trace("Extracted action '%s' from '%s'", task_mutable, key); - result = get_complex_task(child, task_mutable, TRUE); - free(task_mutable); - break; - } - } - - } else { - result = get_complex_task(child, action->task, TRUE); - } - return result; -} - -#define pe__clear_action_summary_flags(flags, action, flag) do { \ - flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ - "Action summary", action->rsc->id, \ - flags, flag, #flag); \ - } while (0) - -enum pe_action_flags -summary_action_flags(pe_action_t *action, GList *children, - const pe_node_t *node) -{ - GList *gIter = NULL; - gboolean any_runnable = FALSE; - gboolean check_runnable = TRUE; - enum action_tasks task = clone_child_action(action); - enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); - const char *task_s = task2text(task); - - for (gIter = children; gIter != NULL; gIter = gIter->next) { - pe_action_t *child_action = NULL; - pe_resource_t *child = (pe_resource_t *) gIter->data; - - child_action = find_first_action(child->actions, NULL, task_s, child->children ? 
NULL : node); - pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id, - pe__node_name(node), child_action?child_action->uuid:"NA"); - if (child_action) { - enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); - - if (pcmk_is_set(flags, pe_action_optional) - && !pcmk_is_set(child_flags, pe_action_optional)) { - pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid, - child_action->uuid); - pe__clear_action_summary_flags(flags, action, pe_action_optional); - pe__clear_action_flags(action, pe_action_optional); - } - if (pcmk_is_set(child_flags, pe_action_runnable)) { - any_runnable = TRUE; - } - } - } - - if (check_runnable && any_runnable == FALSE) { - pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid); - pe__clear_action_summary_flags(flags, action, pe_action_runnable); - if (node == NULL) { - pe__clear_action_flags(action, pe_action_runnable); - } - } - - return flags; -} - enum pe_action_flags clone_action_flags(pe_action_t *action, const pe_node_t *node) { return summary_action_flags(action, action->rsc->children, node); } void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { GList *gIter = rsc->children; pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); pcmk__apply_location(rsc, constraint); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->apply_location(child_rsc, constraint); } } /*! 
* \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void clone_expand(pe_resource_t *rsc) { GList *gIter = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); g_list_foreach(rsc->actions, (GFunc) rsc->cmds->action_flags, NULL); pe__create_notifications(rsc, clone_data->start_notify); pe__create_notifications(rsc, clone_data->stop_notify); pe__create_notifications(rsc, clone_data->promote_notify); pe__create_notifications(rsc, clone_data->demote_notify); /* Now that the notifcations have been created we can expand the children */ gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->add_actions_to_graph(child_rsc); } pcmk__add_rsc_actions_to_graph(rsc); /* The notifications are in the graph now, we can destroy the notify_data */ pe__free_notification_data(clone_data->demote_notify); clone_data->demote_notify = NULL; pe__free_notification_data(clone_data->stop_notify); clone_data->stop_notify = NULL; pe__free_notification_data(clone_data->start_notify); clone_data->start_notify = NULL; pe__free_notification_data(clone_data->promote_notify); clone_data->promote_notify = NULL; } // Check whether a resource or any of its children is known on node static bool rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node) { if (rsc->children) { for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; if (rsc_known_on(child, node)) { return TRUE; } } } else if (rsc->known_on) { GHashTableIter iter; pe_node_t *known_node = NULL; g_hash_table_iter_init(&iter, rsc->known_on); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) { if (node->details == known_node->details) { return TRUE; } } } return FALSE; } // Look for an instance of clone that is known on node 
static pe_resource_t * find_instance_on(const pe_resource_t *clone, const pe_node_t *node) { for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (rsc_known_on(child, node)) { return child; } } return NULL; } // For anonymous clones, only a single instance needs to be probed static bool probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { // First, check if we probed an instance on this node last time pe_resource_t *child = find_instance_on(rsc, node); // Otherwise, check if we plan to start an instance on this node if (child == NULL) { for (GList *child_iter = rsc->children; child_iter && !child; child_iter = child_iter->next) { pe_node_t *local_node = NULL; pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data; if (child_rsc) { /* make clang analyzer happy */ local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); if (local_node && (local_node->details == node->details)) { child = child_rsc; } } } } // Otherwise, use the first clone instance if (child == NULL) { child = rsc->children->data; } CRM_ASSERT(child); return child->cmds->create_probe(child, node); } /*! 
* \internal * * \brief Schedule any probes needed for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool clone_create_probe(pe_resource_t *rsc, pe_node_t *node) { CRM_ASSERT(rsc); rsc->children = g_list_sort(rsc->children, pcmk__cmp_instance_number); if (rsc->children == NULL) { pe_warn("Clone %s has no children", rsc->id); return false; } if (rsc->exclusive_discover) { pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on * * remove the node from allowed_nodes so that the * notification contains only nodes that we might ever run * on */ g_hash_table_remove(rsc->allowed_nodes, node->details->id); /* Bit of a shortcut - might as well take it */ return false; } } if (pcmk_is_set(rsc->flags, pe_rsc_unique)) { return pcmk__probe_resource_list(rsc->children, node); } else { return probe_anonymous_clone(rsc, node, rsc->cluster); } } void clone_append_meta(const pe_resource_t *rsc, xmlNode *xml) { char *name = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); name = crm_meta_name(XML_RSC_ATTR_UNIQUE); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique)); free(name); name = crm_meta_name(XML_RSC_ATTR_NOTIFY); crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify)); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX); crm_xml_add_int(xml, name, clone_data->clone_max); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX); crm_xml_add_int(xml, name, clone_data->clone_node_max); free(name); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { int promoted_max = pe__clone_promoted_max(rsc); int promoted_node_max = pe__clone_promoted_node_max(rsc); name = 
crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX); crm_xml_add_int(xml, name, promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX); crm_xml_add_int(xml, name, promoted_node_max); free(name); /* @COMPAT Maintain backward compatibility with resource agents that * expect the old names (deprecated since 2.0.0). */ name = crm_meta_name(PCMK_XA_PROMOTED_MAX_LEGACY); crm_xml_add_int(xml, name, promoted_max); free(name); name = crm_meta_name(PCMK_XA_PROMOTED_NODE_MAX_LEGACY); crm_xml_add_int(xml, name, promoted_node_max); free(name); } } // Clone implementation of resource_alloc_functions_t:add_utilization() void pcmk__clone_add_utilization(const pe_resource_t *rsc, const pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { bool existing = false; pe_resource_t *child = NULL; if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } // Look for any child already existing in the list for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { child = (pe_resource_t *) iter->data; if (g_list_find(all_rscs, child)) { existing = true; // Keep checking remaining children } else { // If this is a clone of a group, look for group's members for (GList *member_iter = child->children; member_iter != NULL; member_iter = member_iter->next) { pe_resource_t *member = (pe_resource_t *) member_iter->data; if (g_list_find(all_rscs, member) != NULL) { // Add *child's* utilization, not group member's child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); existing = true; break; } } } } if (!existing && (rsc->children != NULL)) { // If nothing was found, still add first child's utilization child = (pe_resource_t *) rsc->children->data; child->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); } } // Clone implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__clone_shutdown_lock(pe_resource_t *rsc) { return; // Clones currently don't support shutdown locks } diff --git 
a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c new file mode 100644 index 0000000000..420edeef0c --- /dev/null +++ b/lib/pacemaker/pcmk_sched_instances.c @@ -0,0 +1,654 @@ +/* + * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +/* This file is intended for code usable with both clone instances and bundle + * replica containers. + */ + +#include +#include +#include +#include "libpacemaker_private.h" + +static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all); + +/*! + * \internal + * \brief Check whether a node is allowed to run an instance + * + * \param[in] instance Clone instance or bundle container to check + * \param[in] node Node to check + * \param[in] max_per_node Maximum number of instances allowed to run on a node + * + * \return true if \p node is allowed to run \p instance, otherwise false + */ +static bool +can_run_instance(const pe_resource_t *instance, const pe_node_t *node, + int max_per_node) +{ + pe_node_t *allowed_node = NULL; + + if (pcmk_is_set(instance->flags, pe_rsc_orphan)) { + pe_rsc_trace(instance, "%s cannot run on %s: orphaned", + instance->id, pe__node_name(node)); + return false; + } + + if (!pcmk__node_available(node, false, false)) { + pe_rsc_trace(instance, + "%s cannot run on %s: node cannot run resources", + instance->id, pe__node_name(node)); + return false; + } + + allowed_node = pcmk__top_allowed_node(instance, node); + if (allowed_node == NULL) { + crm_warn("%s cannot run on %s: node not allowed", + instance->id, pe__node_name(node)); + return false; + } + + if (allowed_node->weight < 0) { + pe_rsc_trace(instance, "%s cannot run on %s: parent score is %s there", + instance->id, pe__node_name(node), + pcmk_readable_score(allowed_node->weight)); + 
return false; + } + + if (allowed_node->count >= max_per_node) { + pe_rsc_trace(instance, + "%s cannot run on %s: node already has %d instance%s", + instance->id, pe__node_name(node), max_per_node, + pcmk__plural_s(max_per_node)); + return false; + } + + pe_rsc_trace(instance, "%s can run on %s (%d already running)", + instance->id, pe__node_name(node), allowed_node->count); + return true; +} + +/*! + * \internal + * \brief Ban a clone instance or bundle replica from unavailable allowed nodes + * + * \param[in,out] instance Clone instance or bundle replica to ban + * \param[in] max_per_node Maximum instances allowed to run on a node + */ +static void +ban_unavailable_allowed_nodes(pe_resource_t *instance, int max_per_node) +{ + if (instance->allowed_nodes != NULL) { + GHashTableIter iter; + const pe_node_t *allowed_node = NULL; + + g_hash_table_iter_init(&iter, instance->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **) &allowed_node)) { + if (!can_run_instance(instance, allowed_node, max_per_node)) { + // Ban instance (and all its children) from node + common_update_score(instance, allowed_node->details->id, + -INFINITY); + } + } + } +} + +/*! 
+ * \internal + * \brief Choose a node for an instance + * + * \param[in,out] instance Clone instance or bundle replica container + * \param[in] prefer If not NULL, attempt early assignment to this + * node, if still the best choice; otherwise, + * perform final assignment + * \param[in] all_coloc If true (indicating that there are more + * available nodes than instances), add all parent + * colocations to instance, otherwise add only + * negative (and for "this with" colocations, + * infinite) colocations to avoid needless + * shuffling of instances among nodes + * \param[in] max_per_node Assign at most this many instances to one node + * + * \return true if \p instance could be assigned to a node, otherwise false + */ +static bool +assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + bool all_coloc, int max_per_node) +{ + pe_node_t *chosen = NULL; + pe_node_t *allowed = NULL; + + CRM_ASSERT(instance != NULL); + pe_rsc_trace(instance, + "Assigning %s (preferring %s, using %s parent colocations)", + instance->id, + ((prefer == NULL)? "no node" : prefer->details->uname), + (all_coloc? 
"all" : "essential")); + + if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) { + // Instance is already assigned + return instance->fns->location(instance, NULL, FALSE) != NULL; + } + + if (pcmk_is_set(instance->flags, pe_rsc_allocating)) { + pe_rsc_debug(instance, + "Assignment loop detected involving %s colocations", + instance->id); + return false; + } + + if (prefer != NULL) { // Possible early assignment to preferred node + + // Get preferred node with instance's scores + allowed = g_hash_table_lookup(instance->allowed_nodes, + prefer->details->id); + + if ((allowed == NULL) || (allowed->weight < 0)) { + pe_rsc_trace(instance, + "Not assigning %s to preferred node %s: unavailable", + instance->id, pe__node_name(prefer)); + return false; + } + } + + ban_unavailable_allowed_nodes(instance, max_per_node); + + if (prefer == NULL) { // Final assignment + chosen = instance->cmds->assign(instance, NULL); + + } else { // Possible early assignment to preferred node + GHashTable *backup = pcmk__copy_node_table(instance->allowed_nodes); + + chosen = instance->cmds->assign(instance, prefer); + + // Revert nodes if preferred node won't be assigned + if ((chosen != NULL) && (chosen->details != prefer->details)) { + crm_info("Not assigning %s to preferred node %s: %s is better", + instance->id, pe__node_name(prefer), + pe__node_name(chosen)); + g_hash_table_destroy(instance->allowed_nodes); + instance->allowed_nodes = backup; + pcmk__unassign_resource(instance); + chosen = NULL; + } else if (backup != NULL) { + g_hash_table_destroy(backup); + } + } + + // The parent tracks how many instances have been assigned to each node + if (chosen != NULL) { + allowed = pcmk__top_allowed_node(instance, chosen); + if (allowed == NULL) { + /* The instance is allowed on the node, but its parent isn't. This + * shouldn't be possible if the resource is managed, and we won't be + * able to limit the number of instances assigned to the node. 
+ */ + CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pe_rsc_managed)); + + } else { + allowed->count++; + } + } + return chosen != NULL; +} + +static void +append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all) +{ + + GList *gIter = NULL; + + gIter = rsc->rsc_cons; + for (; gIter != NULL; gIter = gIter->next) { + pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; + + if (all || cons->score < 0 || cons->score == INFINITY) { + pcmk__add_this_with(child, cons); + } + } + + gIter = rsc->rsc_cons_lhs; + for (; gIter != NULL; gIter = gIter->next) { + pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data; + + if (!pcmk__colocation_has_influence(cons, child)) { + continue; + } + if (all || cons->score < 0) { + pcmk__add_with_this(child, cons); + } + } +} + +/*! + * \internal + * \brief Reset the node counts of a resource's allowed nodes to zero + * + * \param[in,out] rsc Resource to reset + * + * \return Number of nodes that are available to run resources + */ +static unsigned int +reset_allowed_node_counts(pe_resource_t *rsc) +{ + unsigned int available_nodes = 0; + pe_node_t *node = NULL; + GHashTableIter iter; + + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + node->count = 0; + if (pcmk__node_available(node, false, false)) { + available_nodes++; + } + } + return available_nodes; +} + +/*! 
+ * \internal + * \brief Check whether an instance has a preferred node + * + * \param[in] rsc Clone or bundle being assigned (for logs only) + * \param[in] instance Clone instance or bundle replica container + * \param[in] optimal_per_node Optimal number of instances per node + * + * \return Instance's current node if still available, otherwise NULL + */ +static const pe_node_t * +preferred_node(const pe_resource_t *rsc, const pe_resource_t *instance, + int optimal_per_node) +{ + const pe_node_t *node = NULL; + const pe_node_t *parent_node = NULL; + + // Check whether instance is active, healthy, and not yet assigned + if ((instance->running_on == NULL) + || !pcmk_is_set(instance->flags, pe_rsc_provisional) + || pcmk_is_set(instance->flags, pe_rsc_failed)) { + return NULL; + } + + // Check whether instance's current node can run resources + node = pe__current_node(instance); + if (!pcmk__node_available(node, true, false)) { + pe_rsc_trace(rsc, "Not assigning %s to %s early (unavailable)", + instance->id, pe__node_name(node)); + return NULL; + } + + // Check whether node already has optimal number of instances assigned + parent_node = pcmk__top_allowed_node(instance, node); + if ((parent_node != NULL) && (parent_node->count >= optimal_per_node)) { + pe_rsc_trace(rsc, + "Not assigning %s to %s early " + "(optimal instances already assigned)", + instance->id, pe__node_name(node)); + return NULL; + } + + return node; +} + +void +distribute_children(pe_resource_t *rsc, GList *children, int max, + int per_host_max, pe_working_set_t *data_set) +{ + // Reuse node count to track number of assigned instances + unsigned int available_nodes = reset_allowed_node_counts(rsc); + + /* Include finite positive preferences of the collective's + * colocation dependents only if not every node will get an instance. 
+ */ + bool all_coloc = (max < available_nodes); + + int loop_max = 0; + int allocated = 0; + + if(available_nodes) { + loop_max = max / available_nodes; + } + if (loop_max < 1) { + loop_max = 1; + } + + pe_rsc_debug(rsc, + "Allocating up to %d %s instances to a possible %u nodes " + "(at most %d per host, %d optimal)", + max, rsc->id, available_nodes, per_host_max, loop_max); + + /* Pre-allocate as many instances as we can to their current location */ + for (GList *gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) { + pe_resource_t *child = (pe_resource_t *) gIter->data; + const pe_node_t *child_node = NULL; + + append_parent_colocation(child->parent, child, all_coloc); + + child_node = preferred_node(rsc, child, loop_max); + if ((child_node != NULL) + && assign_instance(child, child_node, all_coloc, per_host_max)) { + pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id, + pe__node_name(child_node)); + allocated++; + } + } + + pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max); + + for (GList *gIter = children; gIter != NULL; gIter = gIter->next) { + pe_resource_t *child = (pe_resource_t *) gIter->data; + + if (child->running_on != NULL) { + pe_node_t *child_node = pe__current_node(child); + pe_node_t *local_node = pcmk__top_allowed_node(child, child_node); + + if (local_node == NULL) { + crm_err("%s is running on %s which isn't allowed", + child->id, pe__node_name(child_node)); + } + } + + if (!pcmk_is_set(child->flags, pe_rsc_provisional)) { + } else if (allocated >= max) { + pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max); + resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set); + } else { + if (assign_instance(child, NULL, all_coloc, per_host_max)) { + allocated++; + } + } + } + + pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d", + allocated, rsc->id, max); +} + +static void +clone_update_pseudo_status(pe_resource_t * rsc, gboolean * 
stopping, gboolean * starting, + gboolean * active) +{ + GList *gIter = NULL; + + if (rsc->children) { + + gIter = rsc->children; + for (; gIter != NULL; gIter = gIter->next) { + pe_resource_t *child = (pe_resource_t *) gIter->data; + + clone_update_pseudo_status(child, stopping, starting, active); + } + + return; + } + + CRM_ASSERT(active != NULL); + CRM_ASSERT(starting != NULL); + CRM_ASSERT(stopping != NULL); + + if (rsc->running_on) { + *active = TRUE; + } + + gIter = rsc->actions; + for (; gIter != NULL; gIter = gIter->next) { + pe_action_t *action = (pe_action_t *) gIter->data; + + if (*starting && *stopping) { + return; + + } else if (pcmk_is_set(action->flags, pe_action_optional)) { + pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid); + continue; + + } else if (!pcmk_any_flags_set(action->flags, + pe_action_pseudo|pe_action_runnable)) { + pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid); + continue; + + } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) { + pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid); + *stopping = TRUE; + + } else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) { + if (!pcmk_is_set(action->flags, pe_action_runnable)) { + pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d", + action->uuid, + pcmk_is_set(action->flags, pe_action_runnable), + pcmk_is_set(action->flags, pe_action_pseudo)); + } else { + pe_rsc_trace(rsc, "Starting due to: %s", action->uuid); + pe_rsc_trace(rsc, "%s run=%d, pseudo=%d", + action->uuid, + pcmk_is_set(action->flags, pe_action_runnable), + pcmk_is_set(action->flags, pe_action_pseudo)); + *starting = TRUE; + } + } + } +} + +void +clone_create_pseudo_actions(pe_resource_t *rsc, GList *children, + notify_data_t **start_notify, + notify_data_t **stop_notify) +{ + gboolean child_active = FALSE; + gboolean child_starting = FALSE; + gboolean child_stopping = FALSE; + gboolean allow_dependent_migrations = TRUE; + + pe_action_t *stop = NULL; + pe_action_t 
*stopped = NULL; + + pe_action_t *start = NULL; + pe_action_t *started = NULL; + + pe_rsc_trace(rsc, "Creating actions for %s", rsc->id); + + for (GList *gIter = children; gIter != NULL; gIter = gIter->next) { + pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; + gboolean starting = FALSE; + gboolean stopping = FALSE; + + child_rsc->cmds->create_actions(child_rsc); + clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active); + if (stopping && starting) { + allow_dependent_migrations = FALSE; + } + + child_stopping |= stopping; + child_starting |= starting; + } + + /* start */ + start = pe__new_rsc_pseudo_action(rsc, RSC_START, !child_starting, true); + started = pe__new_rsc_pseudo_action(rsc, RSC_STARTED, !child_starting, + false); + started->priority = INFINITY; + + if (child_active || child_starting) { + pe__set_action_flags(started, pe_action_runnable); + } + + if (start_notify != NULL && *start_notify == NULL) { + *start_notify = pe__clone_notif_pseudo_ops(rsc, RSC_START, start, + started); + } + + /* stop */ + stop = pe__new_rsc_pseudo_action(rsc, RSC_STOP, !child_stopping, true); + stopped = pe__new_rsc_pseudo_action(rsc, RSC_STOPPED, !child_stopping, + true); + stopped->priority = INFINITY; + if (allow_dependent_migrations) { + pe__set_action_flags(stop, pe_action_migrate_runnable); + } + + if (stop_notify != NULL && *stop_notify == NULL) { + *stop_notify = pe__clone_notif_pseudo_ops(rsc, RSC_STOP, stop, stopped); + + if (start_notify && *start_notify && *stop_notify) { + order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional); + } + } +} + +gboolean +is_child_compatible(const pe_resource_t *child_rsc, const pe_node_t *local_node, + enum rsc_role_e filter, gboolean current) +{ + pe_node_t *node = NULL; + enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); + + CRM_CHECK(child_rsc && local_node, return FALSE); + if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { + /* We only 
want instances that haven't failed */ + node = child_rsc->fns->location(child_rsc, NULL, current); + } + + if (filter != RSC_ROLE_UNKNOWN && next_role != filter) { + crm_trace("Filtered %s", child_rsc->id); + return FALSE; + } + + if (node && (node->details == local_node->details)) { + return TRUE; + + } else if (node) { + crm_trace("%s - %s vs %s", child_rsc->id, pe__node_name(node), + pe__node_name(local_node)); + + } else { + crm_trace("%s - not allocated %d", child_rsc->id, current); + } + return FALSE; +} + +pe_resource_t * +find_compatible_child(const pe_resource_t *local_child, + const pe_resource_t *rsc, enum rsc_role_e filter, + gboolean current) +{ + pe_resource_t *pair = NULL; + GList *gIter = NULL; + GList *scratch = NULL; + pe_node_t *local_node = NULL; + + local_node = local_child->fns->location(local_child, NULL, current); + if (local_node) { + return find_compatible_child_by_node(local_child, local_node, rsc, filter, current); + } + + scratch = g_hash_table_get_values(local_child->allowed_nodes); + scratch = pcmk__sort_nodes(scratch, NULL); + + gIter = scratch; + for (; gIter != NULL; gIter = gIter->next) { + pe_node_t *node = (pe_node_t *) gIter->data; + + pair = find_compatible_child_by_node(local_child, node, rsc, filter, current); + if (pair) { + goto done; + } + } + + pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id); + done: + g_list_free(scratch); + return pair; +} + +enum action_tasks +clone_child_action(pe_action_t * action) +{ + enum action_tasks result = no_action; + pe_resource_t *child = (pe_resource_t *) action->rsc->children->data; + + if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) { + + /* Find the action we're notifying about instead */ + + int stop = 0; + char *key = action->uuid; + int lpc = strlen(key); + + for (; lpc > 0; lpc--) { + if (key[lpc] == '_' && stop == 0) { + stop = lpc; + + } else if (key[lpc] == '_') { + char *task_mutable = NULL; + + lpc++; + task_mutable = strdup(key + lpc); 
+ task_mutable[stop - lpc] = 0; + + crm_trace("Extracted action '%s' from '%s'", task_mutable, key); + result = get_complex_task(child, task_mutable, TRUE); + free(task_mutable); + break; + } + } + + } else { + result = get_complex_task(child, action->task, TRUE); + } + return result; +} + +#define pe__clear_action_summary_flags(flags, action, flag) do { \ + flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ + "Action summary", action->rsc->id, \ + flags, flag, #flag); \ + } while (0) + +enum pe_action_flags +summary_action_flags(pe_action_t *action, GList *children, + const pe_node_t *node) +{ + GList *gIter = NULL; + gboolean any_runnable = FALSE; + gboolean check_runnable = TRUE; + enum action_tasks task = clone_child_action(action); + enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); + const char *task_s = task2text(task); + + for (gIter = children; gIter != NULL; gIter = gIter->next) { + pe_action_t *child_action = NULL; + pe_resource_t *child = (pe_resource_t *) gIter->data; + + child_action = find_first_action(child->actions, NULL, task_s, child->children ? 
NULL : node); + pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id, + pe__node_name(node), child_action?child_action->uuid:"NA"); + if (child_action) { + enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); + + if (pcmk_is_set(flags, pe_action_optional) + && !pcmk_is_set(child_flags, pe_action_optional)) { + pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid, + child_action->uuid); + pe__clear_action_summary_flags(flags, action, pe_action_optional); + pe__clear_action_flags(action, pe_action_optional); + } + if (pcmk_is_set(child_flags, pe_action_runnable)) { + any_runnable = TRUE; + } + } + } + + if (check_runnable && any_runnable == FALSE) { + pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid); + pe__clear_action_summary_flags(flags, action, pe_action_runnable); + if (node == NULL) { + pe__clear_action_flags(action, pe_action_runnable); + } + } + + return flags; +} diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_scheduler.c similarity index 100% rename from lib/pacemaker/pcmk_sched_allocate.c rename to lib/pacemaker/pcmk_scheduler.c diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 7a5e40e34e..e9bc23d92a 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -1,1325 +1,1342 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include #include #include #include #include #define VARIANT_CLONE 1 #include "./variant.h" #ifdef PCMK__COMPAT_2_0 #define PROMOTED_INSTANCES RSC_ROLE_PROMOTED_LEGACY_S "s" #define UNPROMOTED_INSTANCES RSC_ROLE_UNPROMOTED_LEGACY_S "s" #else #define PROMOTED_INSTANCES RSC_ROLE_PROMOTED_S #define UNPROMOTED_INSTANCES RSC_ROLE_UNPROMOTED_S #endif +/*! + * \internal + * \brief Return the maximum number of clone instances allowed to be run + * + * \param[in] clone Clone or clone instance to check + * + * \return Maximum instances for \p clone + */ +int +pe__clone_max(const pe_resource_t *clone) +{ + const clone_variant_data_t *clone_data = NULL; + + get_clone_variant_data(clone_data, pe__const_top_resource(clone, false)); + return clone_data->clone_max; +} + /*! * \internal * \brief Return the maximum number of clone instances allowed to be promoted * * \param[in] clone Promotable clone or clone instance to check * * \return Maximum promoted instances for \p clone */ int pe__clone_promoted_max(const pe_resource_t *clone) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, pe__const_top_resource(clone, false)); return clone_data->promoted_max; } /*! 
* \internal * \brief Return the maximum number of clone instances allowed to be promoted * * \param[in] clone Promotable clone or clone instance to check * * \return Maximum promoted instances for \p clone */ int pe__clone_promoted_node_max(const pe_resource_t *clone) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, pe__const_top_resource(clone, false)); return clone_data->promoted_node_max; } static GList * sorted_hash_table_values(GHashTable *table) { GList *retval = NULL; GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, table); while (g_hash_table_iter_next(&iter, &key, &value)) { if (!g_list_find_custom(retval, value, (GCompareFunc) strcmp)) { retval = g_list_prepend(retval, (char *) value); } } retval = g_list_sort(retval, (GCompareFunc) strcmp); return retval; } static GList * nodes_with_status(GHashTable *table, const char *status) { GList *retval = NULL; GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, table); while (g_hash_table_iter_next(&iter, &key, &value)) { if (!strcmp((char *) value, status)) { retval = g_list_prepend(retval, key); } } retval = g_list_sort(retval, (GCompareFunc) pcmk__numeric_strcasecmp); return retval; } static GString * node_list_to_str(const GList *list) { GString *retval = NULL; for (const GList *iter = list; iter != NULL; iter = iter->next) { pcmk__add_word(&retval, 1024, (const char *) iter->data); } return retval; } static void clone_header(pcmk__output_t *out, int *rc, const pe_resource_t *rsc, clone_variant_data_t *clone_data) { GString *attrs = NULL; if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pcmk__add_separated_word(&attrs, 64, "promotable", ", "); } if (pcmk_is_set(rsc->flags, pe_rsc_unique)) { pcmk__add_separated_word(&attrs, 64, "unique", ", "); } if (pe__resource_is_disabled(rsc)) { pcmk__add_separated_word(&attrs, 64, "disabled", ", "); } if (pcmk_is_set(rsc->flags, pe_rsc_maintenance)) { pcmk__add_separated_word(&attrs, 64, 
"maintenance", ", "); } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pcmk__add_separated_word(&attrs, 64, "unmanaged", ", "); } if (attrs != NULL) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, *rc, "Clone Set: %s [%s] (%s)", rsc->id, ID(clone_data->xml_obj_child), (const char *) attrs->str); g_string_free(attrs, TRUE); } else { PCMK__OUTPUT_LIST_HEADER(out, FALSE, *rc, "Clone Set: %s [%s]", rsc->id, ID(clone_data->xml_obj_child)) } } void pe__force_anon(const char *standard, pe_resource_t *rsc, const char *rid, pe_working_set_t *data_set) { if (pe_rsc_is_clone(rsc)) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_warn("Ignoring " XML_RSC_ATTR_UNIQUE " for %s because %s resources " "such as %s can be used only as anonymous clones", rsc->id, standard, rid); clone_data->clone_node_max = 1; clone_data->clone_max = QB_MIN(clone_data->clone_max, g_list_length(data_set->nodes)); } } pe_resource_t * find_clone_instance(const pe_resource_t *rsc, const char *sub_id) { char *child_id = NULL; pe_resource_t *child = NULL; const char *child_base = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); child_base = ID(clone_data->xml_obj_child); child_id = crm_strdup_printf("%s:%s", child_base, sub_id); child = pe_find_resource(rsc->children, child_id); free(child_id); return child; } pe_resource_t * pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set) { gboolean as_orphan = FALSE; char *inc_num = NULL; char *inc_max = NULL; pe_resource_t *child_rsc = NULL; xmlNode *child_copy = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); CRM_CHECK(clone_data->xml_obj_child != NULL, return FALSE); if (clone_data->total_clones >= clone_data->clone_max) { // If we've already used all available instances, this is an orphan as_orphan = TRUE; } // Allocate instance numbers in numerical order (starting at 0) inc_num = pcmk__itoa(clone_data->total_clones); inc_max = 
pcmk__itoa(clone_data->clone_max); child_copy = copy_xml(clone_data->xml_obj_child); crm_xml_add(child_copy, XML_RSC_ATTR_INCARNATION, inc_num); if (pe__unpack_resource(child_copy, &child_rsc, rsc, data_set) != pcmk_rc_ok) { goto bail; } /* child_rsc->globally_unique = rsc->globally_unique; */ CRM_ASSERT(child_rsc); clone_data->total_clones += 1; pe_rsc_trace(child_rsc, "Setting clone attributes for: %s", child_rsc->id); rsc->children = g_list_append(rsc->children, child_rsc); if (as_orphan) { pe__set_resource_flags_recursive(child_rsc, pe_rsc_orphan); } add_hash_param(child_rsc->meta, XML_RSC_ATTR_INCARNATION_MAX, inc_max); pe_rsc_trace(rsc, "Added %s instance %s", rsc->id, child_rsc->id); bail: free(inc_num); free(inc_max); return child_rsc; } gboolean clone_unpack(pe_resource_t * rsc, pe_working_set_t * data_set) { int lpc = 0; xmlNode *a_child = NULL; xmlNode *xml_obj = rsc->xml; clone_variant_data_t *clone_data = NULL; const char *max_clones = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_MAX); const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); pe_rsc_trace(rsc, "Processing resource %s...", rsc->id); clone_data = calloc(1, sizeof(clone_variant_data_t)); rsc->variant_opaque = clone_data; if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { const char *promoted_max = NULL; const char *promoted_node_max = NULL; promoted_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTED_MAX); if (promoted_max == NULL) { // @COMPAT deprecated since 2.0.0 promoted_max = g_hash_table_lookup(rsc->meta, PCMK_XA_PROMOTED_MAX_LEGACY); } promoted_node_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTED_NODEMAX); if (promoted_node_max == NULL) { // @COMPAT deprecated since 2.0.0 promoted_node_max = g_hash_table_lookup(rsc->meta, PCMK_XA_PROMOTED_NODE_MAX_LEGACY); } // Use 1 as default but 0 for minimum and invalid if (promoted_max == NULL) { clone_data->promoted_max = 1; } else { pcmk__scan_min_int(promoted_max, 
&(clone_data->promoted_max), 0); } // Use 1 as default but 0 for minimum and invalid if (promoted_node_max == NULL) { clone_data->promoted_node_max = 1; } else { pcmk__scan_min_int(promoted_node_max, &(clone_data->promoted_node_max), 0); } } // Implied by calloc() /* clone_data->xml_obj_child = NULL; */ // Use 1 as default but 0 for minimum and invalid if (max_clones_node == NULL) { clone_data->clone_node_max = 1; } else { pcmk__scan_min_int(max_clones_node, &(clone_data->clone_node_max), 0); } /* Use number of nodes (but always at least 1, which is handy for crm_verify * for a CIB without nodes) as default, but 0 for minimum and invalid */ if (max_clones == NULL) { clone_data->clone_max = QB_MAX(1, g_list_length(data_set->nodes)); } else { pcmk__scan_min_int(max_clones, &(clone_data->clone_max), 0); } if (crm_is_true(g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_ORDERED))) { clone_data->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Clone", rsc->id, clone_data->flags, pe__clone_ordered, "pe__clone_ordered"); } if ((rsc->flags & pe_rsc_unique) == 0 && clone_data->clone_node_max > 1) { pcmk__config_err("Ignoring " XML_RSC_ATTR_PROMOTED_MAX " for %s " "because anonymous clones support only one instance " "per node", rsc->id); clone_data->clone_node_max = 1; } pe_rsc_trace(rsc, "Options for %s", rsc->id); pe_rsc_trace(rsc, "\tClone max: %d", clone_data->clone_max); pe_rsc_trace(rsc, "\tClone node max: %d", clone_data->clone_node_max); pe_rsc_trace(rsc, "\tClone is unique: %s", pe__rsc_bool_str(rsc, pe_rsc_unique)); pe_rsc_trace(rsc, "\tClone is promotable: %s", pe__rsc_bool_str(rsc, pe_rsc_promotable)); // Clones may contain a single group or primitive for (a_child = pcmk__xe_first_child(xml_obj); a_child != NULL; a_child = pcmk__xe_next(a_child)) { if (pcmk__str_any_of((const char *)a_child->name, XML_CIB_TAG_RESOURCE, XML_CIB_TAG_GROUP, NULL)) { clone_data->xml_obj_child = a_child; break; } } if (clone_data->xml_obj_child == NULL) { pcmk__config_err("%s 
has nothing to clone", rsc->id); return FALSE; } /* * Make clones ever so slightly sticky by default * * This helps ensure clone instances are not shuffled around the cluster * for no benefit in situations when pre-allocation is not appropriate */ if (g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_STICKINESS) == NULL) { add_hash_param(rsc->meta, XML_RSC_ATTR_STICKINESS, "1"); } /* This ensures that the globally-unique value always exists for children to * inherit when being unpacked, as well as in resource agents' environment. */ add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE, pe__rsc_bool_str(rsc, pe_rsc_unique)); if (clone_data->clone_max <= 0) { /* Create one child instance so that unpack_find_resource() will hook up * any orphans up to the parent correctly. */ if (pe__create_clone_child(rsc, data_set) == NULL) { return FALSE; } } else { // Create a child instance for each available instance number for (lpc = 0; lpc < clone_data->clone_max; lpc++) { if (pe__create_clone_child(rsc, data_set) == NULL) { return FALSE; } } } pe_rsc_trace(rsc, "Added %d children to resource %s...", clone_data->clone_max, rsc->id); return TRUE; } gboolean clone_active(pe_resource_t * rsc, gboolean all) { GList *gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean child_active = child_rsc->fns->active(child_rsc, all); if (all == FALSE && child_active) { return TRUE; } else if (all && child_active == FALSE) { return FALSE; } } if (all) { return TRUE; } else { return FALSE; } } /*! * \internal * \deprecated This function will be removed in a future release */ static void short_print(const char *list, const char *prefix, const char *type, const char *suffix, long options, void *print_data) { if(suffix == NULL) { suffix = ""; } if (!pcmk__str_empty(list)) { if (options & pe_print_html) { status_print("
  • "); } status_print("%s%s: [ %s ]%s", prefix, type, list, suffix); if (options & pe_print_html) { status_print("
  • \n"); } else if (options & pe_print_suppres_nl) { /* nothing */ } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("\n"); } } } static const char * configured_role_str(pe_resource_t * rsc) { const char *target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if ((target_role == NULL) && rsc->children && rsc->children->data) { target_role = g_hash_table_lookup(((pe_resource_t*)rsc->children->data)->meta, XML_RSC_ATTR_TARGET_ROLE); } return target_role; } static enum rsc_role_e configured_role(pe_resource_t * rsc) { const char *target_role = configured_role_str(rsc); if (target_role) { return text2role(target_role); } return RSC_ROLE_UNKNOWN; } /*! * \internal * \deprecated This function will be removed in a future release */ static void clone_print_xml(pe_resource_t *rsc, const char *pre_text, long options, void *print_data) { char *child_text = crm_strdup_printf("%s ", pre_text); const char *target_role = configured_role_str(rsc); GList *gIter = rsc->children; status_print("%sid); status_print("multi_state=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_promotable)); status_print("unique=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_unique)); status_print("managed=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_managed)); status_print("failed=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_failed)); status_print("failure_ignored=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_failure_ignored)); if (target_role) { status_print("target_role=\"%s\" ", target_role); } status_print(">\n"); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->fns->print(child_rsc, child_text, options, print_data); } status_print("%s\n", pre_text); free(child_text); } bool is_set_recursive(const pe_resource_t *rsc, long long flag, bool any) { GList *gIter; bool all = !any; if (pcmk_is_set(rsc->flags, flag)) { if(any) { return TRUE; } } else if(all) { return FALSE; } for (gIter = rsc->children; gIter != NULL; gIter = 
gIter->next) { if(is_set_recursive(gIter->data, flag, any)) { if(any) { return TRUE; } } else if(all) { return FALSE; } } if(all) { return TRUE; } return FALSE; } /*! * \internal * \deprecated This function will be removed in a future release */ void clone_print(pe_resource_t *rsc, const char *pre_text, long options, void *print_data) { GString *list_text = NULL; char *child_text = NULL; GString *stopped_list = NULL; GList *promoted_list = NULL; GList *started_list = NULL; GList *gIter = rsc->children; clone_variant_data_t *clone_data = NULL; int active_instances = 0; if (pre_text == NULL) { pre_text = " "; } if (options & pe_print_xml) { clone_print_xml(rsc, pre_text, options, print_data); return; } get_clone_variant_data(clone_data, rsc); child_text = crm_strdup_printf("%s ", pre_text); status_print("%sClone Set: %s [%s]%s%s%s", pre_text ? pre_text : "", rsc->id, ID(clone_data->xml_obj_child), pcmk_is_set(rsc->flags, pe_rsc_promotable)? " (promotable)" : "", pcmk_is_set(rsc->flags, pe_rsc_unique)? " (unique)" : "", pcmk_is_set(rsc->flags, pe_rsc_managed)? "" : " (unmanaged)"); if (options & pe_print_html) { status_print("\n
      \n"); } else if ((options & pe_print_log) == 0) { status_print("\n"); } for (; gIter != NULL; gIter = gIter->next) { gboolean print_full = FALSE; pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean partially_active = child_rsc->fns->active(child_rsc, FALSE); if (options & pe_print_clone_details) { print_full = TRUE; } if (pcmk_is_set(rsc->flags, pe_rsc_unique)) { // Print individual instance when unique (except stopped orphans) if (partially_active || !pcmk_is_set(rsc->flags, pe_rsc_orphan)) { print_full = TRUE; } // Everything else in this block is for anonymous clones } else if (pcmk_is_set(options, pe_print_pending) && (child_rsc->pending_task != NULL) && strcmp(child_rsc->pending_task, "probe")) { // Print individual instance when non-probe action is pending print_full = TRUE; } else if (partially_active == FALSE) { // List stopped instances when requested (except orphans) if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan) && !pcmk_is_set(options, pe_print_clone_active)) { pcmk__add_word(&stopped_list, 1024, child_rsc->id); } } else if (is_set_recursive(child_rsc, pe_rsc_orphan, TRUE) || is_set_recursive(child_rsc, pe_rsc_managed, FALSE) == FALSE || is_set_recursive(child_rsc, pe_rsc_failed, TRUE)) { // Print individual instance when active orphaned/unmanaged/failed print_full = TRUE; } else if (child_rsc->fns->active(child_rsc, TRUE)) { // Instance of fully active anonymous clone pe_node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE); if (location) { // Instance is active on a single node enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE); if (location->details->online == FALSE && location->details->unclean) { print_full = TRUE; } else if (a_role > RSC_ROLE_UNPROMOTED) { promoted_list = g_list_append(promoted_list, location); } else { started_list = g_list_append(started_list, location); } } else { /* uncolocated group - bleh */ print_full = TRUE; } } else { // Instance of partially active anonymous clone 
print_full = TRUE; } if (print_full) { if (options & pe_print_html) { status_print("
    • \n"); } child_rsc->fns->print(child_rsc, child_text, options, print_data); if (options & pe_print_html) { status_print("
    • \n"); } } } /* Promoted */ promoted_list = g_list_sort(promoted_list, pe__cmp_node_name); for (gIter = promoted_list; gIter; gIter = gIter->next) { pe_node_t *host = gIter->data; pcmk__add_word(&list_text, 1024, host->details->uname); active_instances++; } if (list_text != NULL) { short_print((const char *) list_text->str, child_text, PROMOTED_INSTANCES, NULL, options, print_data); g_string_truncate(list_text, 0); } g_list_free(promoted_list); /* Started/Unpromoted */ started_list = g_list_sort(started_list, pe__cmp_node_name); for (gIter = started_list; gIter; gIter = gIter->next) { pe_node_t *host = gIter->data; pcmk__add_word(&list_text, 1024, host->details->uname); active_instances++; } if (list_text != NULL) { if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { enum rsc_role_e role = configured_role(rsc); if (role == RSC_ROLE_UNPROMOTED) { short_print((const char *) list_text->str, child_text, UNPROMOTED_INSTANCES " (target-role)", NULL, options, print_data); } else { short_print((const char *) list_text->str, child_text, UNPROMOTED_INSTANCES, NULL, options, print_data); } } else { short_print((const char *) list_text->str, child_text, "Started", NULL, options, print_data); } } g_list_free(started_list); if (!pcmk_is_set(options, pe_print_clone_active)) { const char *state = "Stopped"; enum rsc_role_e role = configured_role(rsc); if (role == RSC_ROLE_STOPPED) { state = "Stopped (disabled)"; } if (!pcmk_is_set(rsc->flags, pe_rsc_unique) && (clone_data->clone_max > active_instances)) { GList *nIter; GList *list = g_hash_table_get_values(rsc->allowed_nodes); /* Custom stopped list for non-unique clones */ if (stopped_list != NULL) { g_string_truncate(stopped_list, 0); } if (list == NULL) { /* Clusters with symmetrical=false haven't calculated allowed_nodes yet * If we've not probed for them yet, the Stopped list will be empty */ list = g_hash_table_get_values(rsc->known_on); } list = g_list_sort(list, pe__cmp_node_name); for (nIter = list; nIter != NULL; 
nIter = nIter->next) { pe_node_t *node = (pe_node_t *)nIter->data; if (pe_find_node(rsc->running_on, node->details->uname) == NULL) { pcmk__add_word(&stopped_list, 1024, node->details->uname); } } g_list_free(list); } if (stopped_list != NULL) { short_print((const char *) stopped_list->str, child_text, state, NULL, options, print_data); } } if (options & pe_print_html) { status_print("
    \n"); } if (list_text != NULL) { g_string_free(list_text, TRUE); } if (stopped_list != NULL) { g_string_free(stopped_list, TRUE); } free(child_text); } PCMK__OUTPUT_ARGS("clone", "uint32_t", "pe_resource_t *", "GList *", "GList *") int pe__clone_xml(pcmk__output_t *out, va_list args) { uint32_t show_opts = va_arg(args, uint32_t); pe_resource_t *rsc = va_arg(args, pe_resource_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); GList *gIter = rsc->children; GList *all = NULL; int rc = pcmk_rc_no_output; gboolean printed_header = FALSE; gboolean print_everything = TRUE; if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { return rc; } print_everything = pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) || (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches)); all = g_list_prepend(all, (gpointer) "*"); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) { continue; } if (child_rsc->fns->is_filtered(child_rsc, only_rsc, print_everything)) { continue; } if (!printed_header) { printed_header = TRUE; rc = pe__name_and_nvpairs_xml(out, true, "clone", 9, "id", rsc->id, "multi_state", pe__rsc_bool_str(rsc, pe_rsc_promotable), "unique", pe__rsc_bool_str(rsc, pe_rsc_unique), "maintenance", pe__rsc_bool_str(rsc, pe_rsc_maintenance), "managed", pe__rsc_bool_str(rsc, pe_rsc_managed), "disabled", pcmk__btoa(pe__resource_is_disabled(rsc)), "failed", pe__rsc_bool_str(rsc, pe_rsc_failed), "failure_ignored", pe__rsc_bool_str(rsc, pe_rsc_failure_ignored), "target_role", configured_role_str(rsc)); CRM_ASSERT(rc == pcmk_rc_ok); } out->message(out, crm_map_element_name(child_rsc->xml), show_opts, child_rsc, only_node, all); } if (printed_header) { pcmk__output_xml_pop_parent(out); } g_list_free(all); return rc; } PCMK__OUTPUT_ARGS("clone", "uint32_t", "pe_resource_t *", "GList *", 
"GList *") int pe__clone_default(pcmk__output_t *out, va_list args) { uint32_t show_opts = va_arg(args, uint32_t); pe_resource_t *rsc = va_arg(args, pe_resource_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); GHashTable *stopped = NULL; GString *list_text = NULL; GList *promoted_list = NULL; GList *started_list = NULL; GList *gIter = rsc->children; clone_variant_data_t *clone_data = NULL; int active_instances = 0; int rc = pcmk_rc_no_output; gboolean print_everything = TRUE; get_clone_variant_data(clone_data, rsc); if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { return rc; } print_everything = pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) || (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches)); for (; gIter != NULL; gIter = gIter->next) { gboolean print_full = FALSE; pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean partially_active = child_rsc->fns->active(child_rsc, FALSE); if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) { continue; } if (child_rsc->fns->is_filtered(child_rsc, only_rsc, print_everything)) { continue; } if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) { print_full = TRUE; } if (pcmk_is_set(rsc->flags, pe_rsc_unique)) { // Print individual instance when unique (except stopped orphans) if (partially_active || !pcmk_is_set(rsc->flags, pe_rsc_orphan)) { print_full = TRUE; } // Everything else in this block is for anonymous clones } else if (pcmk_is_set(show_opts, pcmk_show_pending) && (child_rsc->pending_task != NULL) && strcmp(child_rsc->pending_task, "probe")) { // Print individual instance when non-probe action is pending print_full = TRUE; } else if (partially_active == FALSE) { // List stopped instances when requested (except orphans) if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan) && !pcmk_is_set(show_opts, pcmk_show_clone_detail) && pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { if (stopped == NULL) { 
stopped = pcmk__strkey_table(free, free); } g_hash_table_insert(stopped, strdup(child_rsc->id), strdup("Stopped")); } } else if (is_set_recursive(child_rsc, pe_rsc_orphan, TRUE) || is_set_recursive(child_rsc, pe_rsc_managed, FALSE) == FALSE || is_set_recursive(child_rsc, pe_rsc_failed, TRUE)) { // Print individual instance when active orphaned/unmanaged/failed print_full = TRUE; } else if (child_rsc->fns->active(child_rsc, TRUE)) { // Instance of fully active anonymous clone pe_node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE); if (location) { // Instance is active on a single node enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE); if (location->details->online == FALSE && location->details->unclean) { print_full = TRUE; } else if (a_role > RSC_ROLE_UNPROMOTED) { promoted_list = g_list_append(promoted_list, location); } else { started_list = g_list_append(started_list, location); } } else { /* uncolocated group - bleh */ print_full = TRUE; } } else { // Instance of partially active anonymous clone print_full = TRUE; } if (print_full) { GList *all = NULL; clone_header(out, &rc, rsc, clone_data); /* Print every resource that's a child of this clone. 
*/ all = g_list_prepend(all, (gpointer) "*"); out->message(out, crm_map_element_name(child_rsc->xml), show_opts, child_rsc, only_node, all); g_list_free(all); } } if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) { PCMK__OUTPUT_LIST_FOOTER(out, rc); return pcmk_rc_ok; } /* Promoted */ promoted_list = g_list_sort(promoted_list, pe__cmp_node_name); for (gIter = promoted_list; gIter; gIter = gIter->next) { pe_node_t *host = gIter->data; if (!pcmk__str_in_list(host->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } pcmk__add_word(&list_text, 1024, host->details->uname); active_instances++; } g_list_free(promoted_list); if ((list_text != NULL) && (list_text->len > 0)) { clone_header(out, &rc, rsc, clone_data); out->list_item(out, NULL, PROMOTED_INSTANCES ": [ %s ]", (const char *) list_text->str); g_string_truncate(list_text, 0); } /* Started/Unpromoted */ started_list = g_list_sort(started_list, pe__cmp_node_name); for (gIter = started_list; gIter; gIter = gIter->next) { pe_node_t *host = gIter->data; if (!pcmk__str_in_list(host->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } pcmk__add_word(&list_text, 1024, host->details->uname); active_instances++; } g_list_free(started_list); if ((list_text != NULL) && (list_text->len > 0)) { clone_header(out, &rc, rsc, clone_data); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { enum rsc_role_e role = configured_role(rsc); if (role == RSC_ROLE_UNPROMOTED) { out->list_item(out, NULL, UNPROMOTED_INSTANCES " (target-role): [ %s ]", (const char *) list_text->str); } else { out->list_item(out, NULL, UNPROMOTED_INSTANCES ": [ %s ]", (const char *) list_text->str); } } else { out->list_item(out, NULL, "Started: [ %s ]", (const char *) list_text->str); } } if (list_text != NULL) { g_string_free(list_text, TRUE); } if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { if (!pcmk_is_set(rsc->flags, pe_rsc_unique) && (clone_data->clone_max > active_instances)) { GList 
*nIter; GList *list = g_hash_table_get_values(rsc->allowed_nodes); /* Custom stopped table for non-unique clones */ if (stopped != NULL) { g_hash_table_destroy(stopped); stopped = NULL; } if (list == NULL) { /* Clusters with symmetrical=false haven't calculated allowed_nodes yet * If we've not probed for them yet, the Stopped list will be empty */ list = g_hash_table_get_values(rsc->known_on); } list = g_list_sort(list, pe__cmp_node_name); for (nIter = list; nIter != NULL; nIter = nIter->next) { pe_node_t *node = (pe_node_t *)nIter->data; if (pe_find_node(rsc->running_on, node->details->uname) == NULL && pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { xmlNode *probe_op = pe__failed_probe_for_rsc(rsc, node->details->uname); const char *state = "Stopped"; if (configured_role(rsc) == RSC_ROLE_STOPPED) { state = "Stopped (disabled)"; } if (stopped == NULL) { stopped = pcmk__strkey_table(free, free); } if (probe_op != NULL) { int rc; pcmk__scan_min_int(crm_element_value(probe_op, XML_LRM_ATTR_RC), &rc, 0); g_hash_table_insert(stopped, strdup(node->details->uname), crm_strdup_printf("Stopped (%s)", services_ocf_exitcode_str(rc))); } else { g_hash_table_insert(stopped, strdup(node->details->uname), strdup(state)); } } } g_list_free(list); } if (stopped != NULL) { GList *list = sorted_hash_table_values(stopped); clone_header(out, &rc, rsc, clone_data); for (GList *status_iter = list; status_iter != NULL; status_iter = status_iter->next) { const char *status = status_iter->data; GList *nodes = nodes_with_status(stopped, status); GString *nodes_str = node_list_to_str(nodes); if (nodes_str != NULL) { if (nodes_str->len > 0) { out->list_item(out, NULL, "%s: [ %s ]", status, (const char *) nodes_str->str); } g_string_free(nodes_str, TRUE); } g_list_free(nodes); } g_list_free(list); g_hash_table_destroy(stopped); /* If there are no instances of this clone (perhaps because there are no * nodes configured), simply output the clone 
header by itself. This can * come up in PCS testing. */ } else if (active_instances == 0) { clone_header(out, &rc, rsc, clone_data); PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } void clone_free(pe_resource_t * rsc) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Freeing %s", rsc->id); for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; CRM_ASSERT(child_rsc); pe_rsc_trace(child_rsc, "Freeing child %s", child_rsc->id); free_xml(child_rsc->xml); child_rsc->xml = NULL; /* There could be a saved unexpanded xml */ free_xml(child_rsc->orig_xml); child_rsc->orig_xml = NULL; child_rsc->fns->free(child_rsc); } g_list_free(rsc->children); if (clone_data) { CRM_ASSERT(clone_data->demote_notify == NULL); CRM_ASSERT(clone_data->stop_notify == NULL); CRM_ASSERT(clone_data->start_notify == NULL); CRM_ASSERT(clone_data->promote_notify == NULL); } common_free(rsc); } enum rsc_role_e clone_resource_state(const pe_resource_t * rsc, gboolean current) { enum rsc_role_e clone_role = RSC_ROLE_UNKNOWN; GList *gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, current); if (a_role > clone_role) { clone_role = a_role; } } pe_rsc_trace(rsc, "%s role: %s", rsc->id, role2text(clone_role)); return clone_role; } /*! 
* \internal * \brief Check whether a clone has an instance for every node * * \param[in] rsc Clone to check * \param[in] data_set Cluster state */ bool pe__is_universal_clone(const pe_resource_t *rsc, const pe_working_set_t *data_set) { if (pe_rsc_is_clone(rsc)) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (clone_data->clone_max == g_list_length(data_set->nodes)) { return TRUE; } } return FALSE; } gboolean pe__clone_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent) { gboolean passes = FALSE; clone_variant_data_t *clone_data = NULL; if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches)) { passes = TRUE; } else { get_clone_variant_data(clone_data, rsc); passes = pcmk__str_in_list(ID(clone_data->xml_obj_child), only_rsc, pcmk__str_star_matches); if (!passes) { for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) { const pe_resource_t *child_rsc = NULL; child_rsc = (const pe_resource_t *) iter->data; if (!child_rsc->fns->is_filtered(child_rsc, only_rsc, FALSE)) { passes = TRUE; break; } } } } return !passes; } const char * pe__clone_child_id(const pe_resource_t *rsc) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); return ID(clone_data->xml_obj_child); } /*! * \internal * \brief Check whether a clone is ordered * * \param[in] clone Clone resource to check * * \return true if clone is ordered, otherwise false */ bool pe__clone_is_ordered(const pe_resource_t *clone) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, clone); return pcmk_is_set(clone_data->flags, pe__clone_ordered); } /*! 
* \internal * \brief Set a clone flag * * \param[in,out] clone Clone resource to set flag for * \param[in] flag Clone flag to set * * \return Standard Pacemaker return code (either pcmk_rc_ok if flag was not * already set or pcmk_rc_already if it was) */ int pe__set_clone_flag(pe_resource_t *clone, enum pe__clone_flags flag) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, clone); if (pcmk_is_set(clone_data->flags, flag)) { return pcmk_rc_already; } clone_data->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Clone", clone->id, clone_data->flags, flag, "flag"); return pcmk_rc_ok; } /*! * \internal * \brief Create pseudo-actions needed for promotable clones * * \param[in,out] clone Promotable clone to create actions for * \param[in] any_promoting Whether any instances will be promoted * \param[in] any_demoting Whether any instance will be demoted */ void pe__create_promotable_pseudo_ops(pe_resource_t *clone, bool any_promoting, bool any_demoting) { pe_action_t *action = NULL; pe_action_t *action_complete = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, clone); // Create a "promote" action for the clone itself action = pe__new_rsc_pseudo_action(clone, RSC_PROMOTE, !any_promoting, true); // Create a "promoted" action for when all promotions are done action_complete = pe__new_rsc_pseudo_action(clone, RSC_PROMOTED, !any_promoting, true); action_complete->priority = INFINITY; // Create notification pseudo-actions for promotion if (clone_data->promote_notify == NULL) { clone_data->promote_notify = pe__clone_notif_pseudo_ops(clone, RSC_PROMOTE, action, action_complete); } // Create a "demote" action for the clone itself action = pe__new_rsc_pseudo_action(clone, RSC_DEMOTE, !any_demoting, true); // Create a "demoted" action for when all demotions are done action_complete = pe__new_rsc_pseudo_action(clone, RSC_DEMOTED, !any_demoting, true); action_complete->priority = INFINITY; // Create notification 
pseudo-actions for demotion if (clone_data->demote_notify == NULL) { clone_data->demote_notify = pe__clone_notif_pseudo_ops(clone, RSC_DEMOTE, action, action_complete); if (clone_data->promote_notify != NULL) { order_actions(clone_data->stop_notify->post_done, clone_data->promote_notify->pre, pe_order_optional); order_actions(clone_data->start_notify->post_done, clone_data->promote_notify->pre, pe_order_optional); order_actions(clone_data->demote_notify->post_done, clone_data->promote_notify->pre, pe_order_optional); order_actions(clone_data->demote_notify->post_done, clone_data->start_notify->pre, pe_order_optional); order_actions(clone_data->demote_notify->post_done, clone_data->stop_notify->pre, pe_order_optional); } } }