diff --git a/include/crm/common/roles_internal.h b/include/crm/common/roles_internal.h index c544519d10..514e300201 100644 --- a/include/crm/common/roles_internal.h +++ b/include/crm/common/roles_internal.h @@ -1,24 +1,25 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_ROLES_INTERNAL__H # define PCMK__CRM_COMMON_ROLES_INTERNAL__H #ifdef __cplusplus extern "C" { #endif // String equivalents of enum rsc_role_e #define PCMK__ROLE_UNKNOWN "Unknown" +#define PCMK__ROLE_STOPPED "Stopped" #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_ROLES_INTERNAL__H diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h index 85c734681a..71ac7d0006 100644 --- a/include/crm/pengine/common.h +++ b/include/crm/pengine/common.h @@ -1,183 +1,182 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_PENGINE_COMMON__H # define PCMK__CRM_PENGINE_COMMON__H # include # include # include # include #ifdef __cplusplus extern "C" { #endif extern gboolean was_processing_error; extern gboolean was_processing_warning; /* The order is (partially) significant here; the values from action_fail_ignore * through action_fail_fence are in order of increasing severity. * * @COMPAT The values should be ordered and numbered per the "TODO" comments * below, so all values are in order of severity and there is room for * future additions, but that would break API compatibility. 
* @TODO For now, we just use a function to compare the values specially, but * at the next compatibility break, we should arrange things properly. */ enum action_fail_response { action_fail_ignore, // @TODO = 10 // @TODO action_fail_demote = 20, action_fail_recover, // @TODO = 30 // @TODO action_fail_reset_remote = 40, // @TODO action_fail_restart_container = 50, action_fail_migrate, // @TODO = 60 action_fail_block, // @TODO = 70 action_fail_stop, // @TODO = 80 action_fail_standby, // @TODO = 90 action_fail_fence, // @TODO = 100 // @COMPAT Values below here are out of order for API compatibility action_fail_restart_container, /* This is reserved for internal use for remote node connection resources. * Fence the remote node if stonith is enabled, otherwise attempt to recover * the connection resource. This allows us to specify types of connection * resource failures that should result in fencing the remote node * (for example, recurring monitor failures). */ action_fail_reset_remote, action_fail_demote, }; /* the "done" action must be the "pre" action +1 */ enum action_tasks { no_action, monitor_rsc, stop_rsc, stopped_rsc, start_rsc, started_rsc, action_notify, action_notified, action_promote, action_promoted, action_demote, action_demoted, shutdown_crm, stonith_node }; enum rsc_start_requirement { rsc_req_nothing, /* Allowed by custom_action() */ rsc_req_quorum, /* Enforced by custom_action() */ rsc_req_stonith /* Enforced by native_start_constraints() */ }; -# define RSC_ROLE_STOPPED_S "Stopped" # define RSC_ROLE_STARTED_S "Started" # define RSC_ROLE_UNPROMOTED_S "Unpromoted" # define RSC_ROLE_PROMOTED_S "Promoted" # define RSC_ROLE_UNPROMOTED_LEGACY_S "Slave" # define RSC_ROLE_PROMOTED_LEGACY_S "Master" //! Deprecated enum pe_print_options { pe_print_log = (1 << 0), pe_print_html = (1 << 1), pe_print_ncurses = (1 << 2), pe_print_printf = (1 << 3), pe_print_dev = (1 << 4), //! Ignored pe_print_details = (1 << 5), //! Ignored pe_print_max_details = (1 << 6), //! 
Ignored pe_print_rsconly = (1 << 7), pe_print_ops = (1 << 8), pe_print_suppres_nl = (1 << 9), pe_print_xml = (1 << 10), pe_print_brief = (1 << 11), pe_print_pending = (1 << 12), pe_print_clone_details = (1 << 13), pe_print_clone_active = (1 << 14), // Print clone instances only if active pe_print_implicit = (1 << 15) // Print implicitly created resources }; const char *task2text(enum action_tasks task); enum action_tasks text2task(const char *task); enum rsc_role_e text2role(const char *role); const char *role2text(enum rsc_role_e role); const char *fail2text(enum action_fail_response fail); const char *pe_pref(GHashTable * options, const char *name); /*! * \brief Get readable description of a recovery type * * \param[in] type Recovery type * * \return Static string describing \p type */ static inline const char * recovery2text(enum rsc_recovery_type type) { switch (type) { case pcmk_multiply_active_stop: return "shutting it down"; case pcmk_multiply_active_restart: return "attempting recovery"; case pcmk_multiply_active_block: return "waiting for an administrator"; case pcmk_multiply_active_unexpected: return "stopping unexpected instances"; } return "Unknown"; } typedef struct pe_re_match_data { char *string; int nregs; regmatch_t *pmatch; } pe_re_match_data_t; typedef struct pe_match_data { pe_re_match_data_t *re; GHashTable *params; GHashTable *meta; } pe_match_data_t; typedef struct pe_rsc_eval_data { const char *standard; const char *provider; const char *agent; } pe_rsc_eval_data_t; typedef struct pe_op_eval_data { const char *op_name; guint interval; } pe_op_eval_data_t; typedef struct pe_rule_eval_data { GHashTable *node_hash; // Only used with g_hash_table_lookup() enum rsc_role_e role; crm_time_t *now; // @COMPAT could be const pe_match_data_t *match_data; // @COMPAT could be const pe_rsc_eval_data_t *rsc_data; // @COMPAT could be const pe_op_eval_data_t *op_data; // @COMPAT could be const } pe_rule_eval_data_t; #if !defined(PCMK_ALLOW_DEPRECATED) || 
(PCMK_ALLOW_DEPRECATED == 1) #include #endif #ifdef __cplusplus } #endif #endif diff --git a/include/crm/pengine/common_compat.h b/include/crm/pengine/common_compat.h index 0e78dd6fd4..15b3cb0dad 100644 --- a/include/crm/pengine/common_compat.h +++ b/include/crm/pengine/common_compat.h @@ -1,45 +1,48 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_PENGINE_COMMON_COMPAT__H # define PCMK__CRM_PENGINE_COMMON_COMPAT__H #include #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Deprecated Pacemaker scheduler utilities * \ingroup pengine * \deprecated Do not include this header directly. The utilities in this * header, and the header itself, will be removed in a future * release. */ //! \deprecated Use (pcmk_role_promoted + 1) instead #define RSC_ROLE_MAX (pcmk_role_promoted + 1) //! \deprecated Use role2text(pcmk_role_unknown) instead #define RSC_ROLE_UNKNOWN_S role2text(pcmk_role_unknown) +//! \deprecated Use role2text(pcmk_role_stopped) instead +#define RSC_ROLE_STOPPED_S role2text(pcmk_role_stopped) + //! \deprecated Use RSC_ROLE_UNPROMOTED_LEGACY_S instead # define RSC_ROLE_SLAVE_S RSC_ROLE_UNPROMOTED_LEGACY_S //! \deprecated Use RSC_ROLE_PROMOTED_LEGACY_S instead # define RSC_ROLE_MASTER_S RSC_ROLE_PROMOTED_LEGACY_S #ifdef __cplusplus } #endif #endif // PCMK__CRM_PENGINE_COMMON_COMPAT__H diff --git a/lib/pacemaker/pcmk_sched_recurring.c b/lib/pacemaker/pcmk_sched_recurring.c index d480a22fb0..e0ef7805c3 100644 --- a/lib/pacemaker/pcmk_sched_recurring.c +++ b/lib/pacemaker/pcmk_sched_recurring.c @@ -1,717 +1,718 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. 
* * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include +#include #include #include "libpacemaker_private.h" // Information parsed from an operation history entry in the CIB struct op_history { // XML attributes const char *id; // ID of history entry const char *name; // Action name // Parsed information char *key; // Operation key for action enum rsc_role_e role; // Action role (or pcmk_role_unknown for default) guint interval_ms; // Action interval }; /*! * \internal * \brief Parse an interval from XML * * \param[in] xml XML containing an interval attribute * * \return Interval parsed from XML (or 0 as default) */ static guint xe_interval(const xmlNode *xml) { return crm_parse_interval_spec(crm_element_value(xml, XML_LRM_ATTR_INTERVAL)); } /*! * \internal * \brief Check whether an operation exists multiple times in resource history * * \param[in] rsc Resource with history to search * \param[in] name Name of action to search for * \param[in] interval_ms Interval (in milliseconds) of action to search for * * \return true if an operation with \p name and \p interval_ms exists more than * once in the operation history of \p rsc, otherwise false */ static bool is_op_dup(const pe_resource_t *rsc, const char *name, guint interval_ms) { const char *id = NULL; for (xmlNode *op = first_named_child(rsc->ops_xml, "op"); op != NULL; op = crm_next_same_xml(op)) { // Check whether action name and interval match if (!pcmk__str_eq(crm_element_value(op, "name"), name, pcmk__str_none) || (xe_interval(op) != interval_ms)) { continue; } if (ID(op) == NULL) { continue; // Shouldn't be possible } if (id == NULL) { id = ID(op); // First matching op } else { pcmk__config_err("Operation %s is duplicate of %s (do not use " "same name and interval combination more " "than once per resource)", ID(op), id); return true; } } return false; } /*! 
* \internal * \brief Check whether an action name is one that can be recurring * * \param[in] name Action name to check * * \return true if \p name is an action known to be unsuitable as a recurring * operation, otherwise false * * \note Pacemaker's current philosophy is to allow users to configure recurring * operations except for a short list of actions known not to be suitable * for that (as opposed to allowing only actions known to be suitable, * which includes only monitor). Among other things, this approach allows * users to define their own custom operations and make them recurring, * though that use case is not well tested. */ static bool op_cannot_recur(const char *name) { return pcmk__str_any_of(name, PCMK_ACTION_STOP, PCMK_ACTION_START, PCMK_ACTION_DEMOTE, PCMK_ACTION_PROMOTE, PCMK_ACTION_RELOAD_AGENT, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL); } /*! * \internal * \brief Check whether a resource history entry is for a recurring action * * \param[in] rsc Resource that history entry is for * \param[in] xml XML of resource history entry to check * \param[out] op Where to store parsed info if recurring * * \return true if \p xml is for a recurring action, otherwise false */ static bool is_recurring_history(const pe_resource_t *rsc, const xmlNode *xml, struct op_history *op) { const char *role = NULL; op->interval_ms = xe_interval(xml); if (op->interval_ms == 0) { return false; // Not recurring } op->id = ID(xml); if (pcmk__str_empty(op->id)) { pcmk__config_err("Ignoring resource history entry without ID"); return false; // Shouldn't be possible (unless CIB was manually edited) } op->name = crm_element_value(xml, "name"); if (op_cannot_recur(op->name)) { pcmk__config_err("Ignoring %s because %s action cannot be recurring", op->id, pcmk__s(op->name, "unnamed")); return false; } // There should only be one recurring operation per action/interval if (is_op_dup(rsc, op->name, op->interval_ms)) { return false; } // Ensure role is valid if specified 
role = crm_element_value(xml, "role"); if (role == NULL) { op->role = pcmk_role_unknown; } else { op->role = text2role(role); if (op->role == pcmk_role_unknown) { pcmk__config_err("Ignoring %s because %s is not a valid role", op->id, role); } } // Disabled resources don't get monitored op->key = pcmk__op_key(rsc->id, op->name, op->interval_ms); if (find_rsc_op_entry(rsc, op->key) == NULL) { crm_trace("Not creating recurring action %s for disabled resource %s", op->id, rsc->id); free(op->key); return false; } return true; } /*! * \internal * \brief Check whether a recurring action for an active role should be optional * * \param[in] rsc Resource that recurring action is for * \param[in] node Node that \p rsc will be active on (if any) * \param[in] key Operation key for recurring action to check * \param[in,out] start Start action for \p rsc * * \return true if recurring action should be optional, otherwise false */ static bool active_recurring_should_be_optional(const pe_resource_t *rsc, const pe_node_t *node, const char *key, pe_action_t *start) { GList *possible_matches = NULL; if (node == NULL) { // Should only be possible if unmanaged and stopped pe_rsc_trace(rsc, "%s will be mandatory because resource is unmanaged", key); return false; } if (!pcmk_is_set(rsc->cmds->action_flags(start, NULL), pe_action_optional)) { pe_rsc_trace(rsc, "%s will be mandatory because %s is", key, start->uuid); return false; } possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches == NULL) { pe_rsc_trace(rsc, "%s will be mandatory because it is not active on %s", key, pe__node_name(node)); return false; } for (const GList *iter = possible_matches; iter != NULL; iter = iter->next) { const pe_action_t *op = (const pe_action_t *) iter->data; if (pcmk_is_set(op->flags, pe_action_reschedule)) { pe_rsc_trace(rsc, "%s will be mandatory because " "it needs to be rescheduled", key); g_list_free(possible_matches); return false; } } g_list_free(possible_matches); 
return true; } /*! * \internal * \brief Create recurring action from resource history entry for an active role * * \param[in,out] rsc Resource that resource history is for * \param[in,out] start Start action for \p rsc on \p node * \param[in] node Node that resource will be active on (if any) * \param[in] op Resource history entry */ static void recurring_op_for_active(pe_resource_t *rsc, pe_action_t *start, const pe_node_t *node, const struct op_history *op) { pe_action_t *mon = NULL; bool is_optional = true; const bool is_default_role = (op->role == pcmk_role_unknown); // We're only interested in recurring actions for active roles if (op->role == pcmk_role_stopped) { return; } is_optional = active_recurring_should_be_optional(rsc, node, op->key, start); if ((!is_default_role && (rsc->next_role != op->role)) || (is_default_role && (rsc->next_role == pcmk_role_promoted))) { // Configured monitor role doesn't match role resource will have if (is_optional) { // It's running, so cancel it char *after_key = NULL; pe_action_t *cancel_op = pcmk__new_cancel_action(rsc, op->name, op->interval_ms, node); switch (rsc->role) { case pcmk_role_unpromoted: case pcmk_role_started: if (rsc->next_role == pcmk_role_promoted) { after_key = promote_key(rsc); } else if (rsc->next_role == pcmk_role_stopped) { after_key = stop_key(rsc); } break; case pcmk_role_promoted: after_key = demote_key(rsc); break; default: break; } if (after_key) { pcmk__new_ordering(rsc, NULL, cancel_op, rsc, after_key, NULL, pe_order_runnable_left, rsc->cluster); } } do_crm_log((is_optional? LOG_INFO : LOG_TRACE), "%s recurring action %s because %s configured for %s role " "(not %s)", (is_optional? "Cancelling" : "Ignoring"), op->key, op->id, role2text(is_default_role? pcmk_role_unpromoted : op->role), role2text(rsc->next_role)); return; } pe_rsc_trace(rsc, "Creating %s recurring action %s for %s (%s %s on %s)", (is_optional? 
"optional" : "mandatory"), op->key, op->id, rsc->id, role2text(rsc->next_role), pe__node_name(node)); mon = custom_action(rsc, strdup(op->key), op->name, node, is_optional, TRUE, rsc->cluster); if (!pcmk_is_set(start->flags, pe_action_runnable)) { pe_rsc_trace(rsc, "%s is unrunnable because start is", mon->uuid); pe__clear_action_flags(mon, pe_action_runnable); } else if ((node == NULL) || !node->details->online || node->details->unclean) { pe_rsc_trace(rsc, "%s is unrunnable because no node is available", mon->uuid); pe__clear_action_flags(mon, pe_action_runnable); } else if (!pcmk_is_set(mon->flags, pe_action_optional)) { pe_rsc_info(rsc, "Start %s-interval %s for %s on %s", pcmk__readable_interval(op->interval_ms), mon->task, rsc->id, pe__node_name(node)); } if (rsc->next_role == pcmk_role_promoted) { pe__add_action_expected_result(mon, CRM_EX_PROMOTED); } // Order monitor relative to other actions if ((node == NULL) || pcmk_is_set(rsc->flags, pe_rsc_managed)) { pcmk__new_ordering(rsc, start_key(rsc), NULL, NULL, strdup(mon->uuid), mon, pe_order_implies_then|pe_order_runnable_left, rsc->cluster); pcmk__new_ordering(rsc, reload_key(rsc), NULL, NULL, strdup(mon->uuid), mon, pe_order_implies_then|pe_order_runnable_left, rsc->cluster); if (rsc->next_role == pcmk_role_promoted) { pcmk__new_ordering(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, rsc->cluster); } else if (rsc->role == pcmk_role_promoted) { pcmk__new_ordering(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, rsc->cluster); } } } /*! 
* \internal * \brief Cancel a recurring action if running on a node * * \param[in,out] rsc Resource that action is for * \param[in] node Node to cancel action on * \param[in] key Operation key for action * \param[in] name Action name * \param[in] interval_ms Action interval (in milliseconds) */ static void cancel_if_running(pe_resource_t *rsc, const pe_node_t *node, const char *key, const char *name, guint interval_ms) { GList *possible_matches = find_actions_exact(rsc->actions, key, node); pe_action_t *cancel_op = NULL; if (possible_matches == NULL) { return; // Recurring action isn't running on this node } g_list_free(possible_matches); cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node); switch (rsc->next_role) { case pcmk_role_started: case pcmk_role_unpromoted: /* Order starts after cancel. If the current role is * stopped, this cancels the monitor before the resource * starts; if the current role is started, then this cancels * the monitor on a migration target before starting there. */ pcmk__new_ordering(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pe_order_runnable_left, rsc->cluster); break; default: break; } pe_rsc_info(rsc, "Cancelling %s-interval %s action for %s on %s because " - "configured for " RSC_ROLE_STOPPED_S " role (not %s)", + "configured for " PCMK__ROLE_STOPPED " role (not %s)", pcmk__readable_interval(interval_ms), name, rsc->id, pe__node_name(node), role2text(rsc->next_role)); } /*! 
* \internal * \brief Order an action after all probes of a resource on a node * * \param[in,out] rsc Resource to check for probes * \param[in] node Node to check for probes of \p rsc * \param[in,out] action Action to order after probes of \p rsc on \p node */ static void order_after_probes(pe_resource_t *rsc, const pe_node_t *node, pe_action_t *action) { GList *probes = pe__resource_actions(rsc, node, PCMK_ACTION_MONITOR, FALSE); for (GList *iter = probes; iter != NULL; iter = iter->next) { order_actions((pe_action_t *) iter->data, action, pe_order_runnable_left); } g_list_free(probes); } /*! * \internal * \brief Order an action after all stops of a resource on a node * * \param[in,out] rsc Resource to check for stops * \param[in] node Node to check for stops of \p rsc * \param[in,out] action Action to order after stops of \p rsc on \p node */ static void order_after_stops(pe_resource_t *rsc, const pe_node_t *node, pe_action_t *action) { GList *stop_ops = pe__resource_actions(rsc, node, PCMK_ACTION_STOP, TRUE); for (GList *iter = stop_ops; iter != NULL; iter = iter->next) { pe_action_t *stop = (pe_action_t *) iter->data; if (!pcmk_is_set(stop->flags, pe_action_optional) && !pcmk_is_set(action->flags, pe_action_optional) && !pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "%s optional on %s: unmanaged", action->uuid, pe__node_name(node)); pe__set_action_flags(action, pe_action_optional); } if (!pcmk_is_set(stop->flags, pe_action_runnable)) { crm_debug("%s unrunnable on %s: stop is unrunnable", action->uuid, pe__node_name(node)); pe__clear_action_flags(action, pe_action_runnable); } if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { pcmk__new_ordering(rsc, stop_key(rsc), stop, NULL, NULL, action, pe_order_implies_then|pe_order_runnable_left, rsc->cluster); } } g_list_free(stop_ops); } /*! 
* \internal * \brief Create recurring action from resource history entry for inactive role * * \param[in,out] rsc Resource that resource history is for * \param[in] node Node that resource will be active on (if any) * \param[in] op Resource history entry */ static void recurring_op_for_inactive(pe_resource_t *rsc, const pe_node_t *node, const struct op_history *op) { GList *possible_matches = NULL; // We're only interested in recurring actions for the inactive role if (op->role != pcmk_role_stopped) { return; } if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { - crm_notice("Ignoring %s (recurring monitors for " RSC_ROLE_STOPPED_S + crm_notice("Ignoring %s (recurring monitors for " PCMK__ROLE_STOPPED " role are not supported for anonymous clones)", op->id); return; // @TODO add support } pe_rsc_trace(rsc, "Creating recurring action %s for %s on nodes " "where it should not be running", op->id, rsc->id); for (GList *iter = rsc->cluster->nodes; iter != NULL; iter = iter->next) { pe_node_t *stop_node = (pe_node_t *) iter->data; bool is_optional = true; pe_action_t *stopped_mon = NULL; // Cancel action on node where resource will be active if ((node != NULL) && pcmk__str_eq(stop_node->details->uname, node->details->uname, pcmk__str_casei)) { cancel_if_running(rsc, node, op->key, op->name, op->interval_ms); continue; } // Recurring action on this node is optional if it's already active here possible_matches = find_actions_exact(rsc->actions, op->key, stop_node); is_optional = (possible_matches != NULL); g_list_free(possible_matches); pe_rsc_trace(rsc, "Creating %s recurring action %s for %s (%s " - RSC_ROLE_STOPPED_S " on %s)", + PCMK__ROLE_STOPPED " on %s)", (is_optional? 
"optional" : "mandatory"), op->key, op->id, rsc->id, pe__node_name(stop_node)); stopped_mon = custom_action(rsc, strdup(op->key), op->name, stop_node, is_optional, TRUE, rsc->cluster); pe__add_action_expected_result(stopped_mon, CRM_EX_NOT_RUNNING); if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { order_after_probes(rsc, stop_node, stopped_mon); } /* The recurring action is for the inactive role, so it shouldn't be * performed until the resource is inactive. */ order_after_stops(rsc, stop_node, stopped_mon); if (!stop_node->details->online || stop_node->details->unclean) { pe_rsc_debug(rsc, "%s unrunnable on %s: node unavailable)", stopped_mon->uuid, pe__node_name(stop_node)); pe__clear_action_flags(stopped_mon, pe_action_runnable); } if (pcmk_is_set(stopped_mon->flags, pe_action_runnable) && !pcmk_is_set(stopped_mon->flags, pe_action_optional)) { crm_notice("Start recurring %s-interval %s for " - RSC_ROLE_STOPPED_S " %s on %s", + PCMK__ROLE_STOPPED " %s on %s", pcmk__readable_interval(op->interval_ms), stopped_mon->task, rsc->id, pe__node_name(stop_node)); } } } /*! 
* \internal * \brief Create recurring actions for a resource * * \param[in,out] rsc Resource to create recurring actions for */ void pcmk__create_recurring_actions(pe_resource_t *rsc) { pe_action_t *start = NULL; if (pcmk_is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Skipping recurring actions for blocked resource %s", rsc->id); return; } if (pcmk_is_set(rsc->flags, pe_rsc_maintenance)) { pe_rsc_trace(rsc, "Skipping recurring actions for %s " "in maintenance mode", rsc->id); return; } if (rsc->allocated_to == NULL) { // Recurring actions for active roles not needed } else if (rsc->allocated_to->details->maintenance) { pe_rsc_trace(rsc, "Skipping recurring actions for %s on %s " "in maintenance mode", rsc->id, pe__node_name(rsc->allocated_to)); } else if ((rsc->next_role != pcmk_role_stopped) || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { // Recurring actions for active roles needed start = start_action(rsc, rsc->allocated_to, TRUE); } pe_rsc_trace(rsc, "Creating any recurring actions needed for %s", rsc->id); for (xmlNode *op = first_named_child(rsc->ops_xml, "op"); op != NULL; op = crm_next_same_xml(op)) { struct op_history op_history = { NULL, }; if (!is_recurring_history(rsc, op, &op_history)) { continue; } if (start != NULL) { recurring_op_for_active(rsc, start, rsc->allocated_to, &op_history); } recurring_op_for_inactive(rsc, rsc->allocated_to, &op_history); free(op_history.key); } } /*! 
* \internal * \brief Create an executor cancel action * * \param[in,out] rsc Resource of action to cancel * \param[in] task Name of action to cancel * \param[in] interval_ms Interval of action to cancel * \param[in] node Node of action to cancel * * \return Created op */ pe_action_t * pcmk__new_cancel_action(pe_resource_t *rsc, const char *task, guint interval_ms, const pe_node_t *node) { pe_action_t *cancel_op = NULL; char *key = NULL; char *interval_ms_s = NULL; CRM_ASSERT((rsc != NULL) && (task != NULL) && (node != NULL)); // @TODO dangerous if possible to schedule another action with this key key = pcmk__op_key(rsc->id, task, interval_ms); cancel_op = custom_action(rsc, key, PCMK_ACTION_CANCEL, node, FALSE, TRUE, rsc->cluster); pcmk__str_update(&cancel_op->task, PCMK_ACTION_CANCEL); pcmk__str_update(&cancel_op->cancel_task, task); interval_ms_s = crm_strdup_printf("%u", interval_ms); add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task); add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL_MS, interval_ms_s); free(interval_ms_s); return cancel_op; } /*! 
* \internal * \brief Schedule cancellation of a recurring action * * \param[in,out] rsc Resource that action is for * \param[in] call_id Action's call ID from history * \param[in] task Action name * \param[in] interval_ms Action interval * \param[in] node Node that history entry is for * \param[in] reason Short description of why action is cancelled */ void pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id, const char *task, guint interval_ms, const pe_node_t *node, const char *reason) { pe_action_t *cancel = NULL; CRM_CHECK((rsc != NULL) && (task != NULL) && (node != NULL) && (reason != NULL), return); crm_info("Recurring %s-interval %s for %s will be stopped on %s: %s", pcmk__readable_interval(interval_ms), task, rsc->id, pe__node_name(node), reason); cancel = pcmk__new_cancel_action(rsc, task, interval_ms, node); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); // Cancellations happen after stops pcmk__new_ordering(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, rsc->cluster); } /*! * \internal * \brief Reschedule a recurring action * * \param[in,out] rsc Resource that action is for * \param[in] task Name of action being rescheduled * \param[in] interval_ms Action interval (in milliseconds) * \param[in,out] node Node where action should be rescheduled */ void pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task, guint interval_ms, pe_node_t *node) { pe_action_t *op = NULL; trigger_unfencing(rsc, node, "Device parameters changed (reschedule)", NULL, rsc->cluster); op = custom_action(rsc, pcmk__op_key(rsc->id, task, interval_ms), task, node, TRUE, TRUE, rsc->cluster); pe__set_action_flags(op, pe_action_reschedule); } /*! 
* \internal * \brief Check whether an action is recurring * * \param[in] action Action to check * * \return true if \p action has a nonzero interval, otherwise false */ bool pcmk__action_is_recurring(const pe_action_t *action) { guint interval_ms = 0; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { return false; } return (interval_ms > 0); } diff --git a/lib/pengine/common.c b/lib/pengine/common.c index 367efd2a6c..e44aeec302 100644 --- a/lib/pengine/common.c +++ b/lib/pengine/common.c @@ -1,628 +1,628 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include gboolean was_processing_error = FALSE; gboolean was_processing_warning = FALSE; static bool check_placement_strategy(const char *value) { return pcmk__strcase_any_of(value, "default", "utilization", "minimal", "balanced", NULL); } static pcmk__cluster_option_t pe_opts[] = { /* name, old name, type, allowed values, * default value, validator, * short description, * long description */ { "no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum, N_("What to do when the cluster does not have quorum"), NULL }, { "symmetric-cluster", NULL, "boolean", NULL, "true", pcmk__valid_boolean, N_("Whether resources can run on any node by default"), NULL }, { "maintenance-mode", NULL, "boolean", NULL, "false", pcmk__valid_boolean, N_("Whether the cluster should refrain from monitoring, starting, " "and stopping resources"), NULL }, { "start-failure-is-fatal", NULL, "boolean", NULL, "true", pcmk__valid_boolean, N_("Whether a start failure should prevent a resource from being " "recovered on the same node"), N_("When true, the cluster will 
immediately ban a resource from a node " "if it fails to start there. When false, the cluster will instead " "check the resource's fail count against its migration-threshold.") }, { "enable-startup-probes", NULL, "boolean", NULL, "true", pcmk__valid_boolean, N_("Whether the cluster should check for active resources during start-up"), NULL }, { XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, "false", pcmk__valid_boolean, N_("Whether to lock resources to a cleanly shut down node"), N_("When true, resources active on a node when it is cleanly shut down " "are kept \"locked\" to that node (not allowed to run elsewhere) " "until they start again on that node after it rejoins (or for at " "most shutdown-lock-limit, if set). Stonith resources and " "Pacemaker Remote connections are never locked. Clone and bundle " "instances and the promoted role of promotable clones are " "currently never locked, though support could be added in a future " "release.") }, { XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, "0", pcmk__valid_interval_spec, N_("Do not lock resources to a cleanly shut down node longer than " "this"), N_("If shutdown-lock is true and this is set to a nonzero time " "duration, shutdown locks will expire after this much time has " "passed since the shutdown was initiated, even if the node has not " "rejoined.") }, // Fencing-related options { "stonith-enabled", NULL, "boolean", NULL, "true", pcmk__valid_boolean, N_("*** Advanced Use Only *** " "Whether nodes may be fenced as part of recovery"), N_("If false, unresponsive nodes are immediately assumed to be harmless, " "and resources that were active on them may be recovered " "elsewhere. 
This can result in a \"split-brain\" situation, " "potentially leading to data loss and/or service unavailability.") }, { "stonith-action", NULL, "select", "reboot, off, poweroff", PCMK_ACTION_REBOOT, pcmk__is_fencing_action, N_("Action to send to fence device when a node needs to be fenced " "(\"poweroff\" is a deprecated alias for \"off\")"), NULL }, { "stonith-timeout", NULL, "time", NULL, "60s", pcmk__valid_interval_spec, N_("*** Advanced Use Only *** Unused by Pacemaker"), N_("This value is not used by Pacemaker, but is kept for backward " "compatibility, and certain legacy fence agents might use it.") }, { XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL, "false", pcmk__valid_boolean, N_("Whether watchdog integration is enabled"), N_("This is set automatically by the cluster according to whether SBD " "is detected to be in use. User-configured values are ignored. " "The value `true` is meaningful if diskless SBD is used and " "`stonith-watchdog-timeout` is nonzero. In that case, if fencing " "is required, watchdog-based self-fencing will be performed via " "SBD without requiring a fencing resource explicitly configured.") }, { "concurrent-fencing", NULL, "boolean", NULL, PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean, N_("Allow performing fencing operations in parallel"), NULL }, { "startup-fencing", NULL, "boolean", NULL, "true", pcmk__valid_boolean, N_("*** Advanced Use Only *** Whether to fence unseen nodes at start-up"), N_("Setting this to false may lead to a \"split-brain\" situation," "potentially leading to data loss and/or service unavailability.") }, { XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL, "0", pcmk__valid_interval_spec, N_("Apply fencing delay targeting the lost nodes with the highest total resource priority"), N_("Apply specified delay for the fencings that are targeting the lost " "nodes with the highest total resource priority in case we don't " "have the majority of the nodes in our cluster partition, so that " "the 
more significant nodes potentially win any fencing match, " "which is especially meaningful under split-brain of 2-node " "cluster. A promoted resource instance takes the base priority + 1 " "on calculation if the base priority is not 0. Any static/random " "delays that are introduced by `pcmk_delay_base/max` configured " "for the corresponding fencing resources will be added to this " "delay. This delay should be significantly greater than, safely " "twice, the maximum `pcmk_delay_base/max`. By default, priority " "fencing delay is disabled.") }, { XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL, "10min", pcmk__valid_interval_spec, N_("How long to wait for a node that has joined the cluster to join " "the process group"), N_("A node that has joined the cluster can be pending on joining the " "process group. We wait up to this much time for it. If it times " "out, fencing targeting the node will be issued if enabled.") }, { "cluster-delay", NULL, "time", NULL, "60s", pcmk__valid_interval_spec, N_("Maximum time for node-to-node communication"), N_("The node elected Designated Controller (DC) will consider an action " "failed if it does not get a response from the node executing the " "action within this time (after considering the action's own " "timeout). The \"correct\" value will depend on the speed and " "load of your network and cluster nodes.") }, { "batch-limit", NULL, "integer", NULL, "0", pcmk__valid_number, N_("Maximum number of jobs that the cluster may execute in parallel " "across all nodes"), N_("The \"correct\" value will depend on the speed and load of your " "network and cluster nodes. 
If set to 0, the cluster will " "impose a dynamically calculated limit when any node has a " "high load.") }, { "migration-limit", NULL, "integer", NULL, "-1", pcmk__valid_number, N_("The number of live migration actions that the cluster is allowed " "to execute in parallel on a node (-1 means no limit)") }, /* Orphans and stopping */ { "stop-all-resources", NULL, "boolean", NULL, "false", pcmk__valid_boolean, N_("Whether the cluster should stop all active resources"), NULL }, { "stop-orphan-resources", NULL, "boolean", NULL, "true", pcmk__valid_boolean, N_("Whether to stop resources that were removed from the configuration"), NULL }, { "stop-orphan-actions", NULL, "boolean", NULL, "true", pcmk__valid_boolean, N_("Whether to cancel recurring actions removed from the configuration"), NULL }, { "remove-after-stop", NULL, "boolean", NULL, "false", pcmk__valid_boolean, N_("*** Deprecated *** Whether to remove stopped resources from " "the executor"), N_("Values other than default are poorly tested and potentially dangerous." 
" This option will be removed in a future release.") }, /* Storing inputs */ { "pe-error-series-max", NULL, "integer", NULL, "-1", pcmk__valid_number, N_("The number of scheduler inputs resulting in errors to save"), N_("Zero to disable, -1 to store unlimited.") }, { "pe-warn-series-max", NULL, "integer", NULL, "5000", pcmk__valid_number, N_("The number of scheduler inputs resulting in warnings to save"), N_("Zero to disable, -1 to store unlimited.") }, { "pe-input-series-max", NULL, "integer", NULL, "4000", pcmk__valid_number, N_("The number of scheduler inputs without errors or warnings to save"), N_("Zero to disable, -1 to store unlimited.") }, /* Node health */ { PCMK__OPT_NODE_HEALTH_STRATEGY, NULL, "select", PCMK__VALUE_NONE ", " PCMK__VALUE_MIGRATE_ON_RED ", " PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", " PCMK__VALUE_CUSTOM, PCMK__VALUE_NONE, pcmk__validate_health_strategy, N_("How cluster should react to node health attributes"), N_("Requires external entities to create node attributes (named with " "the prefix \"#health\") with values \"red\", " "\"yellow\", or \"green\".") }, { PCMK__OPT_NODE_HEALTH_BASE, NULL, "integer", NULL, "0", pcmk__valid_number, N_("Base health score assigned to a node"), N_("Only used when \"node-health-strategy\" is set to \"progressive\".") }, { PCMK__OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL, "0", pcmk__valid_number, N_("The score to use for a node health attribute whose value is \"green\""), N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") }, { PCMK__OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL, "0", pcmk__valid_number, N_("The score to use for a node health attribute whose value is \"yellow\""), N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") }, { PCMK__OPT_NODE_HEALTH_RED, NULL, "integer", NULL, "-INFINITY", pcmk__valid_number, N_("The score to use for a node health attribute whose value is \"red\""), N_("Only used when 
\"node-health-strategy\" is set to \"custom\" or \"progressive\".")
    },

    /*Placement Strategy*/
    {
        "placement-strategy", NULL, "select",
        "default, utilization, minimal, balanced", "default",
        check_placement_strategy,
        N_("How the cluster should allocate resources to nodes"),
        NULL
    },
};

/*!
 * \brief Output the scheduler's cluster option metadata
 *
 * Formats the entries of \c pe_opts with pcmk__format_option_metadata() under
 * the name "pacemaker-schedulerd", hands the resulting string to the output
 * object's \c output_xml method with the tag "metadata", then frees the
 * string.
 *
 * \param[in,out] out  Output object whose \c output_xml method is called
 */
void
pe_metadata(pcmk__output_t *out)
{
    const char *desc_short = "Pacemaker scheduler options";
    const char *desc_long = "Cluster options used by Pacemaker's scheduler";

    gchar *s = pcmk__format_option_metadata("pacemaker-schedulerd",
                                            desc_short, desc_long, pe_opts,
                                            PCMK__NELEM(pe_opts));
    out->output_xml(out, "metadata", s);
    g_free(s); // s was returned as a gchar *, so release it with g_free()
}

/*!
 * \brief Check a table of option values against the scheduler option list
 *
 * This is a thin wrapper that passes \p options and \c pe_opts to
 * pcmk__validate_cluster_options().
 * NOTE(review): what happens to invalid values is decided by that helper and
 * is not visible here -- confirm before relying on it.
 *
 * \param[in,out] options  Option name/value table to check
 */
void
verify_pe_options(GHashTable * options)
{
    pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts));
}

/*!
 * \brief Look up the value of a scheduler option
 *
 * \param[in] options  Option name/value table to search
 * \param[in] name     Name of the option wanted
 *
 * \return Whatever pcmk__cluster_option() returns for \p name when given
 *         \p options and the \c pe_opts table
 */
const char *
pe_pref(GHashTable * options, const char *name)
{
    return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name);
}

/*!
 * \brief Get the text name of an action failure response
 *
 * \param[in] fail  Failure response to convert
 *
 * \return Constant string for \p fail; a value with no \c case label below
 *         yields the initial value of \c result
 */
const char *
fail2text(enum action_fail_response fail)
{
    const char *result = "";

    // There is no default label: unlisted values keep the initial result
    switch (fail) {
        case action_fail_ignore:
            result = "ignore";
            break;
        case action_fail_demote:
            result = "demote";
            break;
        case action_fail_block:
            result = "block";
            break;
        case action_fail_recover:
            result = "recover";
            break;
        case action_fail_migrate:
            result = "migrate";
            break;
        case action_fail_stop:
            result = "stop";
            break;
        case action_fail_fence:
            result = "fence";
            break;
        case action_fail_standby:
            result = "standby";
            break;
        case action_fail_restart_container:
            result = "restart-container";
            break;
        case action_fail_reset_remote:
            result = "reset-remote";
            break;
    }
    return result;
}

/*!
 * \brief Convert an action name to its \c action_tasks value
 *
 * Each comparison is done by pcmk__str_eq() with the \c pcmk__str_casei flag.
 *
 * \param[in] task  Action name to convert
 *
 * \return Enum value matching \p task, or \c no_action if nothing matches
 */
enum action_tasks
text2task(const char *task)
{
    if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)) {
        return stop_rsc;
    } else if (pcmk__str_eq(task, PCMK_ACTION_STOPPED, pcmk__str_casei)) {
        return stopped_rsc;
    } else if (pcmk__str_eq(task, PCMK_ACTION_START, pcmk__str_casei)) {
        return start_rsc;
    } else if (pcmk__str_eq(task, PCMK_ACTION_RUNNING, pcmk__str_casei)) {
        return started_rsc;
    } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_casei)) {
        return shutdown_crm;
    } else if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
        return stonith_node;
    } else if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
        return monitor_rsc;
    } else if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
        return action_notify;
    } else if (pcmk__str_eq(task, PCMK_ACTION_NOTIFIED, pcmk__str_casei)) {
        return action_notified;
    } else if (pcmk__str_eq(task, PCMK_ACTION_PROMOTE, pcmk__str_casei)) {
        return action_promote;
    } else if (pcmk__str_eq(task, PCMK_ACTION_DEMOTE, pcmk__str_casei)) {
        return action_demote;
    } else if (pcmk__str_eq(task, PCMK_ACTION_PROMOTED, pcmk__str_casei)) {
        return action_promoted;
    } else if (pcmk__str_eq(task, PCMK_ACTION_DEMOTED, pcmk__str_casei)) {
        return action_demoted;
    }

    /* With tracing support built in, action names that are known but have no
     * enum value return quietly, so that only other names reach the trace
     * message below.
     */
#if SUPPORT_TRACING
    if (pcmk__str_eq(task, PCMK_ACTION_CANCEL, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, PCMK_ACTION_DELETE, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
        /* NOTE(review): PCMK_ACTION_MONITOR is already matched by the chain
         * above (returning monitor_rsc), so this branch looks unreachable.
         */
        return no_action;
    } else if (pcmk__str_eq(task, PCMK_ACTION_MIGRATE_TO, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_casei)) {
        return no_action;
    }
    crm_trace("Unsupported action: %s", task);
#endif

    return no_action;
}

/*!
 * \brief Get the text name of an \c action_tasks value
 *
 * \param[in] task  Enum value to convert
 *
 * \return Constant string for \p task; a value with no \c case label below
 *         yields the initial value of \c result
 */
const char *
task2text(enum action_tasks task)
{
    const char *result = "";

    // There is no default label: unlisted values keep the initial result
    switch (task) {
        case no_action:
            result = "no_action";
            break;
        case stop_rsc:
            result = PCMK_ACTION_STOP;
            break;
        case stopped_rsc:
            result = PCMK_ACTION_STOPPED;
            break;
        case start_rsc:
            result = PCMK_ACTION_START;
            break;
        case started_rsc:
            result = PCMK_ACTION_RUNNING;
            break;
        case shutdown_crm:
            result = PCMK_ACTION_DO_SHUTDOWN;
            break;
        case stonith_node:
            result = PCMK_ACTION_STONITH;
            break;
        case monitor_rsc:
            result = PCMK_ACTION_MONITOR;
            break;
        case action_notify:
            result = PCMK_ACTION_NOTIFY;
            break;
        case action_notified:
            result = PCMK_ACTION_NOTIFIED;
            break;
        case action_promote:
            result = PCMK_ACTION_PROMOTE;
            break;
        case action_promoted:
            result = PCMK_ACTION_PROMOTED;
            break;
        case action_demote:
            result = PCMK_ACTION_DEMOTE;
            break;
        case action_demoted:
            result = PCMK_ACTION_DEMOTED;
            break;
    }
    return result;
}

/*!
 * \brief Get the text name of a resource role
 *
 * When \c PCMK__COMPAT_2_0 is defined, the unpromoted and promoted roles are
 * reported with their legacy name constants instead of the current ones.
 *
 * \param[in] role  Role to convert
 *
 * \return Constant string for \p role (\c PCMK__ROLE_UNKNOWN for any value
 *         without its own \c case label)
 */
const char *
role2text(enum rsc_role_e role)
{
    /* NOTE(review): the lines below that begin with '-' and '+' are patch
     * markers carried by the text under review (the old and new form of one
     * statement: RSC_ROLE_STOPPED_S replaced by PCMK__ROLE_STOPPED), not C
     * operators.
     */
    switch (role) {
        case pcmk_role_stopped:
-            return RSC_ROLE_STOPPED_S;
+            return PCMK__ROLE_STOPPED;
        case pcmk_role_started:
            return RSC_ROLE_STARTED_S;
        case pcmk_role_unpromoted:
#ifdef PCMK__COMPAT_2_0
            return RSC_ROLE_UNPROMOTED_LEGACY_S;
#else
            return RSC_ROLE_UNPROMOTED_S;
#endif
        case pcmk_role_promoted:
#ifdef PCMK__COMPAT_2_0
            return RSC_ROLE_PROMOTED_LEGACY_S;
#else
            return RSC_ROLE_PROMOTED_S;
#endif
        default: // pcmk_role_unknown
            return PCMK__ROLE_UNKNOWN;
    }
}

/*!
 * \brief Convert a role name to its \c rsc_role_e value
 *
 * Both the current and the legacy name constants are accepted for the
 * unpromoted and promoted roles. A name that matches nothing is logged with
 * crm_err() and treated as unknown.
 *
 * \param[in] role  Role name to convert (asserted to be non-NULL)
 *
 * \return Enum value matching \p role, or \c pcmk_role_unknown if there is no
 *         match
 */
enum rsc_role_e
text2role(const char *role)
{
    CRM_ASSERT(role != NULL);

    /* NOTE(review): the two lines below that begin with '-' and '+' are patch
     * markers carried by the text under review (the old and new form of the
     * first comparison), not C operators.
     */
-    if (pcmk__str_eq(role, RSC_ROLE_STOPPED_S, pcmk__str_casei)) {
+    if (pcmk__str_eq(role, PCMK__ROLE_STOPPED, pcmk__str_casei)) {
        return pcmk_role_stopped;
    } else if (pcmk__str_eq(role, RSC_ROLE_STARTED_S, pcmk__str_casei)) {
        return pcmk_role_started;
    } else if (pcmk__strcase_any_of(role, RSC_ROLE_UNPROMOTED_S,
                                    RSC_ROLE_UNPROMOTED_LEGACY_S, NULL)) {
        return pcmk_role_unpromoted;
    } else if (pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
                                    RSC_ROLE_PROMOTED_LEGACY_S, NULL)) {
        return pcmk_role_promoted;
    } else if (pcmk__str_eq(role, PCMK__ROLE_UNKNOWN, pcmk__str_casei)) {
        // The unknown role's own name is accepted without logging an error
        return pcmk_role_unknown;
    }
    crm_err("Unknown role: %s", role);
    return pcmk_role_unknown;
}

/*!
 * \brief Add a name/value pair to a hash table unless the name has a value
 *
 * Nothing is added when \p name or \p value is NULL, when \p value compares
 * equal to "#default" (using the \c pcmk__str_casei flag), or when looking up
 * \p name in \p hash already gives a non-NULL result. Otherwise strdup()
 * copies of \p name and \p value are inserted.
 *
 * \param[in,out] hash   Table to add to (the function returns at once via
 *                       CRM_CHECK() if this is NULL)
 * \param[in]     name   Key to add
 * \param[in]     value  Value to add
 */
void
add_hash_param(GHashTable * hash, const char *name, const char *value)
{
    CRM_CHECK(hash != NULL, return);

    // The trace is emitted before the NULL checks, hence the pcmk__s() guards
    crm_trace("Adding name='%s' value='%s' to hash table",
              pcmk__s(name, ""), pcmk__s(value, ""));
    if (name == NULL || value == NULL) {
        return;

    } else if (pcmk__str_eq(value, "#default", pcmk__str_casei)) {
        return;

    } else if (g_hash_table_lookup(hash, name) == NULL) {
        // The table is given its own copies of both strings
        g_hash_table_insert(hash, strdup(name), strdup(value));
    }
}

/*!
* \internal * \brief Look up an attribute value on the appropriate node * * If \p node is a guest node and either the \c XML_RSC_ATTR_TARGET meta * attribute is set to "host" for \p rsc or \p force_host is \c true, query the * attribute on the node's host. Otherwise, query the attribute on \p node * itself. * * \param[in] node Node to query attribute value on by default * \param[in] name Name of attribute to query * \param[in] rsc Resource on whose behalf we're querying * \param[in] node_type Type of resource location lookup * \param[in] force_host Force a lookup on the guest node's host, regardless of * the \c XML_RSC_ATTR_TARGET value * * \return Value of the attribute on \p node or on the host of \p node * * \note If \p force_host is \c true, \p node \e must be a guest node. */ const char * pe__node_attribute_calculated(const pe_node_t *node, const char *name, const pe_resource_t *rsc, enum pe__rsc_node node_type, bool force_host) { // @TODO: Use pe__is_guest_node() after merging libpe_{rules,status} bool is_guest = (node != NULL) && (node->details->type == node_remote) && (node->details->remote_rsc != NULL) && (node->details->remote_rsc->container != NULL); const char *source = NULL; const char *node_type_s = NULL; const char *reason = NULL; const pe_resource_t *container = NULL; const pe_node_t *host = NULL; CRM_ASSERT((node != NULL) && (name != NULL) && (rsc != NULL) && (!force_host || is_guest)); /* Ignore XML_RSC_ATTR_TARGET if node is not a guest node. This represents a * user configuration error. 
*/ source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET); if (!force_host && (!is_guest || !pcmk__str_eq(source, "host", pcmk__str_casei))) { return g_hash_table_lookup(node->details->attrs, name); } container = node->details->remote_rsc->container; switch (node_type) { case pe__rsc_node_assigned: node_type_s = "assigned"; host = container->allocated_to; if (host == NULL) { reason = "not assigned"; } break; case pe__rsc_node_current: node_type_s = "current"; if (container->running_on != NULL) { host = container->running_on->data; } if (host == NULL) { reason = "inactive"; } break; default: // Add support for other enum pe__rsc_node values if needed CRM_ASSERT(false); break; } if (host != NULL) { const char *value = g_hash_table_lookup(host->details->attrs, name); pe_rsc_trace(rsc, "%s: Value lookup for %s on %s container host %s %s%s", rsc->id, name, node_type_s, pe__node_name(host), ((value != NULL)? "succeeded: " : "failed"), pcmk__s(value, "")); return value; } pe_rsc_trace(rsc, "%s: Not looking for %s on %s container host: %s is %s", rsc->id, name, node_type_s, container->id, reason); return NULL; } const char * pe_node_attribute_raw(const pe_node_t *node, const char *name) { if(node == NULL) { return NULL; } return g_hash_table_lookup(node->details->attrs, name); }