diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h index af9823ac54..b2bb4e687d 100644 --- a/include/crm/pengine/pe_types.h +++ b/include/crm/pengine/pe_types.h @@ -1,484 +1,478 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_PENGINE_PE_TYPES__H # define PCMK__CRM_PENGINE_PE_TYPES__H # include // bool # include // time_t # include // xmlNode # include // gboolean, guint, GList, GHashTable # include # include # include #ifdef __cplusplus extern "C" { #endif /*! * \file * \brief Data types for cluster status * \ingroup pengine */ typedef struct pe_node_s pe_node_t; typedef struct pe_action_s pe_action_t; typedef struct pe_resource_s pe_resource_t; typedef struct pe_working_set_s pe_working_set_t; typedef struct resource_object_functions_s { gboolean (*unpack) (pe_resource_t*, pe_working_set_t*); pe_resource_t *(*find_rsc) (pe_resource_t *parent, const char *search, const pe_node_t *node, int flags); /* parameter result must be free'd */ char *(*parameter) (pe_resource_t*, pe_node_t*, gboolean, const char*, pe_working_set_t*); //! \deprecated will be removed in a future release void (*print) (pe_resource_t*, const char*, long, void*); gboolean (*active) (pe_resource_t*, gboolean); enum rsc_role_e (*state) (const pe_resource_t*, gboolean); pe_node_t *(*location) (const pe_resource_t*, GList**, int); void (*free) (pe_resource_t*); void (*count) (pe_resource_t*); gboolean (*is_filtered) (const pe_resource_t*, GList *, gboolean); /*! * \brief Find a node (and optionally count all) where resource is active * * \param[in] rsc Resource to check * \param[out] count_all If not NULL, set this to count of active nodes * \param[out] count_clean If not NULL, set this to count of clean nodes * * \return A node where the resource is active, preferring the source node * if the resource is involved in a partial migration or a clean, * online node if the resource's "requires" is "quorum" or * "nothing", or NULL if the resource is inactive. */ pe_node_t *(*active_node)(const pe_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean); /*! * \brief Get maximum resource instances per node * * \param[in] rsc Resource to check * * \return Maximum number of \p rsc instances that can be active on one node */ unsigned int (*max_per_node)(const pe_resource_t *rsc); } resource_object_functions_t; typedef struct resource_alloc_functions_s resource_alloc_functions_t; -/*! - * When scheduling, only unpack the CIB (including constraints), calculate - * as much cluster status as possible, and apply node health. - */ -# define pe_flag_check_config 0x08000000ULL - struct pe_working_set_s { xmlNode *input; crm_time_t *now; /* options extracted from the input */ char *dc_uuid; pe_node_t *dc_node; const char *stonith_action; const char *placement_strategy; unsigned long long flags; int stonith_timeout; enum pe_quorum_policy no_quorum_policy; GHashTable *config_hash; GHashTable *tickets; // Actions for which there can be only one (e.g. fence nodeX) GHashTable *singletons; GList *nodes; GList *resources; GList *placement_constraints; GList *ordering_constraints; GList *colocation_constraints; GList *ticket_constraints; GList *actions; xmlNode *failed; xmlNode *op_defaults; xmlNode *rsc_defaults; /* stats */ int num_synapse; int max_valid_nodes; //! Deprecated (will be removed in a future release) int order_id; int action_id; /* final output */ xmlNode *graph; GHashTable *template_rsc_sets; const char *localhost; GHashTable *tags; int blocked_resources; int disabled_resources; GList *param_check; // History entries that need to be checked GList *stop_needed; // Containers that need stop actions time_t recheck_by; // Hint to controller to re-run scheduler by this time int ninstances; // Total number of resource instances guint shutdown_lock;// How long (seconds) to lock resources to shutdown node int priority_fencing_delay; // Priority fencing delay void *priv; guint node_pending_timeout; // Node pending timeout }; struct pe_node_shared_s { const char *id; const char *uname; enum node_type type; /* @TODO convert these flags into a bitfield */ gboolean online; gboolean standby; gboolean standby_onfail; gboolean pending; gboolean unclean; gboolean unseen; gboolean shutdown; gboolean expected_up; gboolean is_dc; gboolean maintenance; gboolean rsc_discovery_enabled; gboolean remote_requires_reset; gboolean remote_was_fenced; gboolean remote_maintenance; /* what the remote-rsc is thinking */ gboolean unpacked; int num_resources; pe_resource_t *remote_rsc; GList *running_rsc; /* pe_resource_t* */ GList *allocated_rsc; /* pe_resource_t* */ GHashTable *attrs; /* char* => char* */ GHashTable *utilization; GHashTable *digest_cache; //!< cache of calculated resource digests int priority; // calculated based on the priority of resources running on the node pe_working_set_t *data_set; //!< Cluster that this node is part of }; struct pe_node_s { int weight; gboolean fixed; //!< \deprecated Will be removed in a future release int count; struct pe_node_shared_s *details; int rsc_discover_mode; }; # define pe_rsc_orphan 0x00000001ULL # define pe_rsc_managed 0x00000002ULL # define pe_rsc_block 0x00000004ULL # define pe_rsc_orphan_container_filler 0x00000008ULL # define pe_rsc_notify 0x00000010ULL # define pe_rsc_unique 0x00000020ULL # define pe_rsc_fence_device 0x00000040ULL # define pe_rsc_promotable 0x00000080ULL # define pe_rsc_provisional 0x00000100ULL # define pe_rsc_allocating 0x00000200ULL # define pe_rsc_merging 0x00000400ULL # define pe_rsc_restarting 0x00000800ULL # define pe_rsc_stop 0x00001000ULL # define pe_rsc_reload 0x00002000ULL # define pe_rsc_allow_remote_remotes 0x00004000ULL # define pe_rsc_critical 0x00008000ULL # define pe_rsc_failed 0x00010000ULL # define pe_rsc_detect_loop 0x00020000ULL # define pe_rsc_runnable 0x00040000ULL # define pe_rsc_start_pending 0x00080000ULL //!< \deprecated Do not use # define pe_rsc_starting 0x00100000ULL //!< \deprecated Do not use # define pe_rsc_stopping 0x00200000ULL # define pe_rsc_stop_unexpected 0x00400000ULL # define pe_rsc_allow_migrate 0x00800000ULL # define pe_rsc_failure_ignored 0x01000000ULL # define pe_rsc_replica_container 0x02000000ULL # define pe_rsc_maintenance 0x04000000ULL # define pe_rsc_is_container 0x08000000ULL # define pe_rsc_needs_quorum 0x10000000ULL # define pe_rsc_needs_fencing 0x20000000ULL # define pe_rsc_needs_unfencing 0x40000000ULL /* *INDENT-OFF* */ enum pe_action_flags { pe_action_pseudo = 0x00001, pe_action_runnable = 0x00002, pe_action_optional = 0x00004, pe_action_print_always = 0x00008, pe_action_have_node_attrs = 0x00010, pe_action_implied_by_stonith = 0x00040, pe_action_migrate_runnable = 0x00080, pe_action_dumped = 0x00100, pe_action_processed = 0x00200, #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) pe_action_clear = 0x00400, //! \deprecated Unused #endif pe_action_dangle = 0x00800, /* This action requires one or more of its dependencies to be runnable. * We use this to clear the runnable flag before checking dependencies. */ pe_action_requires_any = 0x01000, pe_action_reschedule = 0x02000, pe_action_tracking = 0x04000, pe_action_dedup = 0x08000, //! Internal state tracking when creating graph pe_action_dc = 0x10000, //! Action may run on DC instead of target }; /* *INDENT-ON* */ struct pe_resource_s { char *id; char *clone_name; xmlNode *xml; xmlNode *orig_xml; xmlNode *ops_xml; pe_working_set_t *cluster; pe_resource_t *parent; enum pe_obj_types variant; void *variant_opaque; resource_object_functions_t *fns; resource_alloc_functions_t *cmds; enum rsc_recovery_type recovery_type; enum pe_restart restart_type; //!< \deprecated will be removed in future release int priority; int stickiness; int sort_index; int failure_timeout; int migration_threshold; guint remote_reconnect_ms; char *pending_task; unsigned long long flags; // @TODO merge these into flags gboolean is_remote_node; gboolean exclusive_discover; /* Pay special attention to whether you want to use rsc_cons_lhs and * rsc_cons directly, which include only colocations explicitly involving * this resource, or call libpacemaker's pcmk__with_this_colocations() and * pcmk__this_with_colocations() functions, which may return relevant * colocations involving the resource's ancestors as well. */ //!@{ //! This field should be treated as internal to Pacemaker GList *rsc_cons_lhs; // List of pcmk__colocation_t* GList *rsc_cons; // List of pcmk__colocation_t* GList *rsc_location; // List of pe__location_t* GList *actions; // List of pe_action_t* GList *rsc_tickets; // List of rsc_ticket* //!@} pe_node_t *allocated_to; pe_node_t *partial_migration_target; pe_node_t *partial_migration_source; GList *running_on; /* pe_node_t* */ GHashTable *known_on; /* pe_node_t* */ GHashTable *allowed_nodes; /* pe_node_t* */ enum rsc_role_e role; enum rsc_role_e next_role; GHashTable *meta; GHashTable *parameters; //! \deprecated Use pe_rsc_params() instead GHashTable *utilization; GList *children; /* pe_resource_t* */ GList *dangling_migrations; /* pe_node_t* */ pe_resource_t *container; GList *fillers; // @COMPAT These should be made const at next API compatibility break pe_node_t *pending_node; // Node on which pending_task is happening pe_node_t *lock_node; // Resource is shutdown-locked to this node time_t lock_time; // When shutdown lock started /* Resource parameters may have node-attribute-based rules, which means the * values can vary by node. This table is a cache of parameter name/value * tables for each node (as needed). Use pe_rsc_params() to get the table * for a given node. */ GHashTable *parameter_cache; // Key = node name, value = parameters table }; struct pe_action_s { int id; int priority; pe_resource_t *rsc; pe_node_t *node; xmlNode *op_entry; char *task; char *uuid; char *cancel_task; char *reason; enum pe_action_flags flags; enum rsc_start_requirement needs; enum action_fail_response on_fail; enum rsc_role_e fail_role; GHashTable *meta; GHashTable *extra; /* * These two varables are associated with the constraint logic * that involves first having one or more actions runnable before * then allowing this action to execute. * * These varables are used with features such as 'clone-min' which * requires at minimum X number of cloned instances to be running * before an order dependency can run. Another option that uses * this is 'require-all=false' in ordering constrants. This option * says "only require one instance of a resource to start before * allowing dependencies to start" -- basically, require-all=false is * the same as clone-min=1. */ /* current number of known runnable actions in the before list. */ int runnable_before; /* the number of "before" runnable actions required for this action * to be considered runnable */ int required_runnable_before; GList *actions_before; /* pe_action_wrapper_t* */ GList *actions_after; /* pe_action_wrapper_t* */ /* Some of the above fields could be moved to the details, * except for API backward compatibility. */ void *action_details; // varies by type of action }; typedef struct pe_ticket_s { char *id; gboolean granted; time_t last_granted; gboolean standby; GHashTable *state; } pe_ticket_t; typedef struct pe_tag_s { char *id; GList *refs; } pe_tag_t; //! Internal tracking for transition graph creation enum pe_link_state { pe_link_not_dumped, //! Internal tracking for transition graph creation pe_link_dumped, //! Internal tracking for transition graph creation pe_link_dup, //! \deprecated No longer used by Pacemaker }; enum pe_discover_e { pe_discover_always = 0, pe_discover_never, pe_discover_exclusive, }; /* *INDENT-OFF* */ enum pe_ordering { pe_order_none = 0x0, /* deleted */ pe_order_optional = 0x1, /* pure ordering, nothing implied */ pe_order_apply_first_non_migratable = 0x2, /* Only apply this constraint's ordering if first is not migratable. */ pe_order_implies_first = 0x10, /* If 'then' is required, ensure 'first' is too */ pe_order_implies_then = 0x20, /* If 'first' is required, ensure 'then' is too */ pe_order_promoted_implies_first = 0x40, /* If 'then' is required and then's rsc is promoted, ensure 'first' becomes required too */ /* first requires then to be both runnable and migrate runnable. */ pe_order_implies_first_migratable = 0x80, pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */ pe_order_pseudo_left = 0x200, /* 'then' can only be pseudo if 'first' is runnable */ pe_order_implies_then_on_node = 0x400, /* If 'first' is required on 'nodeX', * ensure instances of 'then' on 'nodeX' are too. * Only really useful if 'then' is a clone and 'first' is not */ pe_order_probe = 0x800, /* If 'first->rsc' is * - running but about to stop, ignore the constraint * - otherwise, behave as runnable_left */ pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */ pe_order_stonith_stop = 0x2000, // #endif #ifdef __cplusplus } #endif #endif // PCMK__CRM_PENGINE_PE_TYPES__H diff --git a/include/crm/pengine/pe_types_compat.h b/include/crm/pengine/pe_types_compat.h index 8d253155e0..727d8151a4 100644 --- a/include/crm/pengine/pe_types_compat.h +++ b/include/crm/pengine/pe_types_compat.h @@ -1,143 +1,146 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_PENGINE_PE_TYPES_COMPAT__H # define PCMK__CRM_PENGINE_PE_TYPES_COMPAT__H #include #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Deprecated Pacemaker scheduler API * \ingroup pengine * \deprecated Do not include this header directly. The scheduler APIs in this * header, and the header itself, will be removed in a future * release. */ //! \deprecated Use pcmk_sched_quorate instead #define pe_flag_have_quorum pcmk_sched_quorate //! \deprecated Use pcmk_sched_symmetric_cluster instead #define pe_flag_symmetric_cluster pcmk_sched_symmetric_cluster //! \deprecated Use pcmk_sched_in_maintenance instead #define pe_flag_maintenance_mode pcmk_sched_in_maintenance //! \deprecated Use pcmk_sched_fencing_enabled instead #define pe_flag_stonith_enabled pcmk_sched_fencing_enabled //! \deprecated Use pcmk_sched_have_fencing instead #define pe_flag_have_stonith_resource pcmk_sched_have_fencing //! \deprecated Use pcmk_sched_enable_unfencing instead #define pe_flag_enable_unfencing pcmk_sched_enable_unfencing //! \deprecated Use pcmk_sched_concurrent_fencing instead #define pe_flag_concurrent_fencing pcmk_sched_concurrent_fencing //! \deprecated Use pcmk_sched_stop_removed_resources instead #define pe_flag_stop_rsc_orphans pcmk_sched_stop_removed_resources //! \deprecated Use pcmk_sched_cancel_removed_actions instead #define pe_flag_stop_action_orphans pcmk_sched_cancel_removed_actions //! \deprecated Use pcmk_sched_stop_all instead #define pe_flag_stop_everything pcmk_sched_stop_all //! \deprecated Use pcmk_sched_start_failure_fatal instead #define pe_flag_start_failure_fatal pcmk_sched_start_failure_fatal //! \deprecated Do not use #define pe_flag_remove_after_stop pcmk_sched_remove_after_stop //! \deprecated Use pcmk_sched_startup_fencing instead #define pe_flag_startup_fencing pcmk_sched_startup_fencing //! \deprecated Use pcmk_sched_shutdown_lock instead #define pe_flag_shutdown_lock pcmk_sched_shutdown_lock //! \deprecated Use pcmk_sched_probe_resources instead #define pe_flag_startup_probes pcmk_sched_probe_resources //! \deprecated Use pcmk_sched_have_status instead #define pe_flag_have_status pcmk_sched_have_status //! \deprecated Use pcmk_sched_have_remote_nodes instead #define pe_flag_have_remote_nodes pcmk_sched_have_remote_nodes //! \deprecated Use pcmk_sched_location_only instead #define pe_flag_quick_location pcmk_sched_location_only //! \deprecated Use pcmk_sched_sanitized instead #define pe_flag_sanitized pcmk_sched_sanitized //! \deprecated Do not use #define pe_flag_stdout (1ULL << 22) //! \deprecated Use pcmk_sched_no_counts instead #define pe_flag_no_counts pcmk_sched_no_counts //! \deprecated Use pcmk_sched_no_compat instead #define pe_flag_no_compat pcmk_sched_no_compat //! \deprecated Use pcmk_sched_output_scores instead #define pe_flag_show_scores pcmk_sched_output_scores //! \deprecated Use pcmk_sched_show_utilization instead #define pe_flag_show_utilization pcmk_sched_show_utilization +//! \deprecated Use pcmk_sched_validate_only instead +#define pe_flag_check_config pcmk_sched_validate_only + //!@{ //! \deprecated Do not use (unused by Pacemaker) enum pe_graph_flags { pe_graph_none = 0x00000, pe_graph_updated_first = 0x00001, pe_graph_updated_then = 0x00002, pe_graph_disable = 0x00004, }; //!@} //!@{ //! \deprecated Do not use enum pe_check_parameters { pe_check_last_failure, pe_check_active, }; //!@} //!< \deprecated Use pe_action_t instead typedef struct pe_action_s action_t; //!< \deprecated Use pe_action_wrapper_t instead typedef struct pe_action_wrapper_s action_wrapper_t; //!< \deprecated Use pe_node_t instead typedef struct pe_node_s node_t; //!< \deprecated Use enum pe_quorum_policy instead typedef enum pe_quorum_policy no_quorum_policy_t; //!< \deprecated use pe_resource_t instead typedef struct pe_resource_s resource_t; //!< \deprecated Use pe_tag_t instead typedef struct pe_tag_s tag_t; //!< \deprecated Use pe_ticket_t instead typedef struct pe_ticket_s ticket_t; #ifdef __cplusplus } #endif #endif // PCMK__CRM_PENGINE_PE_TYPES_COMPAT__H diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c index 12a2344cd1..df669143d3 100644 --- a/lib/pacemaker/pcmk_scheduler.c +++ b/lib/pacemaker/pcmk_scheduler.c @@ -1,816 +1,816 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" CRM_TRACE_INIT_DATA(pacemaker); /*! * \internal * \brief Do deferred action checks after assignment * * When unpacking the resource history, the scheduler checks for resource * configurations that have changed since an action was run. However, at that * time, bundles using the REMOTE_CONTAINER_HACK don't have their final * parameter information, so instead they add a deferred check to a list. This * function processes one entry in that list. * * \param[in,out] rsc Resource that action history is for * \param[in,out] node Node that action history is for * \param[in] rsc_op Action history entry * \param[in] check Type of deferred check to do */ static void check_params(pe_resource_t *rsc, pe_node_t *node, const xmlNode *rsc_op, enum pcmk__check_parameters check) { const char *reason = NULL; op_digest_cache_t *digest_data = NULL; switch (check) { case pcmk__check_active: if (pcmk__check_action_config(rsc, node, rsc_op) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL)) { reason = "action definition changed"; } break; case pcmk__check_last_failure: digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, rsc->cluster); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s has " "no digest to compare", rsc->id, ID(rsc_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: reason = "resource parameters have changed"; break; } break; } if (reason != NULL) { pe__clear_failcount(rsc, node, reason, rsc->cluster); } } /*! * \internal * \brief Check whether a resource has failcount clearing scheduled on a node * * \param[in] node Node to check * \param[in] rsc Resource to check * * \return true if \p rsc has failcount clearing scheduled on \p node, * otherwise false */ static bool failcount_clear_action_exists(const pe_node_t *node, const pe_resource_t *rsc) { GList *list = pe__resource_actions(rsc, node, PCMK_ACTION_CLEAR_FAILCOUNT, TRUE); if (list != NULL) { g_list_free(list); return true; } return false; } /*! * \internal * \brief Ban a resource from a node if it reached its failure threshold there * * \param[in,out] data Resource to check failure threshold for * \param[in] user_data Node to check resource on */ static void check_failure_threshold(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; const pe_node_t *node = user_data; // If this is a collective resource, apply recursively to children instead if (rsc->children != NULL) { g_list_foreach(rsc->children, check_failure_threshold, user_data); return; } if (!failcount_clear_action_exists(node, rsc)) { /* Don't force the resource away from this node due to a failcount * that's going to be cleared. * * @TODO Failcount clearing can be scheduled in * pcmk__handle_rsc_config_changes() via process_rsc_history(), or in * schedule_resource_actions() via check_params(). This runs well before * then, so it cannot detect those, meaning we might check the migration * threshold when we shouldn't. Worst case, we stop or move the * resource, then move it back in the next transition. */ pe_resource_t *failed = NULL; if (pcmk__threshold_reached(rsc, node, &failed)) { resource_location(failed, node, -INFINITY, "__fail_limit__", rsc->cluster); } } } /*! * \internal * \brief If resource has exclusive discovery, ban node if not allowed * * Location constraints have a resource-discovery option that allows users to * specify where probes are done for the affected resource. If this is set to * exclusive, probes will only be done on nodes listed in exclusive constraints. * This function bans the resource from the node if the node is not listed. * * \param[in,out] data Resource to check * \param[in] user_data Node to check resource on */ static void apply_exclusive_discovery(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; const pe_node_t *node = user_data; if (rsc->exclusive_discover || pe__const_top_resource(rsc, false)->exclusive_discover) { pe_node_t *match = NULL; // If this is a collective resource, apply recursively to children g_list_foreach(rsc->children, apply_exclusive_discovery, user_data); match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if ((match != NULL) && (match->rsc_discover_mode != pe_discover_exclusive)) { match->weight = -INFINITY; } } } /*! * \internal * \brief Apply stickiness to a resource if appropriate * * \param[in,out] data Resource to check for stickiness * \param[in] user_data Ignored */ static void apply_stickiness(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; pe_node_t *node = NULL; // If this is a collective resource, apply recursively to children instead if (rsc->children != NULL) { g_list_foreach(rsc->children, apply_stickiness, NULL); return; } /* A resource is sticky if it is managed, has stickiness configured, and is * active on a single node. */ if (!pcmk_is_set(rsc->flags, pe_rsc_managed) || (rsc->stickiness < 1) || !pcmk__list_of_1(rsc->running_on)) { return; } node = rsc->running_on->data; /* In a symmetric cluster, stickiness can always be used. In an * asymmetric cluster, we have to check whether the resource is still * allowed on the node, so we don't keep the resource somewhere it is no * longer explicitly enabled. */ if (!pcmk_is_set(rsc->cluster->flags, pcmk_sched_symmetric_cluster) && (g_hash_table_lookup(rsc->allowed_nodes, node->details->id) == NULL)) { pe_rsc_debug(rsc, "Ignoring %s stickiness because the cluster is " "asymmetric and %s is not explicitly allowed", rsc->id, pe__node_name(node)); return; } pe_rsc_debug(rsc, "Resource %s has %d stickiness on %s", rsc->id, rsc->stickiness, pe__node_name(node)); resource_location(rsc, node, rsc->stickiness, "stickiness", rsc->cluster); } /*! * \internal * \brief Apply shutdown locks for all resources as appropriate * * \param[in,out] data_set Cluster working set */ static void apply_shutdown_locks(pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pcmk_sched_shutdown_lock)) { return; } for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; rsc->cmds->shutdown_lock(rsc); } } /*! * \internal * \brief Calculate the number of available nodes in the cluster * * \param[in,out] data_set Cluster working set */ static void count_available_nodes(pe_working_set_t *data_set) { if (pcmk_is_set(data_set->flags, pcmk_sched_no_compat)) { return; } // @COMPAT for API backward compatibility only (cluster does not use value) for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; if ((node != NULL) && (node->weight >= 0) && node->details->online && (node->details->type != node_ping)) { data_set->max_valid_nodes++; } } crm_trace("Online node count: %d", data_set->max_valid_nodes); } /* * \internal * \brief Apply node-specific scheduling criteria * * After the CIB has been unpacked, process node-specific scheduling criteria * including shutdown locks, location constraints, resource stickiness, * migration thresholds, and exclusive resource discovery. */ static void apply_node_criteria(pe_working_set_t *data_set) { crm_trace("Applying node-specific scheduling criteria"); apply_shutdown_locks(data_set); count_available_nodes(data_set); pcmk__apply_locations(data_set); g_list_foreach(data_set->resources, apply_stickiness, NULL); for (GList *node_iter = data_set->nodes; node_iter != NULL; node_iter = node_iter->next) { for (GList *rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { check_failure_threshold(rsc_iter->data, node_iter->data); apply_exclusive_discovery(rsc_iter->data, node_iter->data); } } } /*! * \internal * \brief Assign resources to nodes * * \param[in,out] data_set Cluster working set */ static void assign_resources(pe_working_set_t *data_set) { GList *iter = NULL; crm_trace("Assigning resources to nodes"); if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) { pcmk__sort_resources(data_set); } pcmk__show_node_capacities("Original", data_set); if (pcmk_is_set(data_set->flags, pcmk_sched_have_remote_nodes)) { /* Assign remote connection resources first (which will also assign any * colocation dependencies). If the connection is migrating, always * prefer the partial migration target. */ for (iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Assigning remote connection resource '%s'", rsc->id); rsc->cmds->assign(rsc, rsc->partial_migration_target, true); } } } /* now do the rest of the resources */ for (iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; if (!rsc->is_remote_node) { pe_rsc_trace(rsc, "Assigning %s resource '%s'", rsc->xml->name, rsc->id); rsc->cmds->assign(rsc, NULL, true); } } pcmk__show_node_capacities("Remaining", data_set); } /*! * \internal * \brief Schedule fail count clearing on online nodes if resource is orphaned * * \param[in,out] data Resource to check * \param[in] user_data Ignored */ static void clear_failcounts_if_orphaned(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) { return; } crm_trace("Clear fail counts for orphaned resource %s", rsc->id); /* There's no need to recurse into rsc->children because those * should just be unassigned clone instances. */ for (GList *iter = rsc->cluster->nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; pe_action_t *clear_op = NULL; if (!node->details->online) { continue; } if (pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL) == 0) { continue; } clear_op = pe__clear_failcount(rsc, node, "it is orphaned", rsc->cluster); /* We can't use order_action_then_stop() here because its * pe_order_preserve breaks things */ pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), NULL, pe_order_optional, rsc->cluster); } } /*! * \internal * \brief Schedule any resource actions needed * * \param[in,out] data_set Cluster working set */ static void schedule_resource_actions(pe_working_set_t *data_set) { // Process deferred action checks pe__foreach_param_check(data_set, check_params); pe__free_param_checks(data_set); if (pcmk_is_set(data_set->flags, pcmk_sched_probe_resources)) { crm_trace("Scheduling probes"); pcmk__schedule_probes(data_set); } if (pcmk_is_set(data_set->flags, pcmk_sched_stop_removed_resources)) { g_list_foreach(data_set->resources, clear_failcounts_if_orphaned, NULL); } crm_trace("Scheduling resource actions"); for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; rsc->cmds->create_actions(rsc); } } /*! * \internal * \brief Check whether a resource or any of its descendants are managed * * \param[in] rsc Resource to check * * \return true if resource or any descendant is managed, otherwise false */ static bool is_managed(const pe_resource_t *rsc) { if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { return true; } for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { if (is_managed((pe_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether any resources in the cluster are managed * * \param[in] data_set Cluster working set * * \return true if any resource is managed, otherwise false */ static bool any_managed_resources(const pe_working_set_t *data_set) { for (const GList *iter = data_set->resources; iter != NULL; iter = iter->next) { if (is_managed((const pe_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether a node requires fencing * * \param[in] node Node to check * \param[in] have_managed Whether any resource in cluster is managed * * \return true if \p node should be fenced, otherwise false */ static bool needs_fencing(const pe_node_t *node, bool have_managed) { return have_managed && node->details->unclean && pe_can_fence(node->details->data_set, node); } /*! * \internal * \brief Check whether a node requires shutdown * * \param[in] node Node to check * * \return true if \p node should be shut down, otherwise false */ static bool needs_shutdown(const pe_node_t *node) { if (pe__is_guest_or_remote_node(node)) { /* Do not send shutdown actions for Pacemaker Remote nodes. * @TODO We might come up with a good use for this in the future. */ return false; } return node->details->online && node->details->shutdown; } /*! * \internal * \brief Track and order non-DC fencing * * \param[in,out] list List of existing non-DC fencing actions * \param[in,out] action Fencing action to prepend to \p list * \param[in] data_set Cluster working set * * \return (Possibly new) head of \p list */ static GList * add_nondc_fencing(GList *list, pe_action_t *action, const pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pcmk_sched_concurrent_fencing) && (list != NULL)) { /* Concurrent fencing is disabled, so order each non-DC * fencing in a chain. If there is any DC fencing or * shutdown, it will be ordered after the last action in the * chain later. */ order_actions((pe_action_t *) list->data, action, pe_order_optional); } return g_list_prepend(list, action); } /*! * \internal * \brief Schedule a node for fencing * * \param[in,out] node Node that requires fencing */ static pe_action_t * schedule_fencing(pe_node_t *node) { pe_action_t *fencing = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, node->details->data_set); pe_warn("Scheduling node %s for fencing", pe__node_name(node)); pcmk__order_vs_fence(fencing, node->details->data_set); return fencing; } /*! * \internal * \brief Create and order node fencing and shutdown actions * * \param[in,out] data_set Cluster working set */ static void schedule_fencing_and_shutdowns(pe_working_set_t *data_set) { pe_action_t *dc_down = NULL; bool integrity_lost = false; bool have_managed = any_managed_resources(data_set); GList *fencing_ops = NULL; GList *shutdown_ops = NULL; crm_trace("Scheduling fencing and shutdowns as needed"); if (!have_managed) { crm_notice("No fencing will be done until there are resources " "to manage"); } // Check each node for whether it needs fencing or shutdown for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; pe_action_t *fencing = NULL; /* Guest nodes are "fenced" by recovering their container resource, * so handle them separately. */ if (pe__is_guest_node(node)) { if (node->details->remote_requires_reset && have_managed && pe_can_fence(data_set, node)) { pcmk__fence_guest(node); } continue; } if (needs_fencing(node, have_managed)) { fencing = schedule_fencing(node); // Track DC and non-DC fence actions separately if (node->details->is_dc) { dc_down = fencing; } else { fencing_ops = add_nondc_fencing(fencing_ops, fencing, data_set); } } else if (needs_shutdown(node)) { pe_action_t *down_op = pcmk__new_shutdown_action(node); // Track DC and non-DC shutdown actions separately if (node->details->is_dc) { dc_down = down_op; } else { shutdown_ops = g_list_prepend(shutdown_ops, down_op); } } if ((fencing == NULL) && node->details->unclean) { integrity_lost = true; pe_warn("Node %s is unclean but cannot be fenced", pe__node_name(node)); } } if (integrity_lost) { if (!pcmk_is_set(data_set->flags, pcmk_sched_fencing_enabled)) { pe_warn("Resource functionality and data integrity cannot be " "guaranteed (configure, enable, and test fencing to " "correct this)"); } else if (!pcmk_is_set(data_set->flags, pcmk_sched_quorate)) { crm_notice("Unclean nodes will not be fenced until quorum is " "attained or no-quorum-policy is set to ignore"); } } if (dc_down != NULL) { /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated * DC elections. However, we don't want to order non-DC shutdowns before * a DC *fencing*, because even though we don't want a node that's * shutting down to become DC, the DC fencing could be ordered before a * clone stop that's also ordered before the shutdowns, thus leading to * a graph loop. */ if (pcmk__str_eq(dc_down->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { pcmk__order_after_each(dc_down, shutdown_ops); } // Order any non-DC fencing before any DC fencing or shutdown if (pcmk_is_set(data_set->flags, pcmk_sched_concurrent_fencing)) { /* With concurrent fencing, order each non-DC fencing action * separately before any DC fencing or shutdown. */ pcmk__order_after_each(dc_down, fencing_ops); } else if (fencing_ops != NULL) { /* Without concurrent fencing, the non-DC fencing actions are * already ordered relative to each other, so we just need to order * the DC fencing after the last action in the chain (which is the * first item in the list). */ order_actions((pe_action_t *) fencing_ops->data, dc_down, pe_order_optional); } } g_list_free(fencing_ops); g_list_free(shutdown_ops); } static void log_resource_details(pe_working_set_t *data_set) { pcmk__output_t *out = data_set->priv; GList *all = NULL; /* Due to the `crm_mon --node=` feature, out->message() for all the * resource-related messages expects a list of nodes that we are allowed to * output information for. Here, we create a wildcard to match all nodes. */ all = g_list_prepend(all, (gpointer) "*"); for (GList *item = data_set->resources; item != NULL; item = item->next) { pe_resource_t *rsc = (pe_resource_t *) item->data; // Log all resources except inactive orphans if (!pcmk_is_set(rsc->flags, pe_rsc_orphan) || (rsc->role != pcmk_role_stopped)) { out->message(out, crm_map_element_name(rsc->xml), 0, rsc, all, all); } } g_list_free(all); } static void log_all_actions(pe_working_set_t *data_set) { /* This only ever outputs to the log, so ignore whatever output object was * previously set and just log instead. */ pcmk__output_t *prev_out = data_set->priv; pcmk__output_t *out = NULL; if (pcmk__log_output_new(&out) != pcmk_rc_ok) { return; } pe__register_messages(out); pcmk__register_lib_messages(out); pcmk__output_set_log_level(out, LOG_NOTICE); data_set->priv = out; out->begin_list(out, NULL, NULL, "Actions"); pcmk__output_actions(data_set); out->end_list(out); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); data_set->priv = prev_out; } /*! * \internal * \brief Log all required but unrunnable actions at trace level * * \param[in] data_set Cluster working set */ static void log_unrunnable_actions(const pe_working_set_t *data_set) { const uint64_t flags = pe_action_optional |pe_action_runnable |pe_action_pseudo; crm_trace("Required but unrunnable actions:"); for (const GList *iter = data_set->actions; iter != NULL; iter = iter->next) { const pe_action_t *action = (const pe_action_t *) iter->data; if (!pcmk_any_flags_set(action->flags, flags)) { pcmk__log_action("\t", action, true); } } } /*! * \internal * \brief Unpack the CIB for scheduling * * \param[in,out] cib CIB XML to unpack (may be NULL if already unpacked) * \param[in] flags Working set flags to set in addition to defaults * \param[in,out] data_set Cluster working set */ static void unpack_cib(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) { const char* localhost_save = NULL; if (pcmk_is_set(data_set->flags, pcmk_sched_have_status)) { crm_trace("Reusing previously calculated cluster status"); pe__set_working_set_flags(data_set, flags); return; } if (data_set->localhost) { localhost_save = data_set->localhost; } CRM_ASSERT(cib != NULL); crm_trace("Calculating cluster status"); /* This will zero the entire struct without freeing anything first, so * callers should never call pcmk__schedule_actions() with a populated data * set unless pcmk_sched_have_status is set (i.e. cluster_status() was * previously called, whether directly or via pcmk__schedule_actions()). */ set_working_set_defaults(data_set); if (localhost_save) { data_set->localhost = localhost_save; } pe__set_working_set_flags(data_set, flags); data_set->input = cib; cluster_status(data_set); // Sets pcmk_sched_have_status } /*! * \internal * \brief Run the scheduler for a given CIB * * \param[in,out] cib CIB XML to use as scheduler input * \param[in] flags Working set flags to set in addition to defaults * \param[in,out] data_set Cluster working set */ void pcmk__schedule_actions(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) { unpack_cib(cib, flags, data_set); pcmk__set_assignment_methods(data_set); pcmk__apply_node_health(data_set); pcmk__unpack_constraints(data_set); - if (pcmk_is_set(data_set->flags, pe_flag_check_config)) { + if (pcmk_is_set(data_set->flags, pcmk_sched_validate_only)) { return; } if (!pcmk_is_set(data_set->flags, pcmk_sched_location_only) && pcmk__is_daemon) { log_resource_details(data_set); } apply_node_criteria(data_set); if (pcmk_is_set(data_set->flags, pcmk_sched_location_only)) { return; } pcmk__create_internal_constraints(data_set); pcmk__handle_rsc_config_changes(data_set); assign_resources(data_set); schedule_resource_actions(data_set); /* Remote ordering constraints need to happen prior to calculating fencing * because it is one more place we can mark nodes as needing fencing. */ pcmk__order_remote_connection_actions(data_set); schedule_fencing_and_shutdowns(data_set); pcmk__apply_orderings(data_set); log_all_actions(data_set); pcmk__create_graph(data_set); if (get_crm_log_level() == LOG_TRACE) { log_unrunnable_actions(data_set); } } diff --git a/tools/crm_verify.c b/tools/crm_verify.c index 6b874f7961..9b6e438179 100644 --- a/tools/crm_verify.c +++ b/tools/crm_verify.c @@ -1,300 +1,300 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const char *SUMMARY = "Check a Pacemaker configuration for errors\n\n" "Check the well-formedness of a complete Pacemaker XML configuration,\n" "its conformance to the configured schema, and the presence of common\n" "misconfigurations. Problems reported as errors must be fixed before the\n" "cluster will work properly. It is left to the administrator to decide\n" "whether to fix problems reported as warnings."; struct { char *cib_save; gboolean use_live_cib; char *xml_file; gboolean xml_stdin; char *xml_string; } options; static GOptionEntry data_entries[] = { { "live-check", 'L', 0, G_OPTION_ARG_NONE, &options.use_live_cib, "Check the configuration used by the running cluster", NULL }, { "xml-file", 'x', 0, G_OPTION_ARG_FILENAME, &options.xml_file, "Check the configuration in the named file", "FILE" }, { "xml-pipe", 'p', 0, G_OPTION_ARG_NONE, &options.xml_stdin, "Check the configuration piped in via stdin", NULL }, { "xml-text", 'X', 0, G_OPTION_ARG_STRING, &options.xml_string, "Check the configuration in the supplied string", "XML" }, { NULL } }; static GOptionEntry addl_entries[] = { { "save-xml", 'S', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME, &options.cib_save, "Save verified XML to named file (most useful with -L)", "FILE" }, { NULL } }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; const char *description = "Examples:\n\n" "Check the consistency of the configuration in the running cluster:\n\n" "\tcrm_verify --live-check\n\n" "Check the consistency of the configuration in a given file and " "produce quiet output:\n\n" "\tcrm_verify --xml-file file.xml --quiet\n\n" "Check the consistency of the configuration in a given file and " "produce verbose output:\n\n" "\tcrm_verify --xml-file file.xml --verbose\n\n"; GOptionEntry extra_prog_entries[] = { { "quiet", 'q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Don't print verify information", NULL }, { NULL } }; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, description); pcmk__add_arg_group(context, "data", "Data sources:", "Show data options", data_entries); pcmk__add_arg_group(context, "additional", "Additional options:", "Show additional options", addl_entries); return context; } int main(int argc, char **argv) { xmlNode *cib_object = NULL; xmlNode *status = NULL; pe_working_set_t *data_set = NULL; int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; GError *error = NULL; pcmk__output_t *out = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "xSX"); GOptionContext *context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } if (args->verbosity > 0) { args->verbosity -= args->quiet; } pcmk__cli_init_logging("crm_verify", args->verbosity); rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } pcmk__register_lib_messages(out); crm_info("=#=#=#=#= Getting XML =#=#=#=#="); if (options.use_live_cib) { crm_info("Reading XML from: live cluster"); rc = cib__signon_query(out, NULL, &cib_object); if (rc != pcmk_rc_ok) { // cib__signon_query() outputs any relevant error goto done; } } else if (options.xml_file != NULL) { cib_object = filename2xml(options.xml_file); if (cib_object == NULL) { rc = ENODATA; g_set_error(&error, PCMK__RC_ERROR, rc, "Couldn't parse input file: %s", options.xml_file); goto done; } } else if (options.xml_string != NULL) { cib_object = string2xml(options.xml_string); if (cib_object == NULL) { rc = ENODATA; g_set_error(&error, PCMK__RC_ERROR, rc, "Couldn't parse input string: %s", options.xml_string); goto done; } } else if (options.xml_stdin) { cib_object = stdin2xml(); if (cib_object == NULL) { rc = ENODATA; g_set_error(&error, PCMK__RC_ERROR, rc, "Couldn't parse input from STDIN."); goto done; } } else { rc = ENODATA; g_set_error(&error, PCMK__RC_ERROR, rc, "No configuration source specified. Use --help for usage information."); goto done; } if (!pcmk__xe_is(cib_object, XML_TAG_CIB)) { rc = EBADMSG; g_set_error(&error, PCMK__RC_ERROR, rc, "This tool can only check complete configurations (i.e. those starting with )."); goto done; } if (options.cib_save != NULL) { write_xml_file(cib_object, options.cib_save, FALSE); } status = pcmk_find_cib_element(cib_object, XML_CIB_TAG_STATUS); if (status == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (validate_xml(cib_object, NULL, FALSE) == FALSE) { pcmk__config_err("CIB did not pass schema validation"); free_xml(cib_object); cib_object = NULL; } else if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { crm_config_error = TRUE; free_xml(cib_object); cib_object = NULL; out->err(out, "The cluster will NOT be able to use this configuration.\n" "Please manually update the configuration to conform to the %s syntax.", xml_latest_schema()); } data_set = pe_new_working_set(); if (data_set == NULL) { rc = errno; crm_perror(LOG_CRIT, "Unable to allocate working set"); goto done; } data_set->priv = out; /* Process the configuration to set crm_config_error/crm_config_warning. * * @TODO Some parts of the configuration are unpacked only when needed (for * example, action configuration), so we aren't necessarily checking those. */ if (cib_object != NULL) { unsigned long long flags = pcmk_sched_no_counts|pcmk_sched_no_compat; if ((status == NULL) && !options.use_live_cib) { // No status available, so do minimal checks - flags |= pe_flag_check_config; + flags |= pcmk_sched_validate_only; } pcmk__schedule_actions(cib_object, flags, data_set); } pe_free_working_set(data_set); if (crm_config_error) { rc = pcmk_rc_schema_validation; if (args->verbosity > 0) { g_set_error(&error, PCMK__RC_ERROR, rc, "Errors found during check: config not valid"); } else { g_set_error(&error, PCMK__RC_ERROR, rc, "Errors found during check: config not valid\n-V may provide more details"); } } else if (crm_config_warning) { rc = pcmk_rc_schema_validation; if (args->verbosity > 0) { g_set_error(&error, PCMK__RC_ERROR, rc, "Warnings found during check: config may not be valid"); } else { g_set_error(&error, PCMK__RC_ERROR, rc, "Warnings found during check: config may not be valid\n-V may provide more details"); } } done: g_strfreev(processed_args); pcmk__free_arg_context(context); free(options.cib_save); free(options.xml_file); free(options.xml_string); if (exit_code == CRM_EX_OK) { exit_code = pcmk_rc2exitc(rc); } pcmk__output_and_clear_error(&error, NULL); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); }