diff --git a/include/crm/common/actions.h b/include/crm/common/actions.h index 6ccd70c76a..5721a1666d 100644 --- a/include/crm/common/actions.h +++ b/include/crm/common/actions.h @@ -1,60 +1,61 @@ /* * Copyright 2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_ACTIONS__H #define PCMK__CRM_COMMON_ACTIONS__H #ifdef __cplusplus extern "C" { #endif /*! * \file * \brief APIs related to actions * \ingroup core */ // Action names as strings #define PCMK_ACTION_CANCEL "cancel" +#define PCMK_ACTION_CLEAR_FAILCOUNT "clear_failcount" #define PCMK_ACTION_CLONE_ONE_OR_MORE "clone-one-or-more" #define PCMK_ACTION_DELETE "delete" #define PCMK_ACTION_DEMOTE "demote" #define PCMK_ACTION_DEMOTED "demoted" #define PCMK_ACTION_DO_SHUTDOWN "do_shutdown" #define PCMK_ACTION_LIST "list" #define PCMK_ACTION_LRM_DELETE "lrm_delete" #define PCMK_ACTION_LOAD_STOPPED "load_stopped" #define PCMK_ACTION_MAINTENANCE_NODES "maintenance_nodes" #define PCMK_ACTION_META_DATA "meta-data" #define PCMK_ACTION_MIGRATE_FROM "migrate_from" #define PCMK_ACTION_MIGRATE_TO "migrate_to" #define PCMK_ACTION_MONITOR "monitor" #define PCMK_ACTION_NOTIFIED "notified" #define PCMK_ACTION_NOTIFY "notify" #define PCMK_ACTION_OFF "off" #define PCMK_ACTION_ON "on" #define PCMK_ACTION_ONE_OR_MORE "one-or-more" #define PCMK_ACTION_PROMOTE "promote" #define PCMK_ACTION_PROMOTED "promoted" #define PCMK_ACTION_REBOOT "reboot" #define PCMK_ACTION_RELOAD "reload" #define PCMK_ACTION_RELOAD_AGENT "reload-agent" #define PCMK_ACTION_RUNNING "running" #define PCMK_ACTION_START "start" #define PCMK_ACTION_STATUS "status" #define PCMK_ACTION_STONITH "stonith" #define PCMK_ACTION_STOP "stop" #define PCMK_ACTION_STOPPED "stopped" #define PCMK_ACTION_VALIDATE_ALL "validate-all" #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_ACTIONS__H diff --git a/include/crm/crm.h b/include/crm/crm.h index c3a69e1bb0..b9bd329767 100644 --- a/include/crm/crm.h +++ b/include/crm/crm.h @@ -1,185 +1,185 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_CRM__H # define PCMK__CRM_CRM__H # include # include # include # include # include # include #ifdef __cplusplus extern "C" { #endif /** * \file * \brief A dumping ground * \ingroup core */ #ifndef PCMK_ALLOW_DEPRECATED /*! * \brief Allow use of deprecated Pacemaker APIs * * By default, external code using Pacemaker headers is allowed to use * deprecated Pacemaker APIs. If PCMK_ALLOW_DEPRECATED is defined to 0 before * including any Pacemaker headers, deprecated APIs will be unusable. It is * strongly recommended to leave this unchanged for production and release * builds, to avoid breakage when users upgrade to new Pacemaker releases that * deprecate more APIs. This should be defined to 0 only for development and * testing builds when desiring to check for usage of currently deprecated APIs. */ #define PCMK_ALLOW_DEPRECATED 1 #endif /*! * The CRM feature set assists with compatibility in mixed-version clusters. * The major version number increases when nodes with different versions * would not work (rolling upgrades are not allowed). The minor version * number increases when mixed-version clusters are allowed only during * rolling upgrades (a node with the oldest feature set will be elected DC). The * minor-minor version number is ignored, but allows resource agents to detect * cluster support for various features. * * The feature set also affects the processing of old saved CIBs (such as for * many scheduler regression tests). * * Particular feature points currently tested by Pacemaker code: * * >2.1: Operation updates include timing data * >=3.0.5: XML v2 digests are created * >=3.0.8: Peers do not need acks for cancellations * >=3.0.9: DC will send its own shutdown request to all peers * XML v2 patchsets are created by default * >=3.0.13: Fail counts include operation name and interval * >=3.2.0: DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED */ # define CRM_FEATURE_SET "3.18.0" /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and * recipient of a CPG message. This imposes an arbitrary limit on cluster node * names. */ //! \brief Maximum length of a Corosync cluster node name (in bytes) #define MAX_NAME 256 # define CRM_META "CRM_meta" extern char *crm_system_name; // How we represent "infinite" scores # define CRM_SCORE_INFINITY 1000000 # define CRM_INFINITY_S "INFINITY" # define CRM_PLUS_INFINITY_S "+" CRM_INFINITY_S # define CRM_MINUS_INFINITY_S "-" CRM_INFINITY_S /* @COMPAT API < 2.0.0 Deprecated "infinity" aliases * * INFINITY might be defined elsewhere (e.g. math.h), so undefine it first. * This, of course, complicates any attempt to use the other definition in any * code that includes this header. */ # undef INFINITY # define INFINITY_S "INFINITY" # define MINUS_INFINITY_S "-INFINITY" # define INFINITY 1000000 /* Sub-systems */ # define CRM_SYSTEM_DC "dc" #define CRM_SYSTEM_DCIB "dcib" // Primary instance of CIB manager # define CRM_SYSTEM_CIB "cib" # define CRM_SYSTEM_CRMD "crmd" # define CRM_SYSTEM_LRMD "lrmd" # define CRM_SYSTEM_PENGINE "pengine" # define CRM_SYSTEM_TENGINE "tengine" # define CRM_SYSTEM_STONITHD "stonithd" # define CRM_SYSTEM_MCP "pacemakerd" // Names of internally generated node attributes # define CRM_ATTR_UNAME "#uname" # define CRM_ATTR_ID "#id" # define CRM_ATTR_KIND "#kind" # define CRM_ATTR_ROLE "#role" # define CRM_ATTR_IS_DC "#is_dc" # define CRM_ATTR_CLUSTER_NAME "#cluster-name" # define CRM_ATTR_SITE_NAME "#site-name" # define CRM_ATTR_UNFENCED "#node-unfenced" # define CRM_ATTR_DIGESTS_ALL "#digests-all" # define CRM_ATTR_DIGESTS_SECURE "#digests-secure" # define CRM_ATTR_PROTOCOL "#attrd-protocol" # define CRM_ATTR_FEATURE_SET "#feature-set" /* Valid operations */ # define CRM_OP_NOOP "noop" # define CRM_OP_JOIN_ANNOUNCE "join_announce" # define CRM_OP_JOIN_OFFER "join_offer" # define CRM_OP_JOIN_REQUEST "join_request" # define CRM_OP_JOIN_ACKNAK "join_ack_nack" # define CRM_OP_JOIN_CONFIRM "join_confirm" # define CRM_OP_PING "ping" # define CRM_OP_NODE_INFO "node-info" # define CRM_OP_THROTTLE "throttle" # define CRM_OP_VOTE "vote" # define CRM_OP_NOVOTE "no-vote" # define CRM_OP_HELLO "hello" # define CRM_OP_PECALC "pe_calc" # define CRM_OP_QUIT "quit" # define CRM_OP_LOCAL_SHUTDOWN "start_shutdown" # define CRM_OP_SHUTDOWN_REQ "req_shutdown" # define CRM_OP_SHUTDOWN PCMK_ACTION_DO_SHUTDOWN # define CRM_OP_REGISTER "register" # define CRM_OP_IPC_FWD "ipc_fwd" # define CRM_OP_INVOKE_LRM "lrm_invoke" # define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10 # define CRM_OP_LRM_DELETE PCMK_ACTION_LRM_DELETE # define CRM_OP_LRM_FAIL "lrm_fail" # define CRM_OP_PROBED "probe_complete" # define CRM_OP_REPROBE "probe_again" -# define CRM_OP_CLEAR_FAILCOUNT "clear_failcount" +# define CRM_OP_CLEAR_FAILCOUNT PCMK_ACTION_CLEAR_FAILCOUNT # define CRM_OP_REMOTE_STATE "remote_state" # define CRM_OP_RM_NODE_CACHE "rm_node_cache" # define CRM_OP_MAINTENANCE_NODES PCMK_ACTION_MAINTENANCE_NODES /* Possible cluster membership states */ # define CRMD_JOINSTATE_DOWN "down" # define CRMD_JOINSTATE_PENDING "pending" # define CRMD_JOINSTATE_MEMBER "member" # define CRMD_JOINSTATE_NACK "banned" # define CRMD_METADATA_CALL_TIMEOUT 30000 # include # include # include # include static inline const char * crm_action_str(const char *task, guint interval_ms) { if ((task != NULL) && (interval_ms == 0) && (strcasecmp(task, PCMK_ACTION_MONITOR) == 0)) { return "probe"; } return task; } #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) #include #endif #ifdef __cplusplus } #endif #endif diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c index 03025748f8..dbc92761ae 100644 --- a/lib/pacemaker/pcmk_graph_producer.c +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -1,1092 +1,1092 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pe_action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->details->uname) /*! * \internal * \brief Add an XML node tag for a specified ID * * \param[in] id Node UUID to add * \param[in,out] xml Parent XML tag to add to */ static xmlNode* add_node_to_xml_by_id(const char *id, xmlNode *xml) { xmlNode *node_xml; node_xml = create_xml_node(xml, XML_CIB_TAG_NODE); crm_xml_add(node_xml, XML_ATTR_ID, id); return node_xml; } /*! * \internal * \brief Add an XML node tag for a specified node * * \param[in] node Node to add * \param[in,out] xml XML to add node to */ static void add_node_to_xml(const pe_node_t *node, void *xml) { add_node_to_xml_by_id(node->details->id, (xmlNode *) xml); } /*! * \internal * \brief Count (optionally add to XML) nodes needing maintenance state update * * \param[in,out] xml Parent XML tag to add to, if any * \param[in] data_set Working set for cluster * * \return Count of nodes added * \note Only Pacemaker Remote nodes are considered currently */ static int add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set) { xmlNode *maintenance = NULL; int count = 0; if (xml != NULL) { maintenance = create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE); } for (const GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { const pe_node_t *node = iter->data; if (pe__is_guest_or_remote_node(node) && (node->details->maintenance != node->details->remote_maintenance)) { if (maintenance != NULL) { crm_xml_add(add_node_to_xml_by_id(node->details->id, maintenance), XML_NODE_IS_MAINTENANCE, (node->details->maintenance? "1" : "0")); } count++; } } crm_trace("%s %d nodes in need of maintenance mode update in state", ((maintenance == NULL)? "Counted" : "Added"), count); return count; } /*! * \internal * \brief Add pseudo action with nodes needing maintenance state update * * \param[in,out] data_set Working set for cluster */ static void add_maintenance_update(pe_working_set_t *data_set) { pe_action_t *action = NULL; if (add_maintenance_nodes(NULL, data_set) != 0) { action = get_pseudo_op(PCMK_ACTION_MAINTENANCE_NODES, data_set); pe__set_action_flags(action, pe_action_print_always); } } /*! * \internal * \brief Add XML with nodes that an action is expected to bring down * * If a specified action is expected to bring any nodes down, add an XML block * with their UUIDs. When a node is lost, this allows the controller to * determine whether it was expected. * * \param[in,out] xml Parent XML tag to add to * \param[in] action Action to check for downed nodes */ static void add_downed_nodes(xmlNode *xml, const pe_action_t *action) { CRM_CHECK((xml != NULL) && (action != NULL) && (action->node != NULL), return); if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { /* Shutdown makes the action's node down */ xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* Fencing makes the action's node and any hosted guest nodes down */ const char *fence = g_hash_table_lookup(action->meta, "stonith_action"); if (pcmk__is_fencing_action(fence)) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); pe_foreach_guest_node(action->node->details->data_set, action->node, add_node_to_xml, downed); } } else if (action->rsc && action->rsc->is_remote_node && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Stopping a remote connection resource makes connected node down, * unless it's part of a migration */ GList *iter; pe_action_t *input; bool migrating = false; for (iter = action->actions_before; iter != NULL; iter = iter->next) { input = ((pe_action_wrapper_t *) iter->data)->action; if ((input->rsc != NULL) && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_none) && pcmk__str_eq(input->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { migrating = true; break; } } if (!migrating) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->rsc->id, downed); } } } /*! * \internal * \brief Create a transition graph operation key for a clone action * * \param[in] action Clone action * \param[in] interval_ms Action interval in milliseconds * * \return Newly allocated string with transition graph operation key */ static char * clone_op_key(const pe_action_t *action, guint interval_ms) { if (pcmk__str_eq(action->task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL)); return pcmk__notify_key(action->rsc->clone_name, n_type, n_task); } else if (action->cancel_task != NULL) { return pcmk__op_key(action->rsc->clone_name, action->cancel_task, interval_ms); } else { return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms); } } /*! * \internal * \brief Add node details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] xml Transition graph action XML for \p action */ static void add_node_details(const pe_action_t *action, xmlNode *xml) { pe_node_t *router_node = pcmk__connection_host_for_action(action); crm_xml_add(xml, XML_LRM_ATTR_TARGET, action->node->details->uname); crm_xml_add(xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); if (router_node != NULL) { crm_xml_add(xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname); } } /*! * \internal * \brief Add resource details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_resource_details(const pe_action_t *action, xmlNode *action_xml) { xmlNode *rsc_xml = NULL; const char *attr_list[] = { XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER, XML_ATTR_TYPE }; /* If a resource is locked to a node via shutdown-lock, mark its actions * so the controller can preserve the lock when the action completes. */ if (pcmk__action_locks_rsc_to_node(action)) { crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, (long long) action->rsc->lock_time); } // List affected resource rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); if (pcmk_is_set(action->rsc->flags, pe_rsc_orphan) && (action->rsc->clone_name != NULL)) { /* Use the numbered instance name here, because if there is more * than one instance on a node, we need to make sure the command * goes to the right one. * * This is important even for anonymous clones, because the clone's * unique meta-attribute might have just been toggled from on to * off. */ crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } else if (!pcmk_is_set(action->rsc->flags, pe_rsc_unique)) { const char *xml_id = ID(action->rsc->xml); crm_debug("Using anonymous clone name %s for %s (aka %s)", xml_id, action->rsc->id, action->rsc->clone_name); /* ID is what we'd like client to use * ID_LONG is what they might know it as instead * * ID_LONG is only strictly needed /here/ during the * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC * supporting this feature). * * If anyone toggles the unique flag to 'on', the * 'instance free' name will correspond to an orphan * and fall into the clause above instead */ crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); if ((action->rsc->clone_name != NULL) && !pcmk__str_eq(xml_id, action->rsc->clone_name, pcmk__str_none)) { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); } else { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } } else { CRM_ASSERT(action->rsc->clone_name == NULL); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); } for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); } } /*! * \internal * \brief Add action attributes to transition graph action XML * * \param[in,out] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_action_attributes(pe_action_t *action, xmlNode *action_xml) { xmlNode *args_xml = NULL; /* We create free-standing XML to start, so we can sort the attributes * before adding it to action_xml, which keeps the scheduler regression * test graphs comparable. */ args_xml = create_xml_node(NULL, XML_TAG_ATTRS); crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if ((action->rsc != NULL) && (action->node != NULL)) { // Get the resource instance attributes, evaluated properly for node GHashTable *params = pe_rsc_params(action->rsc, action->node, action->rsc->cluster); pcmk__substitute_remote_addr(action->rsc, params); g_hash_table_foreach(params, hash2smartfield, args_xml); } else if ((action->rsc != NULL) && (action->rsc->variant <= pe_native)) { GHashTable *params = pe_rsc_params(action->rsc, NULL, action->rsc->cluster); g_hash_table_foreach(params, hash2smartfield, args_xml); } g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (action->rsc != NULL) { pe_resource_t *parent = action->rsc; while (parent != NULL) { parent->cmds->add_graph_meta(parent, args_xml); parent = parent->parent; } pcmk__add_bundle_meta_to_xml(args_xml, action); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none) && (action->node != NULL)) { /* Pass the node's attributes as meta-attributes. * * @TODO: Determine whether it is still necessary to do this. It was * added in 33d99707, probably for the libfence-based implementation in * c9a90bd, which is no longer used. */ g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml); } sorted_xml(args_xml, action_xml, FALSE); free_xml(args_xml); } /*! * \internal * \brief Create the transition graph XML for a scheduled action * * \param[in,out] parent Parent XML element to add action to * \param[in,out] action Scheduled action * \param[in] skip_details If false, add action details as sub-elements * \param[in] data_set Cluster working set */ static void create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details, const pe_working_set_t *data_set) { bool needs_node_info = true; bool needs_maintenance_info = false; xmlNode *action_xml = NULL; if ((action == NULL) || (data_set == NULL)) { return; } // Create the top-level element based on task if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* All fences need node info; guest node fences are pseudo-events */ if (pcmk_is_set(action->flags, pe_action_pseudo)) { action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT); } else { action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); } } else if (pcmk__str_any_of(action->task, PCMK_ACTION_DO_SHUTDOWN, - CRM_OP_CLEAR_FAILCOUNT, NULL)) { + PCMK_ACTION_CLEAR_FAILCOUNT, NULL)) { action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); } else if (pcmk__str_eq(action->task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { // CIB-only clean-up for shutdown locks action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB); } else if (pcmk_is_set(action->flags, pe_action_pseudo)) { if (pcmk__str_eq(action->task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_none)) { needs_maintenance_info = true; } action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT); needs_node_info = false; } else { action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP); } crm_xml_add_int(action_xml, XML_ATTR_ID, action->id); crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) { char *clone_key = NULL; guint interval_ms; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } clone_key = clone_op_key(action, interval_ms); crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid); free(clone_key); } else { crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); } if (needs_node_info && (action->node != NULL)) { add_node_details(action, action_xml); g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET), strdup(action->node->details->uname)); g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID), strdup(action->node->details->id)); } if (skip_details) { return; } if ((action->rsc != NULL) && !pcmk_is_set(action->flags, pe_action_pseudo)) { // This is a real resource action, so add resource details add_resource_details(action, action_xml); } /* List any attributes in effect */ add_action_attributes(action, action_xml); /* List any nodes this action is expected to make down */ if (needs_node_info && (action->node != NULL)) { add_downed_nodes(action_xml, action); } if (needs_maintenance_info) { add_maintenance_nodes(action_xml, data_set); } } /*! * \internal * \brief Check whether an action should be added to the transition graph * * \param[in] action Action to check * * \return true if action should be added to graph, otherwise false */ static bool should_add_action_to_graph(const pe_action_t *action) { if (!pcmk_is_set(action->flags, pe_action_runnable)) { crm_trace("Ignoring action %s (%d): unrunnable", action->uuid, action->id); return false; } if (pcmk_is_set(action->flags, pe_action_optional) && !pcmk_is_set(action->flags, pe_action_print_always)) { crm_trace("Ignoring action %s (%d): optional", action->uuid, action->id); return false; } /* Actions for unmanaged resources should be excluded from the graph, * with the exception of monitors and cancellation of recurring monitors. */ if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pe_rsc_managed) && !pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { const char *interval_ms_s; /* A cancellation of a recurring monitor will get here because the task * is cancel rather than monitor, but the interval can still be used to * recognize it. The interval has been normalized to milliseconds by * this point, so a string comparison is sufficient. */ interval_ms_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS); if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) { crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)", action->uuid, action->id, action->rsc->id); return false; } } /* Always add pseudo-actions, fence actions, and shutdown actions (already * determined to be required and runnable by this point) */ if (pcmk_is_set(action->flags, pe_action_pseudo) || pcmk__strcase_any_of(action->task, PCMK_ACTION_STONITH, PCMK_ACTION_DO_SHUTDOWN, NULL)) { return true; } if (action->node == NULL) { pe_err("Skipping action %s (%d) " "because it was not assigned to a node (bug?)", action->uuid, action->id); pcmk__log_action("Unassigned", action, false); return false; } if (pcmk_is_set(action->flags, pe_action_dc)) { crm_trace("Action %s (%d) should be dumped: " "can run on DC instead of %s", action->uuid, action->id, pe__node_name(action->node)); } else if (pe__is_guest_node(action->node) && !action->node->details->remote_requires_reset) { crm_trace("Action %s (%d) should be dumped: " "assuming will be runnable on guest %s", action->uuid, action->id, pe__node_name(action->node)); } else if (!action->node->details->online) { pe_err("Skipping action %s (%d) " "because it was scheduled for offline node (bug?)", action->uuid, action->id); pcmk__log_action("Offline node", action, false); return false; } else if (action->node->details->unclean) { pe_err("Skipping action %s (%d) " "because it was scheduled for unclean node (bug?)", action->uuid, action->id); pcmk__log_action("Unclean node", action, false); return false; } return true; } /*! * \internal * \brief Check whether an ordering's flags can change an action * * \param[in] ordering Ordering to check * * \return true if ordering has flags that can change an action, false otherwise */ static bool ordering_can_change_actions(const pe_action_wrapper_t *ordering) { return pcmk_any_flags_set(ordering->type, ~(pe_order_implies_first_printed |pe_order_implies_then_printed |pe_order_optional)); } /*! * \internal * \brief Check whether an action input should be in the transition graph * * \param[in] action Action to check * \param[in,out] input Action input to check * * \return true if input should be in graph, false otherwise * \note This function may not only check an input, but disable it under certian * circumstances (load or anti-colocation orderings that are not needed). */ static bool should_add_input_to_graph(const pe_action_t *action, pe_action_wrapper_t *input) { if (input->state == pe_link_dumped) { return true; } if (input->type == pe_order_none) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering disabled", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pe_action_runnable) && !ordering_can_change_actions(input)) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pe_action_runnable) && pcmk_is_set(input->type, pe_order_one_or_more)) { crm_trace("Ignoring %s (%d) input %s (%d): " "one-or-more and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->type, pe_order_implies_first_migratable) && !pcmk_is_set(input->action->flags, pe_action_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "implies input migratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->type, pe_order_apply_first_non_migratable) && pcmk_is_set(input->action->flags, pe_action_migrate_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "only if input unmigratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if ((input->type == pe_order_optional) && pcmk_is_set(input->action->flags, pe_action_migrate_runnable) && pcmk__ends_with(input->action->uuid, "_stop_0")) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional but stop in migration", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (input->type == pe_order_load) { pe_node_t *input_node = input->action->node; // load orderings are relevant only if actions are for same node if ((action->rsc != NULL) && pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { pe_node_t *assigned = action->rsc->allocated_to; /* For load_stopped -> migrate_to orderings, we care about where it * has been assigned to, not where it will be executed. */ if (!pe__same_node(input_node, assigned)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (assigned? assigned->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } } else if (!pe__same_node(input_node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (action->node? action->node->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->type == pe_order_anti_colocation) { if (input->action->node && action->node && !pe__same_node(input->action->node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, pe__node_name(action->node), pe__node_name(input->action->node)); input->type = pe_order_none; return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->action->rsc && input->action->rsc != action->rsc && pcmk_is_set(input->action->rsc->flags, pe_rsc_failed) && !pcmk_is_set(input->action->rsc->flags, pe_rsc_managed) && pcmk__ends_with(input->action->uuid, "_stop_0") && action->rsc && pe_rsc_is_clone(action->rsc)) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", input->action->uuid, action->uuid); return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional) && !pcmk_any_flags_set(input->action->flags, pe_action_print_always|pe_action_dumped) && !should_add_action_to_graph(input->action)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input optional", action->uuid, action->id, input->action->uuid, input->action->id); return false; } crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x", action->uuid, action->id, action_type_str(input->action->flags), input->action->uuid, input->action->id, action_node_str(input->action), action_runnable_str(input->action->flags), action_optional_str(input->action->flags), input->type); return true; } /*! * \internal * \brief Check whether an ordering creates an ordering loop * * \param[in] init_action "First" action in ordering * \param[in] action Callers should always set this the same as * \p init_action (this function may use a different * value for recursive calls) * \param[in,out] input Action wrapper for "then" action in ordering * * \return true if the ordering creates a loop, otherwise false */ bool pcmk__graph_has_loop(const pe_action_t *init_action, const pe_action_t *action, pe_action_wrapper_t *input) { bool has_loop = false; if (pcmk_is_set(input->action->flags, pe_action_tracking)) { crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); return false; } // Don't need to check inputs that won't be used if (!should_add_input_to_graph(action, input)) { return false; } if (input->action == init_action) { crm_debug("Input loop found in %s@%s ->...-> %s@%s", action->uuid, action->node? action->node->details->uname : "", init_action->uuid, init_action->node? init_action->node->details->uname : ""); return true; } pe__set_action_flags(input->action, pe_action_tracking); crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)" "for graph loop with %s@%s ", action->uuid, action->node? action->node->details->uname : "", input->action->uuid, input->action->node? input->action->node->details->uname : "", input->type, init_action->uuid, init_action->node? init_action->node->details->uname : ""); // Recursively check input itself for loops for (GList *iter = input->action->actions_before; iter != NULL; iter = iter->next) { if (pcmk__graph_has_loop(init_action, input->action, (pe_action_wrapper_t *) iter->data)) { // Recursive call already logged a debug message has_loop = true; break; } } pe__clear_action_flags(input->action, pe_action_tracking); if (!has_loop) { crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); } return has_loop; } /*! * \internal * \brief Create a synapse XML element for a transition graph * * \param[in] action Action that synapse is for * \param[in,out] data_set Cluster working set containing graph * * \return Newly added XML element for new graph synapse */ static xmlNode * create_graph_synapse(const pe_action_t *action, pe_working_set_t *data_set) { int synapse_priority = 0; xmlNode *syn = create_xml_node(data_set->graph, "synapse"); crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); data_set->num_synapse++; if (action->rsc != NULL) { synapse_priority = action->rsc->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); } return syn; } /*! * \internal * \brief Add an action to the transition graph XML if appropriate * * \param[in,out] data Action to possibly add * \param[in,out] user_data Cluster working set * * \note This will de-duplicate the action inputs, meaning that the * pe_action_wrapper_t:type flags can no longer be relied on to retain * their original settings. That means this MUST be called after * pcmk__apply_orderings() is complete, and nothing after this should rely * on those type flags. (For example, some code looks for type equal to * some flag rather than whether the flag is set, and some code looks for * particular combinations of flags -- such code must be done before * pcmk__create_graph().) */ static void add_action_to_graph(gpointer data, gpointer user_data) { pe_action_t *action = (pe_action_t *) data; pe_working_set_t *data_set = (pe_working_set_t *) user_data; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; /* If we haven't already, de-duplicate inputs (even if we won't be adding * the action to the graph, so that crm_simulate's dot graphs don't have * duplicates). */ if (!pcmk_is_set(action->flags, pe_action_dedup)) { pcmk__deduplicate_action_inputs(action); pe__set_action_flags(action, pe_action_dedup); } if (pcmk_is_set(action->flags, pe_action_dumped) // Already added, or || !should_add_action_to_graph(action)) { // shouldn't be added return; } pe__set_action_flags(action, pe_action_dumped); crm_trace("Adding action %d (%s%s%s) to graph", action->id, action->uuid, ((action->node == NULL)? "" : " on "), ((action->node == NULL)? "" : action->node->details->uname)); syn = create_graph_synapse(action, data_set); set = create_xml_node(syn, "action_set"); in = create_xml_node(syn, "inputs"); create_graph_action(set, action, false, data_set); for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *input = (pe_action_wrapper_t *) lpc->data; if (should_add_input_to_graph(action, input)) { xmlNode *input_xml = create_xml_node(in, "trigger"); input->state = pe_link_dumped; create_graph_action(input_xml, input->action, true, data_set); } } } static int transition_id = -1; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const char *filename) { if (was_processing_error) { crm_err("Calculated transition %d (with errors)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else if (was_processing_warning) { crm_warn("Calculated transition %d (with warnings)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else { crm_notice("Calculated transition %d%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } if (crm_config_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } /*! * \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc) { GList *iter = NULL; CRM_ASSERT(rsc != NULL); pe_rsc_trace(rsc, "Adding actions for %s to graph", rsc->id); // First add the resource's own actions g_list_foreach(rsc->actions, add_action_to_graph, rsc->cluster); // Then recursively add its children's actions (appropriate to variant) for (iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child_rsc = (pe_resource_t *) iter->data; child_rsc->cmds->add_actions_to_graph(child_rsc); } } /*! * \internal * \brief Create a transition graph with all cluster actions needed * * \param[in,out] data_set Cluster working set */ void pcmk__create_graph(pe_working_set_t *data_set) { GList *iter = NULL; const char *value = NULL; long long limit = 0LL; transition_id++; crm_trace("Creating transition graph %d", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { crm_xml_add(data_set->graph, "migration-limit", value); } if (data_set->recheck_by > 0) { char *recheck_epoch = NULL; recheck_epoch = crm_strdup_printf("%llu", (long long) data_set->recheck_by); crm_xml_add(data_set->graph, "recheck-by", recheck_epoch); free(recheck_epoch); } /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. */ // Add resource actions to graph for (iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; pe_rsc_trace(rsc, "Processing actions for %s", rsc->id); rsc->cmds->add_actions_to_graph(rsc); } // Add pseudo-action for list of nodes with maintenance state update add_maintenance_update(data_set); // Add non-resource (node) actions for (iter = data_set->actions; iter != NULL; iter = iter->next) { pe_action_t *action = (pe_action_t *) iter->data; if ((action->rsc != NULL) && (action->node != NULL) && action->node->details->shutdown && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance) && !pcmk_any_flags_set(action->flags, pe_action_optional|pe_action_runnable) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Eventually we should just ignore the 'fence' case, but for now * it's the best way to detect (in CTS) when CIB resource updates * are being lost. */ if (pcmk_is_set(data_set->flags, pe_flag_have_quorum) || (data_set->no_quorum_policy == no_quorum_ignore)) { const bool managed = pcmk_is_set(action->rsc->flags, pe_rsc_managed); const bool failed = pcmk_is_set(action->rsc->flags, pe_rsc_failed); crm_crit("Cannot %s %s because of %s:%s%s (%s)", action->node->details->unclean? "fence" : "shut down", pe__node_name(action->node), action->rsc->id, (managed? " blocked" : " unmanaged"), (failed? " failed" : ""), action->uuid); } } add_action_to_graph((gpointer) action, (gpointer) data_set); } crm_log_xml_trace(data_set->graph, "graph"); } diff --git a/lib/pacemaker/pcmk_sched_remote.c b/lib/pacemaker/pcmk_sched_remote.c index 4e2fc68908..9cafafd419 100644 --- a/lib/pacemaker/pcmk_sched_remote.c +++ b/lib/pacemaker/pcmk_sched_remote.c @@ -1,725 +1,725 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" enum remote_connection_state { remote_state_unknown = 0, remote_state_alive = 1, remote_state_resting = 2, remote_state_failed = 3, remote_state_stopped = 4 }; static const char * state2text(enum remote_connection_state state) { switch (state) { case remote_state_unknown: return "unknown"; case remote_state_alive: return "alive"; case remote_state_resting: return "resting"; case remote_state_failed: return "failed"; case remote_state_stopped: return "stopped"; } return "impossible"; } /* We always use pe_order_preserve with these convenience functions to exempt * internally generated constraints from the prohibition of user constraints * involving remote connection resources. * * The start ordering additionally uses pe_order_runnable_left so that the * specified action is not runnable if the start is not runnable. */ static inline void order_start_then_action(pe_resource_t *first_rsc, pe_action_t *then_action, uint32_t extra) { if ((first_rsc != NULL) && (then_action != NULL)) { pcmk__new_ordering(first_rsc, start_key(first_rsc), NULL, then_action->rsc, NULL, then_action, pe_order_preserve|pe_order_runnable_left|extra, first_rsc->cluster); } } static inline void order_action_then_stop(pe_action_t *first_action, pe_resource_t *then_rsc, uint32_t extra) { if ((first_action != NULL) && (then_rsc != NULL)) { pcmk__new_ordering(first_action->rsc, NULL, first_action, then_rsc, stop_key(then_rsc), NULL, pe_order_preserve|extra, then_rsc->cluster); } } static enum remote_connection_state get_remote_node_state(const pe_node_t *node) { const pe_resource_t *remote_rsc = NULL; const pe_node_t *cluster_node = NULL; CRM_ASSERT(node != NULL); remote_rsc = node->details->remote_rsc; CRM_ASSERT(remote_rsc != NULL); cluster_node = pe__current_node(remote_rsc); /* If the cluster node the remote connection resource resides on * is unclean or went offline, we can't process any operations * on that remote node until after it starts elsewhere. */ if ((remote_rsc->next_role == RSC_ROLE_STOPPED) || (remote_rsc->allocated_to == NULL)) { // The connection resource is not going to run anywhere if ((cluster_node != NULL) && cluster_node->details->unclean) { /* The remote connection is failed because its resource is on a * failed node and can't be recovered elsewhere, so we must fence. */ return remote_state_failed; } if (!pcmk_is_set(remote_rsc->flags, pe_rsc_failed)) { /* Connection resource is cleanly stopped */ return remote_state_stopped; } /* Connection resource is failed */ if ((remote_rsc->next_role == RSC_ROLE_STOPPED) && remote_rsc->remote_reconnect_ms && node->details->remote_was_fenced && !pe__shutdown_requested(node)) { /* We won't know whether the connection is recoverable until the * reconnect interval expires and we reattempt connection. */ return remote_state_unknown; } /* The remote connection is in a failed state. If there are any * resources known to be active on it (stop) or in an unknown state * (probe), we must assume the worst and fence it. */ return remote_state_failed; } else if (cluster_node == NULL) { /* Connection is recoverable but not currently running anywhere, so see * if we can recover it first */ return remote_state_unknown; } else if (cluster_node->details->unclean || !(cluster_node->details->online)) { // Connection is running on a dead node, see if we can recover it first return remote_state_resting; } else if (pcmk__list_of_multiple(remote_rsc->running_on) && (remote_rsc->partial_migration_source != NULL) && (remote_rsc->partial_migration_target != NULL)) { /* We're in the middle of migrating a connection resource, so wait until * after the migration completes before performing any actions. */ return remote_state_resting; } return remote_state_alive; } /*! * \internal * \brief Order actions on remote node relative to actions for the connection * * \param[in,out] action An action scheduled on a Pacemaker Remote node */ static void apply_remote_ordering(pe_action_t *action) { pe_resource_t *remote_rsc = NULL; enum action_tasks task = text2task(action->task); enum remote_connection_state state = get_remote_node_state(action->node); uint32_t order_opts = pe_order_none; if (action->rsc == NULL) { return; } CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc != NULL); crm_trace("Order %s action %s relative to %s%s (state: %s)", action->task, action->uuid, pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, state2text(state)); if (pcmk__strcase_any_of(action->task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: order_opts = pe_order_none; if (state == remote_state_failed) { /* Force recovery, by making this action required */ pe__set_order_flags(order_opts, pe_order_implies_then); } /* Ensure connection is up before running this action */ order_start_then_action(remote_rsc, action, order_opts); break; case stop_rsc: if (state == remote_state_alive) { order_action_then_stop(action, remote_rsc, pe_order_implies_first); } else if (state == remote_state_failed) { /* The resource is active on the node, but since we don't have a * valid connection, the only way to stop the resource is by * fencing the node. There is no need to order the stop relative * to the remote connection, since the stop will become implied * by the fencing. */ pe_fence_node(remote_rsc->cluster, action->node, "resources are active but " "connection is unrecoverable", FALSE); } else if (remote_rsc->next_role == RSC_ROLE_STOPPED) { /* State must be remote_state_unknown or remote_state_stopped. * Since the connection is not coming back up in this * transition, stop this resource first. */ order_action_then_stop(action, remote_rsc, pe_order_implies_first); } else { /* The connection is going to be started somewhere else, so * stop this resource after that completes. */ order_start_then_action(remote_rsc, action, pe_order_none); } break; case action_demote: /* Only order this demote relative to the connection start if the * connection isn't being torn down. Otherwise, the demote would be * blocked because the connection start would not be allowed. */ if ((state == remote_state_resting) || (state == remote_state_unknown)) { order_start_then_action(remote_rsc, action, pe_order_none); } /* Otherwise we can rely on the stop ordering */ break; default: /* Wait for the connection resource to be up */ if (pcmk__action_is_recurring(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ order_start_then_action(remote_rsc, action, pe_order_implies_then); } else { pe_node_t *cluster_node = pe__current_node(remote_rsc); if ((task == monitor_rsc) && (state == remote_state_failed)) { /* We would only be here if we do not know the state of the * resource on the remote node. Since we have no way to find * out, it is necessary to fence the node. */ pe_fence_node(remote_rsc->cluster, action->node, "resources are in unknown state " "and connection is unrecoverable", FALSE); } if ((cluster_node != NULL) && (state == remote_state_stopped)) { /* The connection is currently up, but is going down * permanently. Make sure we check services are actually * stopped _before_ we let the connection get closed. */ order_action_then_stop(action, remote_rsc, pe_order_runnable_left); } else { order_start_then_action(remote_rsc, action, pe_order_none); } } break; } } static void apply_container_ordering(pe_action_t *action) { /* VMs are also classified as containers for these purposes... in * that they both involve a 'thing' running on a real or remote * cluster node. * * This allows us to be smarter about the type and extent of * recovery actions required in various scenarios */ pe_resource_t *remote_rsc = NULL; pe_resource_t *container = NULL; enum action_tasks task = text2task(action->task); CRM_ASSERT(action->rsc != NULL); CRM_ASSERT(action->node != NULL); CRM_ASSERT(pe__is_guest_or_remote_node(action->node)); remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc != NULL); container = remote_rsc->container; CRM_ASSERT(container != NULL); if (pcmk_is_set(container->flags, pe_rsc_failed)) { pe_fence_node(action->rsc->cluster, action->node, "container failed", FALSE); } crm_trace("Order %s action %s relative to %s%s for %s%s", action->task, action->uuid, pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", remote_rsc->id, pcmk_is_set(container->flags, pe_rsc_failed)? "failed " : "", container->id); if (pcmk__strcase_any_of(action->task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ task = stop_rsc; } switch (task) { case start_rsc: case action_promote: // Force resource recovery if the container is recovered order_start_then_action(container, action, pe_order_implies_then); // Wait for the connection resource to be up, too order_start_then_action(remote_rsc, action, pe_order_none); break; case stop_rsc: case action_demote: if (pcmk_is_set(container->flags, pe_rsc_failed)) { /* When the container representing a guest node fails, any stop * or demote actions for resources running on the guest node * are implied by the container stopping. This is similar to * how fencing operations work for cluster nodes and remote * nodes. */ } else { /* Ensure the operation happens before the connection is brought * down. * * If we really wanted to, we could order these after the * connection start, IFF the container's current role was * stopped (otherwise we re-introduce an ordering loop when the * connection is restarting). */ order_action_then_stop(action, remote_rsc, pe_order_none); } break; default: /* Wait for the connection resource to be up */ if (pcmk__action_is_recurring(action)) { /* In case we ever get the recovery logic wrong, force * recurring monitors to be restarted, even if just * the connection was re-established */ if (task != no_action) { order_start_then_action(remote_rsc, action, pe_order_implies_then); } } else { order_start_then_action(remote_rsc, action, pe_order_none); } break; } } /*! * \internal * \brief Order all relevant actions relative to remote connection actions * * \param[in,out] data_set Cluster working set */ void pcmk__order_remote_connection_actions(pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) { return; } crm_trace("Creating remote connection orderings"); for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) { pe_action_t *action = iter->data; pe_resource_t *remote = NULL; // We are only interested in resource actions if (action->rsc == NULL) { continue; } /* Special case: If we are clearing the failcount of an actual * remote connection resource, then make sure this happens before * any start of the resource in this transition. */ if (action->rsc->is_remote_node && - pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, + pcmk__str_eq(action->task, PCMK_ACTION_CLEAR_FAILCOUNT, pcmk__str_none)) { pcmk__new_ordering(action->rsc, NULL, action, action->rsc, pcmk__op_key(action->rsc->id, PCMK_ACTION_START, 0), NULL, pe_order_optional, data_set); continue; } // We are only interested in actions assigned to a node if (action->node == NULL) { continue; } if (!pe__is_guest_or_remote_node(action->node)) { continue; } /* We are only interested in real actions. * * @TODO This is probably wrong; pseudo-actions might be converted to * real actions and vice versa later in update_actions() at the end of * pcmk__apply_orderings(). */ if (pcmk_is_set(action->flags, pe_action_pseudo)) { continue; } remote = action->node->details->remote_rsc; if (remote == NULL) { // Orphaned continue; } /* Another special case: if a resource is moving to a Pacemaker Remote * node, order the stop on the original node after any start of the * remote connection. This ensures that if the connection fails to * start, we leave the resource running on the original node. */ if (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_none)) { for (GList *item = action->rsc->actions; item != NULL; item = item->next) { pe_action_t *rsc_action = item->data; if (!pe__same_node(rsc_action->node, action->node) && pcmk__str_eq(rsc_action->task, PCMK_ACTION_STOP, pcmk__str_none)) { pcmk__new_ordering(remote, start_key(remote), NULL, action->rsc, NULL, rsc_action, pe_order_optional, data_set); } } } /* The action occurs across a remote connection, so create * ordering constraints that guarantee the action occurs while the node * is active (after start, before stop ... things like that). * * This is somewhat brittle in that we need to make sure the results of * this ordering are compatible with the result of get_router_node(). * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part * of this logic rather than create_graph_action(). */ if (remote->container) { crm_trace("Container ordering for %s", action->uuid); apply_container_ordering(action); } else { crm_trace("Remote ordering for %s", action->uuid); apply_remote_ordering(action); } } } /*! * \internal * \brief Check whether a node is a failed remote node * * \param[in] node Node to check * * \return true if \p node is a failed remote node, false otherwise */ bool pcmk__is_failed_remote_node(const pe_node_t *node) { return pe__is_remote_node(node) && (node->details->remote_rsc != NULL) && (get_remote_node_state(node) == remote_state_failed); } /*! * \internal * \brief Check whether a given resource corresponds to a given node as guest * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return true if \p node is a guest node and \p rsc is its containing * resource, otherwise false */ bool pcmk__rsc_corresponds_to_guest(const pe_resource_t *rsc, const pe_node_t *node) { return (rsc != NULL) && (rsc->fillers != NULL) && (node != NULL) && (node->details->remote_rsc != NULL) && (node->details->remote_rsc->container == rsc); } /*! * \internal * \brief Get proper connection host that a remote action must be routed through * * A remote connection resource might be starting, stopping, or migrating in the * same transition that an action needs to be executed on its Pacemaker Remote * node. Determine the proper node that the remote action should be routed * through. * * \param[in] action (Potentially remote) action to route * * \return Connection host that action should be routed through if remote, * otherwise NULL */ pe_node_t * pcmk__connection_host_for_action(const pe_action_t *action) { pe_node_t *began_on = NULL; pe_node_t *ended_on = NULL; bool partial_migration = false; const char *task = action->task; if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_none) || !pe__is_guest_or_remote_node(action->node)) { return NULL; } CRM_ASSERT(action->node->details->remote_rsc != NULL); began_on = pe__current_node(action->node->details->remote_rsc); ended_on = action->node->details->remote_rsc->allocated_to; if (action->node->details->remote_rsc && (action->node->details->remote_rsc->container == NULL) && action->node->details->remote_rsc->partial_migration_target) { partial_migration = true; } if (began_on == NULL) { crm_trace("Routing %s for %s through remote connection's " "next node %s (starting)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (ended_on? ended_on->details->uname : "none"), partial_migration? " (partial migration)" : ""); return ended_on; } if (ended_on == NULL) { crm_trace("Routing %s for %s through remote connection's " "current node %s (stopping)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->details->uname : "none"), partial_migration? " (partial migration)" : ""); return began_on; } if (pe__same_node(began_on, ended_on)) { crm_trace("Routing %s for %s through remote connection's " "current node %s (not moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->details->uname : "none"), partial_migration? " (partial migration)" : ""); return began_on; } /* If we get here, the remote connection is moving during this transition. * This means some actions for resources behind the connection will get * routed through the cluster node the connection resource is currently on, * and others are routed through the cluster node the connection will end up * on. */ if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { task = g_hash_table_lookup(action->meta, "notify_operation"); } /* * Stop, demote, and migration actions must occur before the connection can * move (these actions are required before the remote resource can stop). In * this case, we know these actions have to be routed through the initial * cluster node the connection resource lived on before the move takes * place. * * The exception is a partial migration of a (non-guest) remote connection * resource; in that case, all actions (even these) will be ordered after * the connection's pseudo-start on the migration target, so the target is * the router node. */ if (pcmk__strcase_any_of(task, PCMK_ACTION_CANCEL, PCMK_ACTION_STOP, PCMK_ACTION_DEMOTE, PCMK_ACTION_MIGRATE_FROM, PCMK_ACTION_MIGRATE_TO, NULL) && !partial_migration) { crm_trace("Routing %s for %s through remote connection's " "current node %s (moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (began_on? began_on->details->uname : "none"), partial_migration? " (partial migration)" : ""); return began_on; } /* Everything else (start, promote, monitor, probe, refresh, * clear failcount, delete, ...) must occur after the connection starts on * the node it is moving to. */ crm_trace("Routing %s for %s through remote connection's " "next node %s (moving)%s", action->task, (action->rsc? action->rsc->id : "no resource"), (ended_on? ended_on->details->uname : "none"), partial_migration? " (partial migration)" : ""); return ended_on; } /*! * \internal * \brief Replace remote connection's addr="#uname" with actual address * * REMOTE_CONTAINER_HACK: If a given resource is a remote connection resource * with its "addr" parameter set to "#uname", pull the actual value from the * parameters evaluated without a node (which was put there earlier in * pcmk__create_graph() when the bundle's expand() method was called). * * \param[in,out] rsc Resource to check * \param[in,out] params Resource parameters evaluated per node */ void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params) { const char *remote_addr = g_hash_table_lookup(params, XML_RSC_ATTR_REMOTE_RA_ADDR); if (pcmk__str_eq(remote_addr, "#uname", pcmk__str_none)) { GHashTable *base = pe_rsc_params(rsc, NULL, rsc->cluster); remote_addr = g_hash_table_lookup(base, XML_RSC_ATTR_REMOTE_RA_ADDR); if (remote_addr != NULL) { g_hash_table_insert(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR), strdup(remote_addr)); } } } /*! * \brief Add special bundle meta-attributes to XML * * If a given action will be executed on a guest node (including a bundle), * add the special bundle meta-attribute "container-attribute-target" and * environment variable "physical_host" as XML attributes (using meta-attribute * naming). * * \param[in,out] args_xml XML to add attributes to * \param[in] action Action to check */ void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, const pe_action_t *action) { const pe_node_t *guest = action->node; const pe_node_t *host = NULL; enum action_tasks task; if (!pe__is_guest_node(guest)) { return; } task = text2task(action->task); if ((task == action_notify) || (task == action_notified)) { task = text2task(g_hash_table_lookup(action->meta, "notify_operation")); } switch (task) { case stop_rsc: case stopped_rsc: case action_demote: case action_demoted: // "Down" actions take place on guest's current host host = pe__current_node(guest->details->remote_rsc->container); break; case start_rsc: case started_rsc: case monitor_rsc: case action_promote: case action_promoted: // "Up" actions take place on guest's next host host = guest->details->remote_rsc->container->allocated_to; break; default: break; } if (host != NULL) { hash2metafield((gpointer) XML_RSC_ATTR_TARGET, (gpointer) g_hash_table_lookup(action->rsc->meta, XML_RSC_ATTR_TARGET), (gpointer) args_xml); hash2metafield((gpointer) PCMK__ENV_PHYSICAL_HOST, (gpointer) host->details->uname, (gpointer) args_xml); } } diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c index 6dcba52bba..e744f501d3 100644 --- a/lib/pacemaker/pcmk_scheduler.c +++ b/lib/pacemaker/pcmk_scheduler.c @@ -1,814 +1,815 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" CRM_TRACE_INIT_DATA(pacemaker); /*! * \internal * \brief Do deferred action checks after assignment * * When unpacking the resource history, the scheduler checks for resource * configurations that have changed since an action was run. However, at that * time, bundles using the REMOTE_CONTAINER_HACK don't have their final * parameter information, so instead they add a deferred check to a list. This * function processes one entry in that list. * * \param[in,out] rsc Resource that action history is for * \param[in,out] node Node that action history is for * \param[in] rsc_op Action history entry * \param[in] check Type of deferred check to do */ static void check_params(pe_resource_t *rsc, pe_node_t *node, const xmlNode *rsc_op, enum pe_check_parameters check) { const char *reason = NULL; op_digest_cache_t *digest_data = NULL; switch (check) { case pe_check_active: if (pcmk__check_action_config(rsc, node, rsc_op) && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL)) { reason = "action definition changed"; } break; case pe_check_last_failure: digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, rsc->cluster); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s has " "no digest to compare", rsc->id, ID(rsc_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: reason = "resource parameters have changed"; break; } break; } if (reason != NULL) { pe__clear_failcount(rsc, node, reason, rsc->cluster); } } /*! * \internal * \brief Check whether a resource has failcount clearing scheduled on a node * * \param[in] node Node to check * \param[in] rsc Resource to check * * \return true if \p rsc has failcount clearing scheduled on \p node, * otherwise false */ static bool failcount_clear_action_exists(const pe_node_t *node, const pe_resource_t *rsc) { - GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE); + GList *list = pe__resource_actions(rsc, node, PCMK_ACTION_CLEAR_FAILCOUNT, + TRUE); if (list != NULL) { g_list_free(list); return true; } return false; } /*! * \internal * \brief Ban a resource from a node if it reached its failure threshold there * * \param[in,out] data Resource to check failure threshold for * \param[in] user_data Node to check resource on */ static void check_failure_threshold(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; const pe_node_t *node = user_data; // If this is a collective resource, apply recursively to children instead if (rsc->children != NULL) { g_list_foreach(rsc->children, check_failure_threshold, user_data); return; } if (!failcount_clear_action_exists(node, rsc)) { /* Don't force the resource away from this node due to a failcount * that's going to be cleared. * * @TODO Failcount clearing can be scheduled in * pcmk__handle_rsc_config_changes() via process_rsc_history(), or in * schedule_resource_actions() via check_params(). This runs well before * then, so it cannot detect those, meaning we might check the migration * threshold when we shouldn't. Worst case, we stop or move the * resource, then move it back in the next transition. */ pe_resource_t *failed = NULL; if (pcmk__threshold_reached(rsc, node, &failed)) { resource_location(failed, node, -INFINITY, "__fail_limit__", rsc->cluster); } } } /*! * \internal * \brief If resource has exclusive discovery, ban node if not allowed * * Location constraints have a resource-discovery option that allows users to * specify where probes are done for the affected resource. If this is set to * exclusive, probes will only be done on nodes listed in exclusive constraints. * This function bans the resource from the node if the node is not listed. * * \param[in,out] data Resource to check * \param[in] user_data Node to check resource on */ static void apply_exclusive_discovery(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; const pe_node_t *node = user_data; if (rsc->exclusive_discover || pe__const_top_resource(rsc, false)->exclusive_discover) { pe_node_t *match = NULL; // If this is a collective resource, apply recursively to children g_list_foreach(rsc->children, apply_exclusive_discovery, user_data); match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if ((match != NULL) && (match->rsc_discover_mode != pe_discover_exclusive)) { match->weight = -INFINITY; } } } /*! * \internal * \brief Apply stickiness to a resource if appropriate * * \param[in,out] data Resource to check for stickiness * \param[in] user_data Ignored */ static void apply_stickiness(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; pe_node_t *node = NULL; // If this is a collective resource, apply recursively to children instead if (rsc->children != NULL) { g_list_foreach(rsc->children, apply_stickiness, NULL); return; } /* A resource is sticky if it is managed, has stickiness configured, and is * active on a single node. */ if (!pcmk_is_set(rsc->flags, pe_rsc_managed) || (rsc->stickiness < 1) || !pcmk__list_of_1(rsc->running_on)) { return; } node = rsc->running_on->data; /* In a symmetric cluster, stickiness can always be used. In an * asymmetric cluster, we have to check whether the resource is still * allowed on the node, so we don't keep the resource somewhere it is no * longer explicitly enabled. */ if (!pcmk_is_set(rsc->cluster->flags, pe_flag_symmetric_cluster) && (g_hash_table_lookup(rsc->allowed_nodes, node->details->id) == NULL)) { pe_rsc_debug(rsc, "Ignoring %s stickiness because the cluster is " "asymmetric and %s is not explicitly allowed", rsc->id, pe__node_name(node)); return; } pe_rsc_debug(rsc, "Resource %s has %d stickiness on %s", rsc->id, rsc->stickiness, pe__node_name(node)); resource_location(rsc, node, rsc->stickiness, "stickiness", rsc->cluster); } /*! * \internal * \brief Apply shutdown locks for all resources as appropriate * * \param[in,out] data_set Cluster working set */ static void apply_shutdown_locks(pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { return; } for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; rsc->cmds->shutdown_lock(rsc); } } /*! * \internal * \brief Calculate the number of available nodes in the cluster * * \param[in,out] data_set Cluster working set */ static void count_available_nodes(pe_working_set_t *data_set) { if (pcmk_is_set(data_set->flags, pe_flag_no_compat)) { return; } // @COMPAT for API backward compatibility only (cluster does not use value) for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; if ((node != NULL) && (node->weight >= 0) && node->details->online && (node->details->type != node_ping)) { data_set->max_valid_nodes++; } } crm_trace("Online node count: %d", data_set->max_valid_nodes); } /* * \internal * \brief Apply node-specific scheduling criteria * * After the CIB has been unpacked, process node-specific scheduling criteria * including shutdown locks, location constraints, resource stickiness, * migration thresholds, and exclusive resource discovery. */ static void apply_node_criteria(pe_working_set_t *data_set) { crm_trace("Applying node-specific scheduling criteria"); apply_shutdown_locks(data_set); count_available_nodes(data_set); pcmk__apply_locations(data_set); g_list_foreach(data_set->resources, apply_stickiness, NULL); for (GList *node_iter = data_set->nodes; node_iter != NULL; node_iter = node_iter->next) { for (GList *rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { check_failure_threshold(rsc_iter->data, node_iter->data); apply_exclusive_discovery(rsc_iter->data, node_iter->data); } } } /*! * \internal * \brief Assign resources to nodes * * \param[in,out] data_set Cluster working set */ static void assign_resources(pe_working_set_t *data_set) { GList *iter = NULL; crm_trace("Assigning resources to nodes"); if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) { pcmk__sort_resources(data_set); } pcmk__show_node_capacities("Original", data_set); if (pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) { /* Assign remote connection resources first (which will also assign any * colocation dependencies). If the connection is migrating, always * prefer the partial migration target. */ for (iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Assigning remote connection resource '%s'", rsc->id); rsc->cmds->assign(rsc, rsc->partial_migration_target, true); } } } /* now do the rest of the resources */ for (iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; if (!rsc->is_remote_node) { pe_rsc_trace(rsc, "Assigning %s resource '%s'", crm_element_name(rsc->xml), rsc->id); rsc->cmds->assign(rsc, NULL, true); } } pcmk__show_node_capacities("Remaining", data_set); } /*! * \internal * \brief Schedule fail count clearing on online nodes if resource is orphaned * * \param[in,out] data Resource to check * \param[in] user_data Ignored */ static void clear_failcounts_if_orphaned(gpointer data, gpointer user_data) { pe_resource_t *rsc = data; if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) { return; } crm_trace("Clear fail counts for orphaned resource %s", rsc->id); /* There's no need to recurse into rsc->children because those * should just be unassigned clone instances. */ for (GList *iter = rsc->cluster->nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; pe_action_t *clear_op = NULL; if (!node->details->online) { continue; } if (pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL) == 0) { continue; } clear_op = pe__clear_failcount(rsc, node, "it is orphaned", rsc->cluster); /* We can't use order_action_then_stop() here because its * pe_order_preserve breaks things */ pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), NULL, pe_order_optional, rsc->cluster); } } /*! * \internal * \brief Schedule any resource actions needed * * \param[in,out] data_set Cluster working set */ static void schedule_resource_actions(pe_working_set_t *data_set) { // Process deferred action checks pe__foreach_param_check(data_set, check_params); pe__free_param_checks(data_set); if (pcmk_is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Scheduling probes"); pcmk__schedule_probes(data_set); } if (pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) { g_list_foreach(data_set->resources, clear_failcounts_if_orphaned, NULL); } crm_trace("Scheduling resource actions"); for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; rsc->cmds->create_actions(rsc); } } /*! * \internal * \brief Check whether a resource or any of its descendants are managed * * \param[in] rsc Resource to check * * \return true if resource or any descendant is managed, otherwise false */ static bool is_managed(const pe_resource_t *rsc) { if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { return true; } for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { if (is_managed((pe_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether any resources in the cluster are managed * * \param[in] data_set Cluster working set * * \return true if any resource is managed, otherwise false */ static bool any_managed_resources(const pe_working_set_t *data_set) { for (const GList *iter = data_set->resources; iter != NULL; iter = iter->next) { if (is_managed((const pe_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether a node requires fencing * * \param[in] node Node to check * \param[in] have_managed Whether any resource in cluster is managed * * \return true if \p node should be fenced, otherwise false */ static bool needs_fencing(const pe_node_t *node, bool have_managed) { return have_managed && node->details->unclean && pe_can_fence(node->details->data_set, node); } /*! * \internal * \brief Check whether a node requires shutdown * * \param[in] node Node to check * * \return true if \p node should be shut down, otherwise false */ static bool needs_shutdown(const pe_node_t *node) { if (pe__is_guest_or_remote_node(node)) { /* Do not send shutdown actions for Pacemaker Remote nodes. * @TODO We might come up with a good use for this in the future. */ return false; } return node->details->online && node->details->shutdown; } /*! * \internal * \brief Track and order non-DC fencing * * \param[in,out] list List of existing non-DC fencing actions * \param[in,out] action Fencing action to prepend to \p list * \param[in] data_set Cluster working set * * \return (Possibly new) head of \p list */ static GList * add_nondc_fencing(GList *list, pe_action_t *action, const pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing) && (list != NULL)) { /* Concurrent fencing is disabled, so order each non-DC * fencing in a chain. If there is any DC fencing or * shutdown, it will be ordered after the last action in the * chain later. */ order_actions((pe_action_t *) list->data, action, pe_order_optional); } return g_list_prepend(list, action); } /*! * \internal * \brief Schedule a node for fencing * * \param[in,out] node Node that requires fencing */ static pe_action_t * schedule_fencing(pe_node_t *node) { pe_action_t *fencing = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, node->details->data_set); pe_warn("Scheduling node %s for fencing", pe__node_name(node)); pcmk__order_vs_fence(fencing, node->details->data_set); return fencing; } /*! * \internal * \brief Create and order node fencing and shutdown actions * * \param[in,out] data_set Cluster working set */ static void schedule_fencing_and_shutdowns(pe_working_set_t *data_set) { pe_action_t *dc_down = NULL; bool integrity_lost = false; bool have_managed = any_managed_resources(data_set); GList *fencing_ops = NULL; GList *shutdown_ops = NULL; crm_trace("Scheduling fencing and shutdowns as needed"); if (!have_managed) { crm_notice("No fencing will be done until there are resources " "to manage"); } // Check each node for whether it needs fencing or shutdown for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; pe_action_t *fencing = NULL; /* Guest nodes are "fenced" by recovering their container resource, * so handle them separately. */ if (pe__is_guest_node(node)) { if (node->details->remote_requires_reset && have_managed && pe_can_fence(data_set, node)) { pcmk__fence_guest(node); } continue; } if (needs_fencing(node, have_managed)) { fencing = schedule_fencing(node); // Track DC and non-DC fence actions separately if (node->details->is_dc) { dc_down = fencing; } else { fencing_ops = add_nondc_fencing(fencing_ops, fencing, data_set); } } else if (needs_shutdown(node)) { pe_action_t *down_op = pcmk__new_shutdown_action(node); // Track DC and non-DC shutdown actions separately if (node->details->is_dc) { dc_down = down_op; } else { shutdown_ops = g_list_prepend(shutdown_ops, down_op); } } if ((fencing == NULL) && node->details->unclean) { integrity_lost = true; pe_warn("Node %s is unclean but cannot be fenced", pe__node_name(node)); } } if (integrity_lost) { if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { pe_warn("Resource functionality and data integrity cannot be " "guaranteed (configure, enable, and test fencing to " "correct this)"); } else if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { crm_notice("Unclean nodes will not be fenced until quorum is " "attained or no-quorum-policy is set to ignore"); } } if (dc_down != NULL) { /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated * DC elections. However, we don't want to order non-DC shutdowns before * a DC *fencing*, because even though we don't want a node that's * shutting down to become DC, the DC fencing could be ordered before a * clone stop that's also ordered before the shutdowns, thus leading to * a graph loop. */ if (pcmk__str_eq(dc_down->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { pcmk__order_after_each(dc_down, shutdown_ops); } // Order any non-DC fencing before any DC fencing or shutdown if (pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)) { /* With concurrent fencing, order each non-DC fencing action * separately before any DC fencing or shutdown. */ pcmk__order_after_each(dc_down, fencing_ops); } else if (fencing_ops != NULL) { /* Without concurrent fencing, the non-DC fencing actions are * already ordered relative to each other, so we just need to order * the DC fencing after the last action in the chain (which is the * first item in the list). */ order_actions((pe_action_t *) fencing_ops->data, dc_down, pe_order_optional); } } g_list_free(fencing_ops); g_list_free(shutdown_ops); } static void log_resource_details(pe_working_set_t *data_set) { pcmk__output_t *out = data_set->priv; GList *all = NULL; /* Due to the `crm_mon --node=` feature, out->message() for all the * resource-related messages expects a list of nodes that we are allowed to * output information for. Here, we create a wildcard to match all nodes. */ all = g_list_prepend(all, (gpointer) "*"); for (GList *item = data_set->resources; item != NULL; item = item->next) { pe_resource_t *rsc = (pe_resource_t *) item->data; // Log all resources except inactive orphans if (!pcmk_is_set(rsc->flags, pe_rsc_orphan) || (rsc->role != RSC_ROLE_STOPPED)) { out->message(out, crm_map_element_name(rsc->xml), 0, rsc, all, all); } } g_list_free(all); } static void log_all_actions(pe_working_set_t *data_set) { /* This only ever outputs to the log, so ignore whatever output object was * previously set and just log instead. */ pcmk__output_t *prev_out = data_set->priv; pcmk__output_t *out = NULL; if (pcmk__log_output_new(&out) != pcmk_rc_ok) { return; } pe__register_messages(out); pcmk__register_lib_messages(out); pcmk__output_set_log_level(out, LOG_NOTICE); data_set->priv = out; out->begin_list(out, NULL, NULL, "Actions"); pcmk__output_actions(data_set); out->end_list(out); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); data_set->priv = prev_out; } /*! * \internal * \brief Log all required but unrunnable actions at trace level * * \param[in] data_set Cluster working set */ static void log_unrunnable_actions(const pe_working_set_t *data_set) { const uint64_t flags = pe_action_optional |pe_action_runnable |pe_action_pseudo; crm_trace("Required but unrunnable actions:"); for (const GList *iter = data_set->actions; iter != NULL; iter = iter->next) { const pe_action_t *action = (const pe_action_t *) iter->data; if (!pcmk_any_flags_set(action->flags, flags)) { pcmk__log_action("\t", action, true); } } } /*! * \internal * \brief Unpack the CIB for scheduling * * \param[in,out] cib CIB XML to unpack (may be NULL if already unpacked) * \param[in] flags Working set flags to set in addition to defaults * \param[in,out] data_set Cluster working set */ static void unpack_cib(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) { const char* localhost_save = NULL; if (pcmk_is_set(data_set->flags, pe_flag_have_status)) { crm_trace("Reusing previously calculated cluster status"); pe__set_working_set_flags(data_set, flags); return; } if (data_set->localhost) { localhost_save = data_set->localhost; } CRM_ASSERT(cib != NULL); crm_trace("Calculating cluster status"); /* This will zero the entire struct without freeing anything first, so * callers should never call pcmk__schedule_actions() with a populated data * set unless pe_flag_have_status is set (i.e. cluster_status() was * previously called, whether directly or via pcmk__schedule_actions()). */ set_working_set_defaults(data_set); if (localhost_save) { data_set->localhost = localhost_save; } pe__set_working_set_flags(data_set, flags); data_set->input = cib; cluster_status(data_set); // Sets pe_flag_have_status } /*! * \internal * \brief Run the scheduler for a given CIB * * \param[in,out] cib CIB XML to use as scheduler input * \param[in] flags Working set flags to set in addition to defaults * \param[in,out] data_set Cluster working set */ void pcmk__schedule_actions(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) { unpack_cib(cib, flags, data_set); pcmk__set_assignment_methods(data_set); pcmk__apply_node_health(data_set); pcmk__unpack_constraints(data_set); if (pcmk_is_set(data_set->flags, pe_flag_check_config)) { return; } if (!pcmk_is_set(data_set->flags, pe_flag_quick_location) && pcmk__is_daemon) { log_resource_details(data_set); } apply_node_criteria(data_set); if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) { return; } pcmk__create_internal_constraints(data_set); pcmk__handle_rsc_config_changes(data_set); assign_resources(data_set); schedule_resource_actions(data_set); /* Remote ordering constraints need to happen prior to calculating fencing * because it is one more place we can mark nodes as needing fencing. */ pcmk__order_remote_connection_actions(data_set); schedule_fencing_and_shutdowns(data_set); pcmk__apply_orderings(data_set); log_all_actions(data_set); pcmk__create_graph(data_set); if (get_crm_log_level() == LOG_TRACE) { log_unrunnable_actions(data_set); } } diff --git a/lib/pengine/failcounts.c b/lib/pengine/failcounts.c index a4a3e11cc6..9d5dfea962 100644 --- a/lib/pengine/failcounts.c +++ b/lib/pengine/failcounts.c @@ -1,403 +1,403 @@ /* * Copyright 2008-2023 the Pacemaker project contributors * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include static gboolean is_matched_failure(const char *rsc_id, const xmlNode *conf_op_xml, const xmlNode *lrm_op_xml) { gboolean matched = FALSE; const char *conf_op_name = NULL; const char *lrm_op_task = NULL; const char *conf_op_interval_spec = NULL; guint conf_op_interval_ms = 0; guint lrm_op_interval_ms = 0; const char *lrm_op_id = NULL; char *last_failure_key = NULL; if (rsc_id == NULL || conf_op_xml == NULL || lrm_op_xml == NULL) { return FALSE; } // Get name and interval from configured op conf_op_name = crm_element_value(conf_op_xml, "name"); conf_op_interval_spec = crm_element_value(conf_op_xml, XML_LRM_ATTR_INTERVAL); conf_op_interval_ms = crm_parse_interval_spec(conf_op_interval_spec); // Get name and interval from op history entry lrm_op_task = crm_element_value(lrm_op_xml, XML_LRM_ATTR_TASK); crm_element_value_ms(lrm_op_xml, XML_LRM_ATTR_INTERVAL_MS, &lrm_op_interval_ms); if ((conf_op_interval_ms != lrm_op_interval_ms) || !pcmk__str_eq(conf_op_name, lrm_op_task, pcmk__str_casei)) { return FALSE; } lrm_op_id = ID(lrm_op_xml); last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0); if (pcmk__str_eq(last_failure_key, lrm_op_id, pcmk__str_casei)) { matched = TRUE; } else { char *expected_op_key = pcmk__op_key(rsc_id, conf_op_name, conf_op_interval_ms); if (pcmk__str_eq(expected_op_key, lrm_op_id, pcmk__str_casei)) { int rc = 0; int target_rc = pe__target_rc_from_xml(lrm_op_xml); crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_RC, &rc); if (rc != target_rc) { matched = TRUE; } } free(expected_op_key); } free(last_failure_key); return matched; } static gboolean block_failure(const pe_node_t *node, pe_resource_t *rsc, const xmlNode *xml_op) { char *xml_name = clone_strip(rsc->id); /* @TODO This xpath search occurs after template expansion, but it is unable * to properly detect on-fail in id-ref, operation meta-attributes, or * op_defaults, or evaluate rules. * * Also, on-fail defaults to block (in unpack_operation()) for stop actions * when stonith is disabled. * * Ideally, we'd unpack the operation before this point, and pass in a * meta-attributes table that takes all that into consideration. */ char *xpath = crm_strdup_printf("//" XML_CIB_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" "//" XML_ATTR_OP "[@" XML_OP_ATTR_ON_FAIL "='block']", xml_name); xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath); gboolean should_block = FALSE; free(xpath); if (xpathObj) { int max = numXpathResults(xpathObj); int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *pref = getXpathResult(xpathObj, lpc); if (xml_op) { should_block = is_matched_failure(xml_name, pref, xml_op); if (should_block) { break; } } else { const char *conf_op_name = NULL; const char *conf_op_interval_spec = NULL; guint conf_op_interval_ms = 0; char *lrm_op_xpath = NULL; xmlXPathObject *lrm_op_xpathObj = NULL; // Get name and interval from configured op conf_op_name = crm_element_value(pref, "name"); conf_op_interval_spec = crm_element_value(pref, XML_LRM_ATTR_INTERVAL); conf_op_interval_ms = crm_parse_interval_spec(conf_op_interval_spec); #define XPATH_FMT "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ "//" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP "[@" XML_LRM_ATTR_TASK "='%s']" \ "[@" XML_LRM_ATTR_INTERVAL "='%u']" lrm_op_xpath = crm_strdup_printf(XPATH_FMT, node->details->uname, xml_name, conf_op_name, conf_op_interval_ms); lrm_op_xpathObj = xpath_search(rsc->cluster->input, lrm_op_xpath); free(lrm_op_xpath); if (lrm_op_xpathObj) { int max2 = numXpathResults(lrm_op_xpathObj); int lpc2 = 0; for (lpc2 = 0; lpc2 < max2; lpc2++) { xmlNode *lrm_op_xml = getXpathResult(lrm_op_xpathObj, lpc2); should_block = is_matched_failure(xml_name, pref, lrm_op_xml); if (should_block) { break; } } } freeXpathObject(lrm_op_xpathObj); if (should_block) { break; } } } } free(xml_name); freeXpathObject(xpathObj); return should_block; } /*! * \internal * \brief Get resource name as used in failure-related node attributes * * \param[in] rsc Resource to check * * \return Newly allocated string containing resource's fail name * \note The caller is responsible for freeing the result. */ static inline char * rsc_fail_name(const pe_resource_t *rsc) { const char *name = (rsc->clone_name? rsc->clone_name : rsc->id); return pcmk_is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); } /*! * \internal * \brief Compile regular expression to match a failure-related node attribute * * \param[in] prefix Attribute prefix to match * \param[in] rsc_name Resource name to match as used in failure attributes * \param[in] is_legacy Whether DC uses per-resource fail counts * \param[in] is_unique Whether the resource is a globally unique clone * \param[out] re Where to store resulting regular expression * * \return Standard Pacemaker return code * \note Fail attributes are named like PREFIX-RESOURCE#OP_INTERVAL. * The caller is responsible for freeing re with regfree(). */ static int generate_fail_regex(const char *prefix, const char *rsc_name, gboolean is_legacy, gboolean is_unique, regex_t *re) { char *pattern; /* @COMPAT DC < 1.1.17: Fail counts used to be per-resource rather than * per-operation. */ const char *op_pattern = (is_legacy? "" : "#.+_[0-9]+"); /* Ignore instance numbers for anything other than globally unique clones. * Anonymous clone fail counts could contain an instance number if the * clone was initially unique, failed, then was converted to anonymous. * @COMPAT Also, before 1.1.8, anonymous clone fail counts always contained * clone instance numbers. */ const char *instance_pattern = (is_unique? "" : "(:[0-9]+)?"); pattern = crm_strdup_printf("^%s-%s%s%s$", prefix, rsc_name, instance_pattern, op_pattern); if (regcomp(re, pattern, REG_EXTENDED|REG_NOSUB) != 0) { free(pattern); return EINVAL; } free(pattern); return pcmk_rc_ok; } /*! * \internal * \brief Compile regular expressions to match failure-related node attributes * * \param[in] rsc Resource being checked for failures * \param[in] data_set Data set (for CRM feature set version) * \param[out] failcount_re Storage for regular expression for fail count * \param[out] lastfailure_re Storage for regular expression for last failure * * \return Standard Pacemaker return code * \note On success, the caller is responsible for freeing the expressions with * regfree(). */ static int generate_fail_regexes(const pe_resource_t *rsc, const pe_working_set_t *data_set, regex_t *failcount_re, regex_t *lastfailure_re) { char *rsc_name = rsc_fail_name(rsc); const char *version = crm_element_value(data_set->input, XML_ATTR_CRM_VERSION); gboolean is_legacy = (compare_version(version, "3.0.13") < 0); int rc = pcmk_rc_ok; if (generate_fail_regex(PCMK__FAIL_COUNT_PREFIX, rsc_name, is_legacy, pcmk_is_set(rsc->flags, pe_rsc_unique), failcount_re) != pcmk_rc_ok) { rc = EINVAL; } else if (generate_fail_regex(PCMK__LAST_FAILURE_PREFIX, rsc_name, is_legacy, pcmk_is_set(rsc->flags, pe_rsc_unique), lastfailure_re) != pcmk_rc_ok) { rc = EINVAL; regfree(failcount_re); } free(rsc_name); return rc; } int pe_get_failcount(const pe_node_t *node, pe_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op) { char *key = NULL; const char *value = NULL; regex_t failcount_re, lastfailure_re; int failcount = 0; time_t last = 0; GHashTableIter iter; CRM_CHECK(generate_fail_regexes(rsc, rsc->cluster, &failcount_re, &lastfailure_re) == pcmk_rc_ok, return 0); /* Resource fail count is sum of all matching operation fail counts */ g_hash_table_iter_init(&iter, node->details->attrs); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { if (regexec(&failcount_re, key, 0, NULL, 0) == 0) { failcount = pcmk__add_scores(failcount, char2score(value)); crm_trace("Added %s (%s) to %s fail count (now %s)", key, value, rsc->id, pcmk_readable_score(failcount)); } else if (regexec(&lastfailure_re, key, 0, NULL, 0) == 0) { long long last_ll; if (pcmk__scan_ll(value, &last_ll, 0LL) == pcmk_rc_ok) { last = (time_t) QB_MAX(last, last_ll); } } } regfree(&failcount_re); regfree(&lastfailure_re); if ((failcount > 0) && (last > 0) && (last_failure != NULL)) { *last_failure = last; } /* If failure blocks the resource, disregard any failure timeout */ if ((failcount > 0) && rsc->failure_timeout && block_failure(node, rsc, xml_op)) { pe_warn("Ignoring failure timeout %d for %s because it conflicts with on-fail=block", rsc->failure_timeout, rsc->id); rsc->failure_timeout = 0; } /* If all failures have expired, ignore fail count */ if (pcmk_is_set(flags, pe_fc_effective) && (failcount > 0) && (last > 0) && rsc->failure_timeout) { time_t now = get_effective_time(rsc->cluster); if (now > (last + rsc->failure_timeout)) { crm_debug("Failcount for %s on %s expired after %ds", rsc->id, pe__node_name(node), rsc->failure_timeout); failcount = 0; } } /* We never want the fail counts of a bundle container's fillers to * count towards the container's fail count. * * Most importantly, a Pacemaker Remote connection to a bundle container * is a filler of the container, but can reside on a different node than the * container itself. Counting its fail count on its node towards the * container's fail count on that node could lead to attempting to stop the * container on the wrong node. */ if (pcmk_is_set(flags, pe_fc_fillers) && rsc->fillers && !pe_rsc_is_bundled(rsc)) { GList *gIter = NULL; for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { pe_resource_t *filler = (pe_resource_t *) gIter->data; time_t filler_last_failure = 0; failcount += pe_get_failcount(node, filler, &filler_last_failure, flags, xml_op); if (last_failure && filler_last_failure > *last_failure) { *last_failure = filler_last_failure; } } if (failcount > 0) { crm_info("Container %s and the resources within it " "have failed %s time%s on %s", rsc->id, pcmk_readable_score(failcount), pcmk__plural_s(failcount), pe__node_name(node)); } } else if (failcount > 0) { crm_info("%s has failed %s time%s on %s", rsc->id, pcmk_readable_score(failcount), pcmk__plural_s(failcount), pe__node_name(node)); } return failcount; } /*! * \brief Schedule a controller operation to clear a fail count * * \param[in,out] rsc Resource with failure * \param[in] node Node failure occurred on * \param[in] reason Readable description why needed (for logging) * \param[in,out] data_set Working set for cluster * * \return Scheduled action */ pe_action_t * pe__clear_failcount(pe_resource_t *rsc, const pe_node_t *node, const char *reason, pe_working_set_t *data_set) { char *key = NULL; pe_action_t *clear = NULL; CRM_CHECK(rsc && node && reason && data_set, return NULL); - key = pcmk__op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); - clear = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, - data_set); + key = pcmk__op_key(rsc->id, PCMK_ACTION_CLEAR_FAILCOUNT, 0); + clear = custom_action(rsc, key, PCMK_ACTION_CLEAR_FAILCOUNT, node, FALSE, + TRUE, data_set); add_hash_param(clear->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s", rsc->id, pe__node_name(node), reason, clear->uuid); return clear; }