diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c index 92a4056446..05e2e14230 100644 --- a/lib/pacemaker/pcmk_graph_producer.c +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -1,1121 +1,1114 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pe_action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->details->uname) /*! * \internal * \brief Add an XML node tag for a specified ID * * \param[in] id Node UUID to add * \param[in,out] xml Parent XML tag to add to */ static xmlNode* add_node_to_xml_by_id(const char *id, xmlNode *xml) { xmlNode *node_xml; node_xml = create_xml_node(xml, XML_CIB_TAG_NODE); crm_xml_add(node_xml, XML_ATTR_UUID, id); return node_xml; } /*! * \internal * \brief Add an XML node tag for a specified node * * \param[in] node Node to add * \param[in,out] xml XML to add node to */ static void add_node_to_xml(const pe_node_t *node, void *xml) { add_node_to_xml_by_id(node->details->id, (xmlNode *) xml); } /*! * \internal * \brief Add XML with nodes that need an update of their maintenance state * * \param[in,out] xml Parent XML tag to add to * \param[in] data_set Working set for cluster */ static int add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set) { GList *gIter = NULL; xmlNode *maintenance = xml?create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE):NULL; int count = 0; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; struct pe_node_shared_s *details = node->details; if (!pe__is_guest_or_remote_node(node)) { continue; /* just remote nodes need to know atm */ } if (details->maintenance != details->remote_maintenance) { if (maintenance) { crm_xml_add( add_node_to_xml_by_id(node->details->id, maintenance), XML_NODE_IS_MAINTENANCE, details->maintenance?"1":"0"); } count++; } } crm_trace("%s %d nodes to adjust maintenance-mode " "to transition", maintenance?"Added":"Counted", count); return count; } /*! * \internal * \brief Add pseudo action with nodes needing maintenance state update * * \param[in,out] data_set Working set for cluster */ static void add_maintenance_update(pe_working_set_t *data_set) { pe_action_t *action = NULL; if (add_maintenance_nodes(NULL, data_set)) { crm_trace("adding maintenance state update pseudo action"); action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set); pe__set_action_flags(action, pe_action_print_always); } } /*! * \internal * \brief Add XML with nodes that an action is expected to bring down * * If a specified action is expected to bring any nodes down, add an XML block * with their UUIDs. When a node is lost, this allows the controller to * determine whether it was expected. * * \param[in,out] xml Parent XML tag to add to * \param[in] action Action to check for downed nodes * \param[in] data_set Working set for cluster */ static void add_downed_nodes(xmlNode *xml, const pe_action_t *action, const pe_working_set_t *data_set) { CRM_CHECK(xml && action && action->node && data_set, return); if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { /* Shutdown makes the action's node down */ xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { /* Fencing makes the action's node and any hosted guest nodes down */ const char *fence = g_hash_table_lookup(action->meta, "stonith_action"); if (pcmk__is_fencing_action(fence)) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed); } } else if (action->rsc && action->rsc->is_remote_node && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) { /* Stopping a remote connection resource makes connected node down, * unless it's part of a migration */ GList *iter; pe_action_t *input; gboolean migrating = FALSE; for (iter = action->actions_before; iter != NULL; iter = iter->next) { input = ((pe_action_wrapper_t *) iter->data)->action; if (input->rsc && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_casei) && pcmk__str_eq(input->task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { migrating = TRUE; break; } } if (!migrating) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->rsc->id, downed); } } } /*! * \internal * \brief Create a transition graph operation key for a clone action * * \param[in] action Clone action * \param[in] interval_ms Action interval in milliseconds * * \return Newly allocated string with transition graph operation key */ static char * clone_op_key(pe_action_t *action, guint interval_ms) { if (pcmk__str_eq(action->task, RSC_NOTIFY, pcmk__str_none)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL)); return pcmk__notify_key(action->rsc->clone_name, n_type, n_task); } else if (action->cancel_task != NULL) { return pcmk__op_key(action->rsc->clone_name, action->cancel_task, interval_ms); } else { return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms); } } /*! * \internal * \brief Add node details to transition graph action XML * * \param[in] action Scheduled action * \param[in] xml Transition graph action XML for \p action */ static void add_node_details(pe_action_t *action, xmlNode *xml) { pe_node_t *router_node = pcmk__connection_host_for_action(action); crm_xml_add(xml, XML_LRM_ATTR_TARGET, action->node->details->uname); crm_xml_add(xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); if (router_node != NULL) { crm_xml_add(xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname); } } /*! * \internal * \brief Add resource details to transition graph action XML * * \param[in] action Scheduled action * \param[in] action_xml Transition graph action XML for \p action */ static void add_resource_details(pe_action_t *action, xmlNode *action_xml) { xmlNode *rsc_xml = NULL; const char *attr_list[] = { XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER, XML_ATTR_TYPE }; /* If a resource is locked to a node via shutdown-lock, mark its actions * so the controller can preserve the lock when the action completes. */ if (pcmk__action_locks_rsc_to_node(action)) { crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, (long long) action->rsc->lock_time); } // List affected resource rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); if (pcmk_is_set(action->rsc->flags, pe_rsc_orphan) && (action->rsc->clone_name != NULL)) { /* Use the numbered instance name here, because if there is more * than one instance on a node, we need to make sure the command * goes to the right one. * * This is important even for anonymous clones, because the clone's * unique meta-attribute might have just been toggled from on to * off. */ crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } else if (!pcmk_is_set(action->rsc->flags, pe_rsc_unique)) { const char *xml_id = ID(action->rsc->xml); crm_debug("Using anonymous clone name %s for %s (aka %s)", xml_id, action->rsc->id, action->rsc->clone_name); /* ID is what we'd like client to use * ID_LONG is what they might know it as instead * * ID_LONG is only strictly needed /here/ during the * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC * supporting this feature). * * If anyone toggles the unique flag to 'on', the * 'instance free' name will correspond to an orphan * and fall into the clause above instead */ crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); if ((action->rsc->clone_name != NULL) && !pcmk__str_eq(xml_id, action->rsc->clone_name, pcmk__str_none)) { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); } else { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } } else { CRM_ASSERT(action->rsc->clone_name == NULL); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); } for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); } } /*! * \internal * \brief Add action attributes to transition graph action XML * * \param[in] action Scheduled action * \param[in] action_xml Transition graph action XML for \p action */ static void add_action_attributes(pe_action_t *action, xmlNode *action_xml) { xmlNode *args_xml = NULL; /* We create free-standing XML to start, so we can sort the attributes * before adding it to action_xml, which keeps the scheduler regression * test graphs comparable. */ args_xml = create_xml_node(NULL, XML_TAG_ATTRS); crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if ((action->rsc != NULL) && (action->node != NULL)) { // Get the resource instance attributes, evaluated properly for node GHashTable *params = pe_rsc_params(action->rsc, action->node, action->rsc->cluster); pcmk__substitute_remote_addr(action->rsc, params); g_hash_table_foreach(params, hash2smartfield, args_xml); #if ENABLE_VERSIONED_ATTRS { xmlNode *versioned_parameters = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS); pe_get_versioned_attributes(versioned_parameters, action->rsc, action->node, action->rsc->cluster); if (xml_has_children(versioned_parameters)) { add_node_copy(action_xml, versioned_parameters); } free_xml(versioned_parameters); } #endif } else if ((action->rsc != NULL) && (action->rsc->variant <= pe_native)) { GHashTable *params = pe_rsc_params(action->rsc, NULL, action->rsc->cluster); g_hash_table_foreach(params, hash2smartfield, args_xml); #if ENABLE_VERSIONED_ATTRS if (xml_has_children(action->rsc->versioned_parameters)) { add_node_copy(action_xml, action->rsc->versioned_parameters); } #endif } #if ENABLE_VERSIONED_ATTRS if (rsc_details != NULL) { if (xml_has_children(rsc_details->versioned_parameters)) { add_node_copy(action_xml, rsc_details->versioned_parameters); } if (xml_has_children(rsc_details->versioned_meta)) { add_node_copy(action_xml, rsc_details->versioned_meta); } } #endif g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (action->rsc != NULL) { - const char *value = g_hash_table_lookup(action->rsc->meta, - "external-ip"); pe_resource_t *parent = action->rsc; while (parent != NULL) { parent->cmds->add_graph_meta(parent, args_xml); parent = parent->parent; } - if (value != NULL) { - hash2smartfield((gpointer) "pcmk_external_ip", (gpointer) value, - (gpointer) args_xml); - } - pcmk__add_bundle_meta_to_xml(args_xml, action); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_none) && (action->node != NULL)) { /* Pass the node's attributes as meta-attributes. * * @TODO: Determine whether it is still necessary to do this. It was * added in 33d99707, probably for the libfence-based implementation in * c9a90bd, which is no longer used. */ g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml); } sorted_xml(args_xml, action_xml, FALSE); free_xml(args_xml); } /*! * \internal * \brief Create the transition graph XML for a scheduled action * * \param[in] parent Parent XML element to add action to * \param[in] action Scheduled action * \param[in] skip_details If false, add action details as sub-elements * \param[in] data_set Cluster working set */ static void create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details, pe_working_set_t *data_set) { bool needs_node_info = true; bool needs_maintenance_info = false; xmlNode *action_xml = NULL; #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t *rsc_details = NULL; #endif if ((action == NULL) || (data_set == NULL)) { return; } // Create the top-level element based on task if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { /* All fences need node info; guest node fences are pseudo-events */ action_xml = create_xml_node(parent, pcmk_is_set(action->flags, pe_action_pseudo)? XML_GRAPH_TAG_PSEUDO_EVENT : XML_GRAPH_TAG_CRM_EVENT); } else if (pcmk__str_any_of(action->task, CRM_OP_SHUTDOWN, CRM_OP_CLEAR_FAILCOUNT, NULL)) { action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) { // CIB-only clean-up for shutdown locks action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB); } else if (pcmk_is_set(action->flags, pe_action_pseudo)) { if (pcmk__str_eq(action->task, CRM_OP_MAINTENANCE_NODES, pcmk__str_none)) { needs_maintenance_info = true; } action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT); needs_node_info = false; } else { action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP); #if ENABLE_VERSIONED_ATTRS rsc_details = pe_rsc_action_details(action); #endif } crm_xml_add_int(action_xml, XML_ATTR_ID, action->id); crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) { char *clone_key = NULL; guint interval_ms; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } clone_key = clone_op_key(action, interval_ms); crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid); free(clone_key); } else { crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); } if (needs_node_info && (action->node != NULL)) { add_node_details(action, action_xml); g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET), strdup(action->node->details->uname)); g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID), strdup(action->node->details->id)); } if (skip_details) { return; } if ((action->rsc != NULL) && !pcmk_is_set(action->flags, pe_action_pseudo)) { // This is a real resource action, so add resource details add_resource_details(action, action_xml); } /* List any attributes in effect */ add_action_attributes(action, action_xml); /* List any nodes this action is expected to make down */ if (needs_node_info && (action->node != NULL)) { add_downed_nodes(action_xml, action, data_set); } if (needs_maintenance_info) { add_maintenance_nodes(action_xml, data_set); } } /*! * \internal * \brief Check whether an action should be added to the transition graph * * \param[in] action Action to check * * \return true if action should be added to graph, otherwise false */ static bool should_add_action_to_graph(pe_action_t *action) { if (!pcmk_is_set(action->flags, pe_action_runnable)) { crm_trace("Ignoring action %s (%d): unrunnable", action->uuid, action->id); return false; } if (pcmk_is_set(action->flags, pe_action_optional) && !pcmk_is_set(action->flags, pe_action_print_always)) { crm_trace("Ignoring action %s (%d): optional", action->uuid, action->id); return false; } /* Actions for unmanaged resources should be excluded from the graph, * with the exception of monitors and cancellation of recurring monitors. */ if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pe_rsc_managed) && !pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_none)) { const char *interval_ms_s; /* A cancellation of a recurring monitor will get here because the task * is cancel rather than monitor, but the interval can still be used to * recognize it. The interval has been normalized to milliseconds by * this point, so a string comparison is sufficient. */ interval_ms_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS); if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) { crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)", action->uuid, action->id, action->rsc->id); return false; } } /* Always add pseudo-actions, fence actions, and shutdown actions (already * determined to be required and runnable by this point) */ if (pcmk_is_set(action->flags, pe_action_pseudo) || pcmk__strcase_any_of(action->task, CRM_OP_FENCE, CRM_OP_SHUTDOWN, NULL)) { return true; } if (action->node == NULL) { pe_err("Skipping action %s (%d) " "because it was not allocated to a node (bug?)", action->uuid, action->id); pcmk__log_action("Unallocated", action, false); return false; } if (pcmk_is_set(action->flags, pe_action_dc)) { crm_trace("Action %s (%d) should be dumped: " "can run on DC instead of %s", action->uuid, action->id, pe__node_name(action->node)); } else if (pe__is_guest_node(action->node) && !action->node->details->remote_requires_reset) { crm_trace("Action %s (%d) should be dumped: " "assuming will be runnable on guest %s", action->uuid, action->id, pe__node_name(action->node)); } else if (!action->node->details->online) { pe_err("Skipping action %s (%d) " "because it was scheduled for offline node (bug?)", action->uuid, action->id); pcmk__log_action("Offline node", action, false); return false; } else if (action->node->details->unclean) { pe_err("Skipping action %s (%d) " "because it was scheduled for unclean node (bug?)", action->uuid, action->id); pcmk__log_action("Unclean node", action, false); return false; } return true; } /*! * \internal * \brief Check whether an ordering's flags can change an action * * \param[in] ordering Ordering to check * * \return true if ordering has flags that can change an action, false otherwise */ static bool ordering_can_change_actions(pe_action_wrapper_t *ordering) { return pcmk_any_flags_set(ordering->type, ~(pe_order_implies_first_printed |pe_order_implies_then_printed |pe_order_optional)); } /*! * \internal * \brief Check whether an action input should be in the transition graph * * \param[in] action Action to check * \param[in,out] input Action input to check * * \return true if input should be in graph, false otherwise * \note This function may not only check an input, but disable it under certian * circumstances (load or anti-colocation orderings that are not needed). */ static bool should_add_input_to_graph(pe_action_t *action, pe_action_wrapper_t *input) { if (input->state == pe_link_dumped) { return true; } if (input->type == pe_order_none) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering disabled", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pe_action_runnable) && !ordering_can_change_actions(input)) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pe_action_runnable) && pcmk_is_set(input->type, pe_order_one_or_more)) { crm_trace("Ignoring %s (%d) input %s (%d): " "one-or-more and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->type, pe_order_implies_first_migratable) && !pcmk_is_set(input->action->flags, pe_action_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "implies input migratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->type, pe_order_apply_first_non_migratable) && pcmk_is_set(input->action->flags, pe_action_migrate_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "only if input unmigratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if ((input->type == pe_order_optional) && pcmk_is_set(input->action->flags, pe_action_migrate_runnable) && pcmk__ends_with(input->action->uuid, "_stop_0")) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional but stop in migration", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (input->type == pe_order_load) { pe_node_t *input_node = input->action->node; // load orderings are relevant only if actions are for same node if (action->rsc && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei)) { pe_node_t *allocated = action->rsc->allocated_to; /* For load_stopped -> migrate_to orderings, we care about where it * has been allocated to, not where it will be executed. */ if ((input_node == NULL) || (allocated == NULL) || (input_node->details != allocated->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (allocated? allocated->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } } else if ((input_node == NULL) || (action->node == NULL) || (input_node->details != action->node->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (action->node? action->node->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->type == pe_order_anti_colocation) { if (input->action->node && action->node && (input->action->node->details != action->node->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, pe__node_name(action->node), pe__node_name(input->action->node)); input->type = pe_order_none; return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->action->rsc && input->action->rsc != action->rsc && pcmk_is_set(input->action->rsc->flags, pe_rsc_failed) && !pcmk_is_set(input->action->rsc->flags, pe_rsc_managed) && pcmk__ends_with(input->action->uuid, "_stop_0") && action->rsc && pe_rsc_is_clone(action->rsc)) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", input->action->uuid, action->uuid); return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional) && !pcmk_any_flags_set(input->action->flags, pe_action_print_always|pe_action_dumped) && !should_add_action_to_graph(input->action)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input optional", action->uuid, action->id, input->action->uuid, input->action->id); return false; } crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x", action->uuid, action->id, action_type_str(input->action->flags), input->action->uuid, input->action->id, action_node_str(input->action), action_runnable_str(input->action->flags), action_optional_str(input->action->flags), input->type); return true; } /*! * \internal * \brief Check whether an ordering creates an ordering loop * * \param[in] init_action "First" action in ordering * \param[in] action Callers should always set this the same as * \p init_action (this function may use a different * value for recursive calls) * \param[in] input Action wrapper for "then" action in ordering * * \return true if the ordering creates a loop, otherwise false */ bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action, pe_action_wrapper_t *input) { bool has_loop = false; if (pcmk_is_set(input->action->flags, pe_action_tracking)) { crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); return false; } // Don't need to check inputs that won't be used if (!should_add_input_to_graph(action, input)) { return false; } if (input->action == init_action) { crm_debug("Input loop found in %s@%s ->...-> %s@%s", action->uuid, action->node? action->node->details->uname : "", init_action->uuid, init_action->node? init_action->node->details->uname : ""); return true; } pe__set_action_flags(input->action, pe_action_tracking); crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)" "for graph loop with %s@%s ", action->uuid, action->node? action->node->details->uname : "", input->action->uuid, input->action->node? input->action->node->details->uname : "", input->type, init_action->uuid, init_action->node? init_action->node->details->uname : ""); // Recursively check input itself for loops for (GList *iter = input->action->actions_before; iter != NULL; iter = iter->next) { if (pcmk__graph_has_loop(init_action, input->action, (pe_action_wrapper_t *) iter->data)) { // Recursive call already logged a debug message has_loop = true; break; } } pe__clear_action_flags(input->action, pe_action_tracking); if (!has_loop) { crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); } return has_loop; } /*! * \internal * \brief Create a synapse XML element for a transition graph * * \param[in] action Action that synapse is for * \param[in] data_set Cluster working set containing graph * * \return Newly added XML element for new graph synapse */ static xmlNode * create_graph_synapse(pe_action_t *action, pe_working_set_t *data_set) { int synapse_priority = 0; xmlNode *syn = create_xml_node(data_set->graph, "synapse"); crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); data_set->num_synapse++; if (action->rsc != NULL) { synapse_priority = action->rsc->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); } return syn; } /*! * \internal * \brief Add an action to the transition graph XML if appropriate * * \param[in] data Action to possibly add * \param[in] user_data Cluster working set * * \note This will de-duplicate the action inputs, meaning that the * pe_action_wrapper_t:type flags can no longer be relied on to retain * their original settings. That means this MUST be called after * pcmk__apply_orderings() is complete, and nothing after this should rely * on those type flags. (For example, some code looks for type equal to * some flag rather than whether the flag is set, and some code looks for * particular combinations of flags -- such code must be done before * pcmk__create_graph().) */ static void add_action_to_graph(gpointer data, gpointer user_data) { pe_action_t *action = (pe_action_t *) data; pe_working_set_t *data_set = (pe_working_set_t *) user_data; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; /* If we haven't already, de-duplicate inputs (even if we won't be adding * the action to the graph, so that crm_simulate's dot graphs don't have * duplicates). */ if (!pcmk_is_set(action->flags, pe_action_dedup)) { pcmk__deduplicate_action_inputs(action); pe__set_action_flags(action, pe_action_dedup); } if (pcmk_is_set(action->flags, pe_action_dumped) // Already added, or || !should_add_action_to_graph(action)) { // shouldn't be added return; } pe__set_action_flags(action, pe_action_dumped); crm_trace("Adding action %d (%s%s%s) to graph", action->id, action->uuid, ((action->node == NULL)? "" : " on "), ((action->node == NULL)? "" : action->node->details->uname)); syn = create_graph_synapse(action, data_set); set = create_xml_node(syn, "action_set"); in = create_xml_node(syn, "inputs"); create_graph_action(set, action, false, data_set); for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *input = (pe_action_wrapper_t *) lpc->data; if (should_add_input_to_graph(action, input)) { xmlNode *input_xml = create_xml_node(in, "trigger"); input->state = pe_link_dumped; create_graph_action(input_xml, input->action, true, data_set); } } } static int transition_id = -1; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const char *filename) { if (was_processing_error) { crm_err("Calculated transition %d (with errors)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else if (was_processing_warning) { crm_warn("Calculated transition %d (with warnings)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else { crm_notice("Calculated transition %d%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } if (crm_config_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } /*! * \internal * \brief Add a resource's actions to the transition graph * * \param[in] rsc Resource whose actions should be added */ void pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc) { GList *iter = NULL; CRM_ASSERT(rsc != NULL); pe_rsc_trace(rsc, "Adding actions for %s to graph", rsc->id); // First add the resource's own actions g_list_foreach(rsc->actions, add_action_to_graph, rsc->cluster); // Then recursively add its children's actions (appropriate to variant) for (iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child_rsc = (pe_resource_t *) iter->data; child_rsc->cmds->add_actions_to_graph(child_rsc); } } /*! * \internal * \brief Create a transition graph with all cluster actions needed * * \param[in] data_set Cluster working set */ void pcmk__create_graph(pe_working_set_t *data_set) { GList *iter = NULL; const char *value = NULL; long long limit = 0LL; transition_id++; crm_trace("Creating transition graph %d", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { crm_xml_add(data_set->graph, "migration-limit", value); } if (data_set->recheck_by > 0) { char *recheck_epoch = NULL; recheck_epoch = crm_strdup_printf("%llu", (long long) data_set->recheck_by); crm_xml_add(data_set->graph, "recheck-by", recheck_epoch); free(recheck_epoch); } /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. */ // Add resource actions to graph for (iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; pe_rsc_trace(rsc, "Processing actions for %s", rsc->id); rsc->cmds->add_actions_to_graph(rsc); } // Add pseudo-action for list of nodes with maintenance state update add_maintenance_update(data_set); // Add non-resource (node) actions for (iter = data_set->actions; iter != NULL; iter = iter->next) { pe_action_t *action = (pe_action_t *) iter->data; if ((action->rsc != NULL) && (action->node != NULL) && action->node->details->shutdown && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance) && !pcmk_any_flags_set(action->flags, pe_action_optional|pe_action_runnable) && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none)) { /* Eventually we should just ignore the 'fence' case, but for now * it's the best way to detect (in CTS) when CIB resource updates * are being lost. */ if (pcmk_is_set(data_set->flags, pe_flag_have_quorum) || (data_set->no_quorum_policy == no_quorum_ignore)) { crm_crit("Cannot %s %s because of %s:%s%s (%s)", action->node->details->unclean? "fence" : "shut down", pe__node_name(action->node), action->rsc->id, pcmk_is_set(action->rsc->flags, pe_rsc_managed)? " blocked" : " unmanaged", pcmk_is_set(action->rsc->flags, pe_rsc_failed)? " failed" : "", action->uuid); } } add_action_to_graph((gpointer) action, (gpointer) data_set); } crm_log_xml_trace(data_set->graph, "graph"); } diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c index 05bfc76c62..72b1aadc49 100644 --- a/lib/pacemaker/pcmk_sched_primitive.c +++ b/lib/pacemaker/pcmk_sched_primitive.c @@ -1,1534 +1,1543 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include "libpacemaker_private.h" static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); static void promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional); static void assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional); static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* This array lists the immediate next role when transitioning from one role * to a target role. For example, when going from Stopped to Promoted, the * next role is Unpromoted, because the resource must be started before it * can be promoted. The current state then becomes Started, which is fed * into this array again, giving a next role of Promoted. * * Current role Immediate next role Final target role * ------------ ------------------- ----------------- */ /* Unknown */ { RSC_ROLE_UNKNOWN, /* Unknown */ RSC_ROLE_STOPPED, /* Stopped */ RSC_ROLE_STOPPED, /* Started */ RSC_ROLE_STOPPED, /* Unpromoted */ RSC_ROLE_STOPPED, /* Promoted */ }, /* Stopped */ { RSC_ROLE_STOPPED, /* Unknown */ RSC_ROLE_STOPPED, /* Stopped */ RSC_ROLE_STARTED, /* Started */ RSC_ROLE_UNPROMOTED, /* Unpromoted */ RSC_ROLE_UNPROMOTED, /* Promoted */ }, /* Started */ { RSC_ROLE_STOPPED, /* Unknown */ RSC_ROLE_STOPPED, /* Stopped */ RSC_ROLE_STARTED, /* Started */ RSC_ROLE_UNPROMOTED, /* Unpromoted */ RSC_ROLE_PROMOTED, /* Promoted */ }, /* Unpromoted */ { RSC_ROLE_STOPPED, /* Unknown */ RSC_ROLE_STOPPED, /* Stopped */ RSC_ROLE_STOPPED, /* Started */ RSC_ROLE_UNPROMOTED, /* Unpromoted */ RSC_ROLE_PROMOTED, /* Promoted */ }, /* Promoted */ { RSC_ROLE_STOPPED, /* Unknown */ RSC_ROLE_UNPROMOTED, /* Stopped */ RSC_ROLE_UNPROMOTED, /* Started */ RSC_ROLE_UNPROMOTED, /* Unpromoted */ RSC_ROLE_PROMOTED, /* Promoted */ }, }; /*! * \internal * \brief Function to schedule actions needed for a role change * * \param[in,out] rsc Resource whose role is changing * \param[in] node Node where resource will be in its next role * \param[in] optional Whether scheduled actions should be optional */ typedef void (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *node, bool optional); static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* This array lists the function needed to transition directly from one role * to another. NULL indicates that nothing is needed. * * Current role Transition function Next role * ------------ ------------------- ---------- */ /* Unknown */ { assert_role_error, /* Unknown */ stop_resource, /* Stopped */ assert_role_error, /* Started */ assert_role_error, /* Unpromoted */ assert_role_error, /* Promoted */ }, /* Stopped */ { assert_role_error, /* Unknown */ NULL, /* Stopped */ start_resource, /* Started */ start_resource, /* Unpromoted */ assert_role_error, /* Promoted */ }, /* Started */ { assert_role_error, /* Unknown */ stop_resource, /* Stopped */ NULL, /* Started */ NULL, /* Unpromoted */ promote_resource, /* Promoted */ }, /* Unpromoted */ { assert_role_error, /* Unknown */ stop_resource, /* Stopped */ stop_resource, /* Started */ NULL, /* Unpromoted */ promote_resource, /* Promoted */ }, /* Promoted */ { assert_role_error, /* Unknown */ demote_resource, /* Stopped */ demote_resource, /* Started */ demote_resource, /* Unpromoted */ NULL, /* Promoted */ }, }; /*! * \internal * \brief Get a list of a resource's allowed nodes sorted by node weight * * \param[in] rsc Resource to check * * \return List of allowed nodes sorted by node weight */ static GList * sorted_allowed_nodes(const pe_resource_t *rsc) { if (rsc->allowed_nodes != NULL) { GList *nodes = g_hash_table_get_values(rsc->allowed_nodes); if (nodes != NULL) { return pcmk__sort_nodes(nodes, pe__current_node(rsc)); } } return NULL; } /*! * \internal * \brief Assign a resource to its best allowed node, if possible * * \param[in] rsc Resource to choose a node for * \param[in] prefer If not NULL, prefer this node when all else equal * * \return true if \p rsc could be assigned to a node, otherwise false */ static bool assign_best_node(pe_resource_t *rsc, pe_node_t *prefer) { GList *nodes = NULL; pe_node_t *chosen = NULL; pe_node_t *best = NULL; bool result = false; pcmk__ban_insufficient_capacity(rsc, &prefer); if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { // We've already finished assignment of resources to nodes return rsc->allocated_to != NULL; } // Sort allowed nodes by weight nodes = sorted_allowed_nodes(rsc); if (nodes != NULL) { best = (pe_node_t *) nodes->data; // First node has best score } if ((prefer != NULL) && (nodes != NULL)) { // Get the allowed node version of prefer chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (chosen == NULL) { pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", pe__node_name(prefer), rsc->id); /* Favor the preferred node as long as its weight is at least as good as * the best allowed node's. * * An alternative would be to favor the preferred node even if the best * node is better, when the best node's weight is less than INFINITY. */ } else if (chosen->weight < best->weight) { pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", pe__node_name(chosen), rsc->id); chosen = NULL; } else if (!pcmk__node_available(chosen, true, false)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", pe__node_name(chosen), rsc->id); chosen = NULL; } else { pe_rsc_trace(rsc, "Chose preferred node %s for %s (ignoring %d candidates)", pe__node_name(chosen), rsc->id, g_list_length(nodes)); } } if ((chosen == NULL) && (best != NULL)) { /* Either there is no preferred node, or the preferred node is not * suitable, but another node is allowed to run the resource. */ chosen = best; if (!pe_rsc_is_unique_clone(rsc->parent) && (chosen->weight > 0) // Zero not acceptable && pcmk__node_available(chosen, false, false)) { /* If the resource is already running on a node, prefer that node if * it is just as good as the chosen node. * * We don't do this for unique clone instances, because * distribute_children() has already assigned instances to their * running nodes when appropriate, and if we get here, we don't want * remaining unassigned instances to prefer a node that's already * running another instance. */ pe_node_t *running = pe__current_node(rsc); if (running == NULL) { // Nothing to do } else if (!pcmk__node_available(running, true, false)) { pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", rsc->id, pe__node_name(running)); } else { int nodes_with_best_score = 1; for (GList *iter = nodes->next; iter; iter = iter->next) { pe_node_t *allowed = (pe_node_t *) iter->data; if (allowed->weight != chosen->weight) { // The nodes are sorted by weight, so no more are equal break; } if (allowed->details == running->details) { // Scores are equal, so prefer the current node chosen = allowed; } nodes_with_best_score++; } if (nodes_with_best_score > 1) { do_crm_log(((chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO), "Chose %s for %s from %d nodes with score %s", pe__node_name(chosen), rsc->id, nodes_with_best_score, pcmk_readable_score(chosen->weight)); } } } pe_rsc_trace(rsc, "Chose %s for %s from %d candidates", pe__node_name(chosen), rsc->id, g_list_length(nodes)); } result = pcmk__finalize_assignment(rsc, chosen, false); g_list_free(nodes); return result; } /*! * \internal * \brief Apply a "this with" colocation to a node's allowed node scores * * \param[in] data Colocation to apply * \param[in] user_data Resource being assigned */ static void apply_this_with(void *data, void *user_data) { pcmk__colocation_t *colocation = (pcmk__colocation_t *) data; pe_resource_t *rsc = (pe_resource_t *) user_data; GHashTable *archive = NULL; pe_resource_t *other = colocation->primary; // In certain cases, we will need to revert the node scores if ((colocation->dependent_role >= RSC_ROLE_PROMOTED) || ((colocation->score < 0) && (colocation->score > -INFINITY))) { archive = pcmk__copy_node_table(rsc->allowed_nodes); } pe_rsc_trace(rsc, "%s: Assigning colocation %s primary %s first" "(score=%d role=%s)", rsc->id, colocation->id, other->id, colocation->score, role2text(colocation->dependent_role)); other->cmds->assign(other, NULL); // Apply the colocation score to this resource's allowed node scores rsc->cmds->apply_coloc_score(rsc, other, colocation, true); if ((archive != NULL) && !pcmk__any_node_available(rsc->allowed_nodes)) { pe_rsc_info(rsc, "%s: Reverting scores from colocation with %s " "because no nodes allowed", rsc->id, other->id); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = archive; archive = NULL; } if (archive != NULL) { g_hash_table_destroy(archive); } } /*! * \internal * \brief Apply a "with this" colocation to a node's allowed node scores * * \param[in] data Colocation to apply * \param[in] user_data Resource being assigned */ static void apply_with_this(void *data, void *user_data) { pcmk__colocation_t *colocation = (pcmk__colocation_t *) data; pe_resource_t *rsc = (pe_resource_t *) user_data; pe_resource_t *other = colocation->dependent; const float factor = colocation->score / (float) INFINITY; if (!pcmk__colocation_has_influence(colocation, NULL)) { return; } pe_rsc_trace(rsc, "%s: Incorporating attenuated %s assignment scores due " "to colocation %s", rsc->id, other->id, colocation->id); other->cmds->add_colocated_node_scores(other, rsc->id, &rsc->allowed_nodes, colocation->node_attribute, factor, pcmk__coloc_select_active); } /*! * \internal * \brief Update a Pacemaker Remote node once its connection has been assigned * * \param[in] connection Connection resource that has been assigned */ static void remote_connection_assigned(pe_resource_t *connection) { pe_node_t *remote_node = pe_find_node(connection->cluster->nodes, connection->id); CRM_CHECK(remote_node != NULL, return); if ((connection->allocated_to != NULL) && (connection->next_role != RSC_ROLE_STOPPED)) { crm_trace("Pacemaker Remote node %s will be online", remote_node->details->id); remote_node->details->online = TRUE; if (remote_node->details->unseen) { // Avoid unnecessary fence, since we will attempt connection remote_node->details->unclean = FALSE; } } else { crm_trace("Pacemaker Remote node %s will be shut down " "(%sassigned connection's next role is %s)", remote_node->details->id, ((connection->allocated_to == NULL)? "un" : ""), role2text(connection->next_role)); remote_node->details->shutdown = TRUE; } } /*! * \internal * \brief Assign a primitive resource to a node * * \param[in] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * * \return Node that \p rsc is assigned to, if assigned entirely to one node */ pe_node_t * pcmk__primitive_assign(pe_resource_t *rsc, pe_node_t *prefer) { CRM_ASSERT(rsc != NULL); // Never assign a child without parent being assigned first if ((rsc->parent != NULL) && !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "%s: Assigning parent %s first", rsc->id, rsc->parent->id); rsc->parent->cmds->assign(rsc->parent, prefer); } if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; // Assignment has already been done } // Ensure we detect assignment loops if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id); return NULL; } pe__set_resource_flags(rsc, pe_rsc_allocating); pe__show_node_weights(true, rsc, "Pre-assignment", rsc->allowed_nodes, rsc->cluster); g_list_foreach(rsc->rsc_cons, apply_this_with, rsc); pe__show_node_weights(true, rsc, "Post-this-with", rsc->allowed_nodes, rsc->cluster); g_list_foreach(rsc->rsc_cons_lhs, apply_with_this, rsc); if (rsc->next_role == RSC_ROLE_STOPPED) { pe_rsc_trace(rsc, "Banning %s from all nodes because it will be stopped", rsc->id); resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, rsc->cluster); } else if ((rsc->next_role > rsc->role) && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_quorum) && (rsc->cluster->no_quorum_policy == no_quorum_freeze)) { crm_notice("Resource %s cannot be elevated from %s to %s due to " "no-quorum-policy=freeze", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze"); } pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores), rsc, __func__, rsc->allowed_nodes, rsc->cluster); // Unmanage resource if fencing is enabled but no device is configured if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled) && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_stonith_resource)) { pe__clear_resource_flags(rsc, pe_rsc_managed); } if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { // Unmanaged resources stay on their current node const char *reason = NULL; pe_node_t *assign_to = NULL; pe__set_next_role(rsc, rsc->role, "unmanaged"); assign_to = pe__current_node(rsc); if (assign_to == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_PROMOTED) { reason = "promoted"; } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { reason = "failed"; } else { reason = "active"; } pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id, (assign_to? assign_to->details->uname : "no node"), reason); pcmk__finalize_assignment(rsc, assign_to, true); } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) { pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", rsc->id); pcmk__finalize_assignment(rsc, NULL, true); } else if (pcmk_is_set(rsc->flags, pe_rsc_provisional) && assign_best_node(rsc, prefer)) { // Assignment successful } else if (rsc->allocated_to == NULL) { if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); } else if (rsc->running_on != NULL) { pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); } } else { pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, pe__node_name(rsc->allocated_to)); } pe__clear_resource_flags(rsc, pe_rsc_allocating); if (rsc->is_remote_node) { remote_connection_assigned(rsc); } return rsc->allocated_to; } /*! * \internal * \brief Schedule actions to bring resource down and back to current role * * \param[in,out] rsc Resource to restart * \param[in] current Node that resource should be brought down on * \param[in] need_stop Whether the resource must be stopped * \param[in] need_promote Whether the resource must be promoted * * \return Role that resource would have after scheduled actions are taken */ static void schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current, bool need_stop, bool need_promote) { enum rsc_role_e role = rsc->role; enum rsc_role_e next_role; rsc_transition_fn fn = NULL; pe__set_resource_flags(rsc, pe_rsc_restarting); // Bring resource down to a stop on its current node while (role != RSC_ROLE_STOPPED) { next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s", (need_stop? "required" : "optional"), rsc->id, role2text(role), role2text(next_role)); fn = rsc_action_matrix[role][next_role]; if (fn == NULL) { break; } fn(rsc, current, !need_stop); role = next_role; } // Bring resource up to its next role on its next node while ((rsc->role <= rsc->next_role) && (role != rsc->role) && !pcmk_is_set(rsc->flags, pe_rsc_block)) { bool required = need_stop; next_role = rsc_state_matrix[role][rsc->role]; if ((next_role == RSC_ROLE_PROMOTED) && need_promote) { required = true; } pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s", (required? "required" : "optional"), rsc->id, role2text(role), role2text(next_role)); fn = rsc_action_matrix[role][next_role]; if (fn == NULL) { break; } fn(rsc, rsc->allocated_to, !required); role = next_role; } pe__clear_resource_flags(rsc, pe_rsc_restarting); } /*! * \internal * \brief If a resource's next role is not explicitly specified, set a default * * \param[in,out] rsc Resource to set next role for * * \return "explicit" if next role was explicitly set, otherwise "implicit" */ static const char * set_default_next_role(pe_resource_t *rsc) { if (rsc->next_role != RSC_ROLE_UNKNOWN) { return "explicit"; } if (rsc->allocated_to == NULL) { pe__set_next_role(rsc, RSC_ROLE_STOPPED, "assignment"); } else { pe__set_next_role(rsc, RSC_ROLE_STARTED, "assignment"); } return "implicit"; } /*! * \internal * \brief Create an action to represent an already pending start * * \param[in,out] rsc Resource to create start action for */ static void create_pending_start(pe_resource_t *rsc) { pe_action_t *start = NULL; pe_rsc_trace(rsc, "Creating action for %s to represent already pending start", rsc->id); start = start_action(rsc, rsc->allocated_to, TRUE); pe__set_action_flags(start, pe_action_print_always); } /*! * \internal * \brief Schedule actions needed to take a resource to its next role * * \param[in,out] rsc Resource to schedule actions for */ static void schedule_role_transition_actions(pe_resource_t *rsc) { enum rsc_role_e role = rsc->role; while (role != rsc->next_role) { enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role]; rsc_transition_fn fn = NULL; pe_rsc_trace(rsc, "Creating action to take %s from %s to %s (ending at %s)", rsc->id, role2text(role), role2text(next_role), role2text(rsc->next_role)); fn = rsc_action_matrix[role][next_role]; if (fn == NULL) { break; } fn(rsc, rsc->allocated_to, false); role = next_role; } } /*! * \internal * \brief Create all actions needed for a given primitive resource * * \param[in,out] rsc Primitive resource to create actions for */ void pcmk__primitive_create_actions(pe_resource_t *rsc) { bool need_stop = false; bool need_promote = false; bool is_moving = false; bool allow_migrate = false; bool multiply_active = false; pe_node_t *current = NULL; unsigned int num_all_active = 0; unsigned int num_clean_active = 0; const char *next_role_source = NULL; CRM_ASSERT(rsc != NULL); next_role_source = set_default_next_role(rsc); pe_rsc_trace(rsc, "Creating all actions for %s transition from %s to %s " "(%s) on %s", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next_role_source, pe__node_name(rsc->allocated_to)); current = pe__find_active_on(rsc, &num_all_active, &num_clean_active); g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration, rsc); if ((current != NULL) && (rsc->allocated_to != NULL) && (current->details != rsc->allocated_to->details) && (rsc->next_role >= RSC_ROLE_STARTED)) { pe_rsc_trace(rsc, "Moving %s from %s to %s", rsc->id, pe__node_name(current), pe__node_name(rsc->allocated_to)); is_moving = true; allow_migrate = pcmk__rsc_can_migrate(rsc, current); // This is needed even if migrating (though I'm not sure why ...) need_stop = true; } // Check whether resource is partially migrated and/or multiply active if ((rsc->partial_migration_source != NULL) && (rsc->partial_migration_target != NULL) && allow_migrate && (num_all_active == 2) && (current->details == rsc->partial_migration_source->details) && (rsc->allocated_to->details == rsc->partial_migration_target->details)) { /* A partial migration is in progress, and the migration target remains * the same as when the migration began. */ pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue", rsc->id, pe__node_name(rsc->partial_migration_source), pe__node_name(rsc->partial_migration_target)); } else if ((rsc->partial_migration_source != NULL) || (rsc->partial_migration_target != NULL)) { // A partial migration is in progress but can't be continued if (num_all_active > 2) { // The resource is migrating *and* multiply active! crm_notice("Forcing recovery of %s because it is migrating " "from %s to %s and possibly active elsewhere", rsc->id, pe__node_name(rsc->partial_migration_source), pe__node_name(rsc->partial_migration_target)); } else { // The migration source or target isn't available crm_notice("Forcing recovery of %s because it can no longer " "migrate from %s to %s", rsc->id, pe__node_name(rsc->partial_migration_source), pe__node_name(rsc->partial_migration_target)); } need_stop = true; rsc->partial_migration_source = rsc->partial_migration_target = NULL; allow_migrate = false; } else if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) { multiply_active = (num_all_active > 1); } else { /* If a resource has "requires" set to nothing or quorum, don't consider * it active on unclean nodes (similar to how all resources behave when * stonith-enabled is false). We can start such resources elsewhere * before fencing completes, and if we considered the resource active on * the failed node, we would attempt recovery for being active on * multiple nodes. */ multiply_active = (num_clean_active > 1); } if (multiply_active) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); // Resource was (possibly) incorrectly multiply active pe_proc_err("%s resource %s might be active on %u nodes (%s)", pcmk__s(class, "Untyped"), rsc->id, num_all_active, recovery2text(rsc->recovery_type)); crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ" "#Resource_is_Too_Active for more information"); switch (rsc->recovery_type) { case recovery_stop_start: need_stop = true; break; case recovery_stop_unexpected: need_stop = true; // stop_resource() will skip expected node pe__set_resource_flags(rsc, pe_rsc_stop_unexpected); break; default: break; } } else { pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected); } if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) { create_pending_start(rsc); } if (is_moving) { // Remaining tests are only for resources staying where they are } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { if (pcmk_is_set(rsc->flags, pe_rsc_stop)) { need_stop = true; pe_rsc_trace(rsc, "Recovering %s", rsc->id); } else { pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id); if (rsc->next_role == RSC_ROLE_PROMOTED) { need_promote = true; } } } else if (pcmk_is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id); need_stop = true; } else if ((rsc->role > RSC_ROLE_STARTED) && (current != NULL) && (rsc->allocated_to != NULL)) { pe_action_t *start = NULL; pe_rsc_trace(rsc, "Creating start action for promoted resource %s", rsc->id); start = start_action(rsc, rsc->allocated_to, TRUE); if (!pcmk_is_set(start->flags, pe_action_optional)) { // Recovery of a promoted resource pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id); need_stop = true; } } // Create any actions needed to bring resource down and back up to same role schedule_restart_actions(rsc, current, need_stop, need_promote); // Create any actions needed to take resource from this role to the next schedule_role_transition_actions(rsc); pcmk__create_recurring_actions(rsc); if (allow_migrate) { pcmk__create_migration_actions(rsc, current); } } /*! * \internal * \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes * * \param[in] rsc Resource to check */ static void rsc_avoids_remote_nodes(const pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if (node->details->remote_rsc != NULL) { node->weight = -INFINITY; } } } /*! * \internal * \brief Return allowed nodes as (possibly sorted) list * * Convert a resource's hash table of allowed nodes to a list. If printing to * stdout, sort the list, to keep action ID numbers consistent for regression * test output (while avoiding the performance hit on a live cluster). * * \param[in] rsc Resource to check for allowed nodes * \param[in] data_set Cluster working set * * \return List of resource's allowed nodes * \note Callers should take care not to rely on the list being sorted. */ static GList * allowed_nodes_as_list(pe_resource_t *rsc, pe_working_set_t *data_set) { GList *allowed_nodes = NULL; if (rsc->allowed_nodes) { allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes); } if (!pcmk__is_daemon) { allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name); } return allowed_nodes; } /*! * \internal * \brief Create implicit constraints needed for a primitive resource * * \param[in,out] rsc Primitive resource to create implicit constraints for */ void pcmk__primitive_internal_constraints(pe_resource_t *rsc) { pe_resource_t *top = NULL; GList *allowed_nodes = NULL; bool check_unfencing = false; bool check_utilization = false; CRM_ASSERT(rsc != NULL); if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping implicit constraints for unmanaged resource %s", rsc->id); return; } top = uber_parent(rsc); // Whether resource requires unfencing check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device) && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing) && pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing); // Whether a non-default placement strategy is used check_utilization = (g_hash_table_size(rsc->utilization) > 0) && !pcmk__str_eq(rsc->cluster->placement_strategy, "default", pcmk__str_casei); // Order stops before starts (i.e. restart) pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional|pe_order_implies_then|pe_order_restart, rsc->cluster); // Promotable ordering: demote before stop, start before promote if (pcmk_is_set(top->flags, pe_rsc_promotable) || (rsc->role > RSC_ROLE_UNPROMOTED)) { pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_promoted_implies_first, rsc->cluster); pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL, pe_order_runnable_left, rsc->cluster); } // Don't clear resource history if probing on same node pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL, pe_order_same_node|pe_order_then_cancels_first, rsc->cluster); // Certain checks need allowed nodes if (check_unfencing || check_utilization || (rsc->container != NULL)) { allowed_nodes = allowed_nodes_as_list(rsc, rsc->cluster); } if (check_unfencing) { // Check whether the node needs to be unfenced for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *node = item->data; pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, rsc->cluster); crm_debug("Ordering any stops of %s before %s, and any starts after", rsc->id, unfence->uuid); /* * It would be more efficient to order clone resources once, * rather than order each instance, but ordering the instance * allows us to avoid unnecessary dependencies that might conflict * with user constraints. * * @TODO: This constraint can still produce a transition loop if the * resource has a stop scheduled on the node being unfenced, and * there is a user ordering constraint to start some other resource * (which will be ordered after the unfence) before stopping this * resource. An example is "start some slow-starting cloned service * before stopping an associated virtual IP that may be moving to * it": * stop this -> unfencing -> start that -> stop this */ pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL, strdup(unfence->uuid), unfence, pe_order_optional|pe_order_same_node, rsc->cluster); pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence, rsc, start_key(rsc), NULL, pe_order_implies_then_on_node|pe_order_same_node, rsc->cluster); } } if (check_utilization) { pcmk__create_utilization_constraints(rsc, allowed_nodes); } if (rsc->container != NULL) { pe_resource_t *remote_rsc = NULL; if (rsc->is_remote_node) { // rsc is the implicit remote connection for a guest or bundle node /* Guest resources are not allowed to run on Pacemaker Remote nodes, * to avoid nesting remotes. However, bundles are allowed. */ if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { rsc_avoids_remote_nodes(rsc->container); } /* If someone cleans up a guest or bundle node's container, we will * likely schedule a (re-)probe of the container and recovery of the * connection. Order the connection stop after the container probe, * so that if we detect the container running, we will trigger a new * transition and avoid the unnecessary recovery. */ pcmk__order_resource_actions(rsc->container, RSC_STATUS, rsc, RSC_STOP, pe_order_optional); /* A user can specify that a resource must start on a Pacemaker Remote * node by explicitly configuring it with the container=NODENAME * meta-attribute. This is of questionable merit, since location * constraints can accomplish the same thing. But we support it, so here * we check whether a resource (that is not itself a remote connection) * has container set to a remote node or guest node resource. */ } else if (rsc->container->is_remote_node) { remote_rsc = rsc->container; } else { remote_rsc = pe__resource_contains_guest_node(rsc->cluster, rsc->container); } if (remote_rsc != NULL) { /* Force the resource on the Pacemaker Remote node instead of * colocating the resource with the container resource. */ for (GList *item = allowed_nodes; item; item = item->next) { pe_node_t *node = item->data; if (node->details->remote_rsc != remote_rsc) { node->weight = -INFINITY; } } } else { /* This resource is either a filler for a container that does NOT * represent a Pacemaker Remote node, or a Pacemaker Remote * connection resource for a guest node or bundle. */ int score; crm_trace("Order and colocate %s relative to its container %s", rsc->id, rsc->container->id); pcmk__new_ordering(rsc->container, pcmk__op_key(rsc->container->id, RSC_START, 0), NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_implies_then|pe_order_runnable_left, rsc->cluster); pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, rsc->container, pcmk__op_key(rsc->container->id, RSC_STOP, 0), NULL, pe_order_implies_first, rsc->cluster); if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) { score = 10000; /* Highly preferred but not essential */ } else { score = INFINITY; /* Force them to run on the same host */ } pcmk__new_colocation("resource-with-container", NULL, score, rsc, rsc->container, NULL, NULL, true, rsc->cluster); } } if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) { /* Remote connections and fencing devices are not allowed to run on * Pacemaker Remote nodes */ rsc_avoids_remote_nodes(rsc); } g_list_free(allowed_nodes); } /*! * \internal * \brief Apply a colocation's score to node weights or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node weights (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent */ void pcmk__primitive_apply_coloc_score(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *colocation, bool for_dependent) { enum pcmk__coloc_affects filter_results; CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL), return); if (for_dependent) { // Always process on behalf of primary resource primary->cmds->apply_coloc_score(dependent, primary, colocation, false); return; } filter_results = pcmk__colocation_affects(dependent, primary, colocation, false); pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)", ((colocation->score > 0)? "Colocating" : "Anti-colocating"), dependent->id, primary->id, colocation->id, colocation->score, filter_results); switch (filter_results) { case pcmk__coloc_affects_role: pcmk__apply_coloc_to_priority(dependent, primary, colocation); break; case pcmk__coloc_affects_location: pcmk__apply_coloc_to_weights(dependent, primary, colocation); break; default: // pcmk__coloc_affects_nothing return; } } /*! * \internal * \brief Return action flags for a given primitive resource action * * \param[in,out] action Action to get flags for * \param[in] node If not NULL, limit effects to this node (ignored) * * \return Flags appropriate to \p action on \p node */ enum pe_action_flags pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node) { CRM_ASSERT(action != NULL); return action->flags; } /*! * \internal * \brief Check whether a node is a multiply active resource's expected node * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return true if \p rsc is multiply active with multiple-active set to * stop_unexpected, and \p node is the node where it will remain active * \note This assumes that the resource's next role cannot be changed to stopped * after this is called, which should be reasonable if status has already * been unpacked and resources have been assigned to nodes. */ static bool is_expected_node(const pe_resource_t *rsc, const pe_node_t *node) { return pcmk_all_flags_set(rsc->flags, pe_rsc_stop_unexpected|pe_rsc_restarting) && (rsc->next_role > RSC_ROLE_STOPPED) && (rsc->allocated_to != NULL) && (node != NULL) && (rsc->allocated_to->details == node->details); } /*! * \internal * \brief Schedule actions needed to stop a resource wherever it is active * * \param[in,out] rsc Resource being stopped * \param[in] node Node where resource is being stopped (ignored) * \param[in] optional Whether actions should be optional */ static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional) { for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) { pe_node_t *current = (pe_node_t *) iter->data; pe_action_t *stop = NULL; if (is_expected_node(rsc, current)) { /* We are scheduling restart actions for a multiply active resource * with multiple-active=stop_unexpected, and this is where it should * not be stopped. */ pe_rsc_trace(rsc, "Skipping stop of multiply active resource %s " "on expected node %s", rsc->id, pe__node_name(current)); continue; } if (rsc->partial_migration_target != NULL) { if ((rsc->partial_migration_target->details == current->details) // Only if the allocated node still is the migration target && (rsc->allocated_to != NULL) && rsc->allocated_to->details == rsc->partial_migration_target->details) { pe_rsc_trace(rsc, "Skipping stop of %s on %s " "because partial migration there will continue", rsc->id, pe__node_name(current)); continue; } else { pe_rsc_trace(rsc, "Forcing stop of %s on %s " "because migration target changed", rsc->id, pe__node_name(current)); optional = false; } } pe_rsc_trace(rsc, "Scheduling stop of %s on %s", rsc->id, pe__node_name(current)); stop = stop_action(rsc, current, optional); if (rsc->allocated_to == NULL) { pe_action_set_reason(stop, "node availability", true); } else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting |pe_rsc_stop_unexpected)) { /* We are stopping a multiply active resource on a node that is * not its expected node, and we are still scheduling restart * actions, so the stop is for being multiply active. */ pe_action_set_reason(stop, "being multiply active", true); } if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe__clear_action_flags(stop, pe_action_runnable); } if (pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop)) { pcmk__schedule_cleanup(rsc, current, optional); } if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) { pe_action_t *unfence = pe_fence_op(current, "on", true, NULL, false, rsc->cluster); order_actions(stop, unfence, pe_order_implies_first); if (!pcmk__node_unfenced(current)) { pe_proc_err("Stopping %s until %s can be unfenced", rsc->id, pe__node_name(current)); } } } } /*! * \internal * \brief Schedule actions needed to start a resource on a node * * \param[in,out] rsc Resource being started * \param[in] node Node where resource should be started * \param[in] optional Whether actions should be optional */ static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional) { pe_action_t *start = NULL; CRM_ASSERT(node != NULL); pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)", (optional? "optional" : "required"), rsc->id, pe__node_name(node), node->weight); start = start_action(rsc, node, TRUE); pcmk__order_vs_unfence(rsc, node, start, pe_order_implies_then); if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) { pe__clear_action_flags(start, pe_action_optional); } if (is_expected_node(rsc, node)) { /* This could be a problem if the start becomes necessary for other * reasons later. */ pe_rsc_trace(rsc, "Start of multiply active resouce %s " "on expected node %s will be a pseudo-action", rsc->id, pe__node_name(node)); pe__set_action_flags(start, pe_action_pseudo); } } /*! * \internal * \brief Schedule actions needed to promote a resource on a node * * \param[in,out] rsc Resource being promoted * \param[in] node Node where resource should be promoted * \param[in] optional Whether actions should be optional */ static void promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional) { GList *iter = NULL; GList *action_list = NULL; bool runnable = true; CRM_ASSERT(node != NULL); // Any start must be runnable for promotion to be runnable action_list = pe__resource_actions(rsc, node, RSC_START, true); for (iter = action_list; iter != NULL; iter = iter->next) { pe_action_t *start = (pe_action_t *) iter->data; if (!pcmk_is_set(start->flags, pe_action_runnable)) { runnable = false; } } g_list_free(action_list); if (runnable) { pe_action_t *promote = promote_action(rsc, node, optional); pe_rsc_trace(rsc, "Scheduling %s promotion of %s on %s", (optional? "optional" : "required"), rsc->id, pe__node_name(node)); if (is_expected_node(rsc, node)) { /* This could be a problem if the promote becomes necessary for * other reasons later. */ pe_rsc_trace(rsc, "Promotion of multiply active resouce %s " "on expected node %s will be a pseudo-action", rsc->id, pe__node_name(node)); pe__set_action_flags(promote, pe_action_pseudo); } } else { pe_rsc_trace(rsc, "Not promoting %s on %s: start unrunnable", rsc->id, pe__node_name(node)); action_list = pe__resource_actions(rsc, node, RSC_PROMOTE, true); for (iter = action_list; iter != NULL; iter = iter->next) { pe_action_t *promote = (pe_action_t *) iter->data; pe__clear_action_flags(promote, pe_action_runnable); } g_list_free(action_list); } } /*! * \internal * \brief Schedule actions needed to demote a resource wherever it is active * * \param[in,out] rsc Resource being demoted * \param[in] node Node where resource should be demoted (ignored) * \param[in] optional Whether actions should be optional */ static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional) { /* Since this will only be called for a primitive (possibly as an instance * of a collective resource), the resource is multiply active if it is * running on more than one node, so we want to demote on all of them as * part of recovery, regardless of which one is the desired node. */ for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) { pe_node_t *current = (pe_node_t *) iter->data; if (is_expected_node(rsc, current)) { pe_rsc_trace(rsc, "Skipping demote of multiply active resource %s " "on expected node %s", rsc->id, pe__node_name(current)); } else { pe_rsc_trace(rsc, "Scheduling %s demotion of %s on %s", (optional? "optional" : "required"), rsc->id, pe__node_name(current)); demote_action(rsc, current, optional); } } } static void assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional) { CRM_ASSERT(false); } /*! * \internal * \brief Schedule cleanup of a resource * * \param[in,out] rsc Resource to clean up * \param[in] node Node to clean up on * \param[in] optional Whether clean-up should be optional */ void pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional) { /* If the cleanup is required, its orderings are optional, because they're * relevant only if both actions are required. Conversely, if the cleanup is * optional, the orderings make the then action required if the first action * becomes required. */ enum pe_ordering flag = optional? pe_order_implies_then : pe_order_optional; CRM_CHECK((rsc != NULL) && (node != NULL), return); if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed", rsc->id, pe__node_name(node)); return; } if (node->details->unclean || !node->details->online) { pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable", rsc->id, pe__node_name(node)); return; } crm_notice("Scheduling clean-up of %s on %s", rsc->id, pe__node_name(node)); delete_action(rsc, node, optional); // stop -> clean-up -> start pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_DELETE, flag); pcmk__order_resource_actions(rsc, RSC_DELETE, rsc, RSC_START, flag); } /*! * \internal * \brief Add primitive meta-attributes relevant to graph actions to XML * * \param[in] rsc Primitive resource whose meta-attributes should be added * \param[in,out] xml Transition graph action attributes XML to add to */ void pcmk__primitive_add_graph_meta(pe_resource_t *rsc, xmlNode *xml) { char *name = NULL; char *value = NULL; pe_resource_t *parent = NULL; CRM_ASSERT((rsc != NULL) && (xml != NULL)); /* Clone instance numbers get set internally as meta-attributes, and are * needed in the transition graph (for example, to tell unique clone * instances apart). */ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); if (value != NULL) { name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } // Not sure if this one is really needed ... value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE); if (value != NULL) { name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE); crm_xml_add(xml, name, value); free(name); } /* The container meta-attribute can be set on the primitive itself or one of * its parents (for example, a group inside a container resource), so check * them all, and keep the highest one found. */ for (parent = rsc; parent != NULL; parent = parent->parent) { if (parent->container != NULL) { crm_xml_add(xml, CRM_META "_" XML_RSC_ATTR_CONTAINER, parent->container->id); } } + + /* Bundle replica children will get their external-ip set internally as a + * meta-attribute. The graph action needs it, but under a different naming + * convention than other meta-attributes. + */ + value = g_hash_table_lookup(rsc->meta, "external-ip"); + if (value != NULL) { + crm_xml_add(xml, "pcmk_external_ip", value); + } } // Primitive implementation of resource_alloc_functions_t:add_utilization() void pcmk__primitive_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization", orig_rsc->id, rsc->id); pcmk__release_node_capacity(utilization, rsc); } /*! * \internal * \brief Get epoch time of node's shutdown attribute (or now if none) * * \param[in] node Node to check * \param[in] data_set Cluster working set * * \return Epoch time corresponding to shutdown attribute if set or now if not */ static time_t shutdown_time(pe_node_t *node, pe_working_set_t *data_set) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); time_t result = 0; if (shutdown != NULL) { long long result_ll; if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) { result = (time_t) result_ll; } } return (result == 0)? get_effective_time(data_set) : result; } // Primitive implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__primitive_shutdown_lock(pe_resource_t *rsc) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); // Fence devices and remote connections can't be locked if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches) || pe__resource_is_remote_conn(rsc, rsc->cluster)) { return; } if (rsc->lock_node != NULL) { // The lock was obtained from resource history if (rsc->running_on != NULL) { /* The resource was started elsewhere even though it is now * considered locked. This shouldn't be possible, but as a * failsafe, we don't want to disturb the resource now. */ pe_rsc_info(rsc, "Cancelling shutdown lock because %s is already active", rsc->id); pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster); rsc->lock_node = NULL; rsc->lock_time = 0; } // Only a resource active on exactly one node can be locked } else if (pcmk__list_of_1(rsc->running_on)) { pe_node_t *node = rsc->running_on->data; if (node->details->shutdown) { if (node->details->unclean) { pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown", rsc->id, pe__node_name(node)); } else { rsc->lock_node = node; rsc->lock_time = shutdown_time(node, rsc->cluster); } } } if (rsc->lock_node == NULL) { // No lock needed return; } if (rsc->cluster->shutdown_lock > 0) { time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock; pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)", rsc->id, pe__node_name(rsc->lock_node), (long long) lock_expiration); pe__update_recheck_time(++lock_expiration, rsc->cluster); } else { pe_rsc_info(rsc, "Locking %s to %s due to shutdown", rsc->id, pe__node_name(rsc->lock_node)); } // If resource is locked to one node, ban it from all other nodes for (GList *item = rsc->cluster->nodes; item != NULL; item = item->next) { pe_node_t *node = item->data; if (strcmp(node->details->uname, rsc->lock_node->details->uname)) { resource_location(rsc, node, -CRM_SCORE_INFINITY, XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster); } } }