diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c index 0e63eb1e3d..1fd0d6651b 100644 --- a/lib/pacemaker/pcmk_graph_producer.c +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -1,1089 +1,1117 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pe_action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->details->uname) /*! * \internal * \brief Add an XML node tag for a specified ID * * \param[in] id Node UUID to add * \param[in,out] xml Parent XML tag to add to */ static xmlNode* add_node_to_xml_by_id(const char *id, xmlNode *xml) { xmlNode *node_xml; node_xml = create_xml_node(xml, XML_CIB_TAG_NODE); crm_xml_add(node_xml, XML_ATTR_UUID, id); return node_xml; } /*! * \internal * \brief Add an XML node tag for a specified node * * \param[in] node Node to add * \param[in,out] xml XML to add node to */ static void add_node_to_xml(const pe_node_t *node, void *xml) { add_node_to_xml_by_id(node->details->id, (xmlNode *) xml); } /*! 
* \internal * \brief Add XML with nodes that need an update of their maintenance state * * \param[in,out] xml Parent XML tag to add to * \param[in] data_set Working set for cluster */ static int add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set) { GList *gIter = NULL; xmlNode *maintenance = xml?create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE):NULL; int count = 0; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; struct pe_node_shared_s *details = node->details; if (!pe__is_guest_or_remote_node(node)) { continue; /* just remote nodes need to know atm */ } if (details->maintenance != details->remote_maintenance) { if (maintenance) { crm_xml_add( add_node_to_xml_by_id(node->details->id, maintenance), XML_NODE_IS_MAINTENANCE, details->maintenance?"1":"0"); } count++; } } crm_trace("%s %d nodes to adjust maintenance-mode " "to transition", maintenance?"Added":"Counted", count); return count; } /*! * \internal * \brief Add pseudo action with nodes needing maintenance state update * * \param[in,out] data_set Working set for cluster */ static void add_maintenance_update(pe_working_set_t *data_set) { pe_action_t *action = NULL; if (add_maintenance_nodes(NULL, data_set)) { crm_trace("adding maintenance state update pseudo action"); action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set); pe__set_action_flags(action, pe_action_print_always); } } /*! * \internal * \brief Add XML with nodes that an action is expected to bring down * * If a specified action is expected to bring any nodes down, add an XML block * with their UUIDs. When a node is lost, this allows the controller to * determine whether it was expected. 
* * \param[in,out] xml Parent XML tag to add to * \param[in] action Action to check for downed nodes * \param[in] data_set Working set for cluster */ static void add_downed_nodes(xmlNode *xml, const pe_action_t *action, const pe_working_set_t *data_set) { CRM_CHECK(xml && action && action->node && data_set, return); if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) { /* Shutdown makes the action's node down */ xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { /* Fencing makes the action's node and any hosted guest nodes down */ const char *fence = g_hash_table_lookup(action->meta, "stonith_action"); if (pcmk__is_fencing_action(fence)) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->node->details->id, downed); pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed); } } else if (action->rsc && action->rsc->is_remote_node && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) { /* Stopping a remote connection resource makes connected node down, * unless it's part of a migration */ GList *iter; pe_action_t *input; gboolean migrating = FALSE; for (iter = action->actions_before; iter != NULL; iter = iter->next) { input = ((pe_action_wrapper_t *) iter->data)->action; if (input->rsc && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_casei) && pcmk__str_eq(input->task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { migrating = TRUE; break; } } if (!migrating) { xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); add_node_to_xml_by_id(action->rsc->id, downed); } } } /*! 
* \internal * \brief Create a transition graph operation key for a clone action * * \param[in] action Clone action * \param[in] interval_ms Action interval in milliseconds * * \return Newly allocated string with transition graph operation key */ static char * clone_op_key(pe_action_t *action, guint interval_ms) { if (pcmk__str_eq(action->task, RSC_NOTIFY, pcmk__str_none)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL)); return pcmk__notify_key(action->rsc->clone_name, n_type, n_task); } else if (action->cancel_task != NULL) { return pcmk__op_key(action->rsc->clone_name, action->cancel_task, interval_ms); } else { return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms); } } /*! * \internal * \brief Add node details to transition graph action XML * * \param[in] action Scheduled action * \param[in] xml Transition graph action XML for \p action */ static void add_node_details(pe_action_t *action, xmlNode *xml) { pe_node_t *router_node = pcmk__connection_host_for_action(action); crm_xml_add(xml, XML_LRM_ATTR_TARGET, action->node->details->uname); crm_xml_add(xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); if (router_node != NULL) { crm_xml_add(xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname); } } /*! * \internal * \brief Add resource details to transition graph action XML * * \param[in] action Scheduled action * \param[in] action_xml Transition graph action XML for \p action */ static void add_resource_details(pe_action_t *action, xmlNode *action_xml) { xmlNode *rsc_xml = NULL; const char *attr_list[] = { XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER, XML_ATTR_TYPE }; /* If a resource is locked to a node via shutdown-lock, mark its actions * so the controller can preserve the lock when the action completes. 
*/ if (pcmk__action_locks_rsc_to_node(action)) { crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, (long long) action->rsc->lock_time); } // List affected resource rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); if (pcmk_is_set(action->rsc->flags, pe_rsc_orphan) && (action->rsc->clone_name != NULL)) { /* Use the numbered instance name here, because if there is more * than one instance on a node, we need to make sure the command * goes to the right one. * * This is important even for anonymous clones, because the clone's * unique meta-attribute might have just been toggled from on to * off. */ crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } else if (!pcmk_is_set(action->rsc->flags, pe_rsc_unique)) { const char *xml_id = ID(action->rsc->xml); crm_debug("Using anonymous clone name %s for %s (aka %s)", xml_id, action->rsc->id, action->rsc->clone_name); /* ID is what we'd like client to use * ID_LONG is what they might know it as instead * * ID_LONG is only strictly needed /here/ during the * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC * supporting this feature). 
* * If anyone toggles the unique flag to 'on', the * 'instance free' name will correspond to an orphan * and fall into the clause above instead */ crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); if ((action->rsc->clone_name != NULL) && !pcmk__str_eq(xml_id, action->rsc->clone_name, pcmk__str_none)) { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); } else { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } } else { CRM_ASSERT(action->rsc->clone_name == NULL); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); } for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); } } /*! * \internal * \brief Add action attributes to transition graph action XML * * \param[in] action Scheduled action * \param[in] action_xml Transition graph action XML for \p action */ static void add_action_attributes(pe_action_t *action, xmlNode *action_xml) { xmlNode *args_xml = NULL; /* We create free-standing XML to start, so we can sort the attributes * before adding it to action_xml, which keeps the scheduler regression * test graphs comparable. 
*/ args_xml = create_xml_node(NULL, XML_TAG_ATTRS); crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if ((action->rsc != NULL) && (action->node != NULL)) { // Get the resource instance attributes, evaluated properly for node GHashTable *params = pe_rsc_params(action->rsc, action->node, action->rsc->cluster); pcmk__substitute_remote_addr(action->rsc, params); g_hash_table_foreach(params, hash2smartfield, args_xml); #if ENABLE_VERSIONED_ATTRS { xmlNode *versioned_parameters = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS); pe_get_versioned_attributes(versioned_parameters, action->rsc, action->node, action->rsc->cluster); if (xml_has_children(versioned_parameters)) { add_node_copy(action_xml, versioned_parameters); } free_xml(versioned_parameters); } #endif } else if ((action->rsc != NULL) && (action->rsc->variant <= pe_native)) { GHashTable *params = pe_rsc_params(action->rsc, NULL, action->rsc->cluster); g_hash_table_foreach(params, hash2smartfield, args_xml); #if ENABLE_VERSIONED_ATTRS if (xml_has_children(action->rsc->versioned_parameters)) { add_node_copy(action_xml, action->rsc->versioned_parameters); } #endif } #if ENABLE_VERSIONED_ATTRS if (rsc_details != NULL) { if (xml_has_children(rsc_details->versioned_parameters)) { add_node_copy(action_xml, rsc_details->versioned_parameters); } if (xml_has_children(rsc_details->versioned_meta)) { add_node_copy(action_xml, rsc_details->versioned_meta); } } #endif g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (action->rsc != NULL) { const char *value = g_hash_table_lookup(action->rsc->meta, "external-ip"); pe_resource_t *parent = action->rsc; while (parent != NULL) { parent->cmds->append_meta(parent, args_xml); parent = parent->parent; } if (value != NULL) { hash2smartfield((gpointer) "pcmk_external_ip", (gpointer) value, (gpointer) args_xml); } pcmk__add_bundle_meta_to_xml(args_xml, action); } else if (pcmk__str_eq(action->task, 
CRM_OP_FENCE, pcmk__str_none) && (action->node != NULL)) { /* Pass the node's attributes as meta-attributes. * * @TODO: Determine whether it is still necessary to do this. It was * added in 33d99707, probably for the libfence-based implementation in * c9a90bd, which is no longer used. */ g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml); } sorted_xml(args_xml, action_xml, FALSE); free_xml(args_xml); } /*! * \internal * \brief Create the transition graph XML for a scheduled action * * \param[in] parent Parent XML element to add action to * \param[in] action Scheduled action * \param[in] skip_details If false, add action details as sub-elements * \param[in] data_set Cluster working set */ static void create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details, pe_working_set_t *data_set) { bool needs_node_info = true; bool needs_maintenance_info = false; xmlNode *action_xml = NULL; #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t *rsc_details = NULL; #endif if ((action == NULL) || (data_set == NULL)) { return; } // Create the top-level element based on task if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) { /* All fences need node info; guest node fences are pseudo-events */ action_xml = create_xml_node(parent, pcmk_is_set(action->flags, pe_action_pseudo)? 
XML_GRAPH_TAG_PSEUDO_EVENT : XML_GRAPH_TAG_CRM_EVENT); } else if (pcmk__str_any_of(action->task, CRM_OP_SHUTDOWN, CRM_OP_CLEAR_FAILCOUNT, CRM_OP_LRM_REFRESH, NULL)) { action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) { // CIB-only clean-up for shutdown locks action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB); } else if (pcmk_is_set(action->flags, pe_action_pseudo)) { if (pcmk__str_eq(action->task, CRM_OP_MAINTENANCE_NODES, pcmk__str_none)) { needs_maintenance_info = true; } action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT); needs_node_info = false; } else { action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP); #if ENABLE_VERSIONED_ATTRS rsc_details = pe_rsc_action_details(action); #endif } crm_xml_add_int(action_xml, XML_ATTR_ID, action->id); crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) { char *clone_key = NULL; guint interval_ms; if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } clone_key = clone_op_key(action, interval_ms); crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid); free(clone_key); } else { crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); } if (needs_node_info && (action->node != NULL)) { add_node_details(action, action_xml); g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET), strdup(action->node->details->uname)); g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID), strdup(action->node->details->id)); } if (skip_details) { return; } if ((action->rsc != NULL) && !pcmk_is_set(action->flags, pe_action_pseudo)) { // This is a real resource action, so add resource details add_resource_details(action, action_xml); } /* List any 
attributes in effect */ add_action_attributes(action, action_xml); /* List any nodes this action is expected to make down */ if (needs_node_info && (action->node != NULL)) { add_downed_nodes(action_xml, action, data_set); } if (needs_maintenance_info) { add_maintenance_nodes(action_xml, data_set); } } /*! * \internal * \brief Check whether an action should be added to the transition graph * * \param[in] action Action to check * * \return true if action should be added to graph, otherwise false */ static bool should_add_action_to_graph(pe_action_t *action) { if (!pcmk_is_set(action->flags, pe_action_runnable)) { crm_trace("Ignoring action %s (%d): unrunnable", action->uuid, action->id); return false; } if (pcmk_is_set(action->flags, pe_action_optional) && !pcmk_is_set(action->flags, pe_action_print_always)) { crm_trace("Ignoring action %s (%d): optional", action->uuid, action->id); return false; } /* Actions for unmanaged resources should be excluded from the graph, * with the exception of monitors and cancellation of recurring monitors. */ if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pe_rsc_managed) && !pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_none)) { const char *interval_ms_s; /* A cancellation of a recurring monitor will get here because the task * is cancel rather than monitor, but the interval can still be used to * recognize it. The interval has been normalized to milliseconds by * this point, so a string comparison is sufficient. 
*/ interval_ms_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS); if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) { crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)", action->uuid, action->id, action->rsc->id); return false; } } /* Always add pseudo-actions, fence actions, and shutdown actions (already * determined to be required and runnable by this point) */ if (pcmk_is_set(action->flags, pe_action_pseudo) || pcmk__strcase_any_of(action->task, CRM_OP_FENCE, CRM_OP_SHUTDOWN, NULL)) { return true; } if (action->node == NULL) { pe_err("Skipping action %s (%d) " "because it was not allocated to a node (bug?)", action->uuid, action->id); pcmk__log_action("Unallocated", action, false); return false; } if (pcmk_is_set(action->flags, pe_action_dc)) { crm_trace("Action %s (%d) should be dumped: " "can run on DC instead of %s", action->uuid, action->id, action->node->details->uname); } else if (pe__is_guest_node(action->node) && !action->node->details->remote_requires_reset) { crm_trace("Action %s (%d) should be dumped: " "assuming will be runnable on guest node %s", action->uuid, action->id, action->node->details->uname); } else if (!action->node->details->online) { pe_err("Skipping action %s (%d) " "because it was scheduled for offline node (bug?)", action->uuid, action->id); pcmk__log_action("Offline node", action, false); return false; } else if (action->node->details->unclean) { pe_err("Skipping action %s (%d) " "because it was scheduled for unclean node (bug?)", action->uuid, action->id); pcmk__log_action("Unclean node", action, false); return false; } return true; } /*! 
* \internal * \brief Check whether an ordering's flags can change an action * * \param[in] ordering Ordering to check * * \return true if ordering has flags that can change an action, false otherwise */ static bool ordering_can_change_actions(pe_action_wrapper_t *ordering) { return pcmk_any_flags_set(ordering->type, ~(pe_order_implies_first_printed |pe_order_implies_then_printed |pe_order_optional)); } /*! * \internal * \brief Check whether an action input should be in the transition graph * * \param[in] action Action to check * \param[in,out] input Action input to check * * \return true if input should be in graph, false otherwise * \note This function may not only check an input, but disable it under certian * circumstances (load or anti-colocation orderings that are not needed). */ static bool should_add_input_to_graph(pe_action_t *action, pe_action_wrapper_t *input) { if (input->state == pe_link_dumped) { return true; } if (input->type == pe_order_none) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering disabled", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pe_action_runnable) && !ordering_can_change_actions(input)) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pe_action_runnable) && pcmk_is_set(input->type, pe_order_one_or_more)) { crm_trace("Ignoring %s (%d) input %s (%d): " "one-or-more and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->type, pe_order_implies_first_migratable) && !pcmk_is_set(input->action->flags, pe_action_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "implies input migratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if 
(pcmk_is_set(input->type, pe_order_apply_first_non_migratable) && pcmk_is_set(input->action->flags, pe_action_migrate_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "only if input unmigratable but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if ((input->type == pe_order_optional) && pcmk_is_set(input->action->flags, pe_action_migrate_runnable) && pcmk__ends_with(input->action->uuid, "_stop_0")) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional but stop in migration", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (input->type == pe_order_load) { pe_node_t *input_node = input->action->node; // load orderings are relevant only if actions are for same node if (action->rsc && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei)) { pe_node_t *allocated = action->rsc->allocated_to; /* For load_stopped -> migrate_to orderings, we care about where it * has been allocated to, not where it will be executed. */ if ((input_node == NULL) || (allocated == NULL) || (input_node->details != allocated->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (allocated? allocated->details->uname : ""), (input_node? input_node->details->uname : "")); input->type = pe_order_none; return false; } } else if ((input_node == NULL) || (action->node == NULL) || (input_node->details != action->node->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, (action->node? action->node->details->uname : ""), (input_node? 
input_node->details->uname : "")); input->type = pe_order_none; return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "load ordering input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->type == pe_order_anti_colocation) { if (input->action->node && action->node && (input->action->node->details != action->node->details)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation node mismatch %s vs %s", action->uuid, action->id, input->action->uuid, input->action->id, action->node->details->uname, input->action->node->details->uname); input->type = pe_order_none; return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "anti-colocation input optional", action->uuid, action->id, input->action->uuid, input->action->id); input->type = pe_order_none; return false; } } else if (input->action->rsc && input->action->rsc != action->rsc && pcmk_is_set(input->action->rsc->flags, pe_rsc_failed) && !pcmk_is_set(input->action->rsc->flags, pe_rsc_managed) && pcmk__ends_with(input->action->uuid, "_stop_0") && action->rsc && pe_rsc_is_clone(action->rsc)) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", input->action->uuid, action->uuid); return false; } else if (pcmk_is_set(input->action->flags, pe_action_optional) && !pcmk_any_flags_set(input->action->flags, pe_action_print_always|pe_action_dumped) && !should_add_action_to_graph(input->action)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input optional", action->uuid, action->id, input->action->uuid, input->action->id); return false; } crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x", action->uuid, action->id, action_type_str(input->action->flags), input->action->uuid, input->action->id, 
action_node_str(input->action), action_runnable_str(input->action->flags), action_optional_str(input->action->flags), input->type); return true; } /*! * \internal * \brief Check whether an ordering creates an ordering loop * * \param[in] init_action "First" action in ordering * \param[in] action Callers should always set this the same as * \p init_action (this function may use a different * value for recursive calls) * \param[in] input Action wrapper for "then" action in ordering * * \return true if the ordering creates a loop, otherwise false */ bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action, pe_action_wrapper_t *input) { bool has_loop = false; if (pcmk_is_set(input->action->flags, pe_action_tracking)) { crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); return false; } // Don't need to check inputs that won't be used if (!should_add_input_to_graph(action, input)) { return false; } if (input->action == init_action) { crm_debug("Input loop found in %s@%s ->...-> %s@%s", action->uuid, action->node? action->node->details->uname : "", init_action->uuid, init_action->node? init_action->node->details->uname : ""); return true; } pe__set_action_flags(input->action, pe_action_tracking); crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)" "for graph loop with %s@%s ", action->uuid, action->node? action->node->details->uname : "", input->action->uuid, input->action->node? input->action->node->details->uname : "", input->type, init_action->uuid, init_action->node? 
init_action->node->details->uname : ""); // Recursively check input itself for loops for (GList *iter = input->action->actions_before; iter != NULL; iter = iter->next) { if (pcmk__graph_has_loop(init_action, input->action, (pe_action_wrapper_t *) iter->data)) { // Recursive call already logged a debug message has_loop = true; break; } } pe__clear_action_flags(input->action, pe_action_tracking); if (!has_loop) { crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->details->uname : "", action->uuid, action->node? action->node->details->uname : "", input->type); } return has_loop; } /*! * \internal * \brief Create a synapse XML element for a transition graph * * \param[in] action Action that synapse is for * \param[in] data_set Cluster working set containing graph * * \return Newly added XML element for new graph synapse */ static xmlNode * create_graph_synapse(pe_action_t *action, pe_working_set_t *data_set) { int synapse_priority = 0; xmlNode *syn = create_xml_node(data_set->graph, "synapse"); crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); data_set->num_synapse++; if (action->rsc != NULL) { synapse_priority = action->rsc->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); } return syn; } /*! * \internal * \brief Add an action to the transition graph XML if appropriate * * \param[in] action Action to possibly add * \param[in] data_set Cluster working set * * \note This will de-duplicate the action inputs, meaning that the * pe_action_wrapper_t:type flags can no longer be relied on to retain * their original settings. That means this MUST be called after * pcmk__apply_orderings() is complete, and nothing after this should rely * on those type flags. 
(For example, some code looks for type equal to * some flag rather than whether the flag is set, and some code looks for * particular combinations of flags -- such code must be done before * pcmk__create_graph().) */ void pcmk__add_action_to_graph(pe_action_t *action, pe_working_set_t *data_set) { xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; /* If we haven't already, de-duplicate inputs (even if we won't be adding * the action to the graph, so that crm_simulate's dot graphs don't have * duplicates). */ if (!pcmk_is_set(action->flags, pe_action_dedup)) { pcmk__deduplicate_action_inputs(action); pe__set_action_flags(action, pe_action_dedup); } if (pcmk_is_set(action->flags, pe_action_dumped) // Already added, or || !should_add_action_to_graph(action)) { // shouldn't be added return; } pe__set_action_flags(action, pe_action_dumped); syn = create_graph_synapse(action, data_set); set = create_xml_node(syn, "action_set"); in = create_xml_node(syn, "inputs"); create_graph_action(set, action, false, data_set); for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *input = (pe_action_wrapper_t *) lpc->data; if (should_add_input_to_graph(action, input)) { xmlNode *input_xml = create_xml_node(in, "trigger"); input->state = pe_link_dumped; create_graph_action(input_xml, input->action, true, data_set); } } } static int transition_id = -1; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const char *filename) { if (was_processing_error) { crm_err("Calculated transition %d (with errors)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else if (was_processing_warning) { crm_warn("Calculated transition %d (with warnings)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? 
"" : filename); } else { crm_notice("Calculated transition %d%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } if (crm_config_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } +/*! + * \internal + * \brief Add a resource's actions to the transition graph + * + * \param[in] rsc Resource whose actions should be added + */ +void +native_expand(pe_resource_t *rsc) +{ + GList *gIter = NULL; + + CRM_ASSERT(rsc); + pe_rsc_trace(rsc, "Processing actions from %s", rsc->id); + + for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { + pe_action_t *action = (pe_action_t *) gIter->data; + + crm_trace("processing action %d for rsc=%s", action->id, rsc->id); + pcmk__add_action_to_graph(action, rsc->cluster); + } + + for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { + pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; + + child_rsc->cmds->add_actions_to_graph(child_rsc); + } +} + /*! 
* \internal * \brief Create a transition graph with all cluster actions needed * * \param[in] data_set Cluster working set */ void pcmk__create_graph(pe_working_set_t *data_set) { GList *iter = NULL; const char *value = NULL; long long limit = 0LL; transition_id++; crm_trace("Creating transition graph %d", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { crm_xml_add(data_set->graph, "migration-limit", value); } if (data_set->recheck_by > 0) { char *recheck_epoch = NULL; recheck_epoch = crm_strdup_printf("%llu", (long long) data_set->recheck_by); crm_xml_add(data_set->graph, "recheck-by", recheck_epoch); free(recheck_epoch); } /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. 
*/ // Add resource actions to graph for (iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; pe_rsc_trace(rsc, "Processing actions for %s", rsc->id); rsc->cmds->add_actions_to_graph(rsc); } // Add pseudo-action for list of nodes with maintenance state update add_maintenance_update(data_set); // Add non-resource (node) actions for (iter = data_set->actions; iter != NULL; iter = iter->next) { pe_action_t *action = (pe_action_t *) iter->data; if ((action->rsc != NULL) && (action->node != NULL) && action->node->details->shutdown && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance) && !pcmk_any_flags_set(action->flags, pe_action_optional|pe_action_runnable) && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none)) { /* Eventually we should just ignore the 'fence' case, but for now * it's the best way to detect (in CTS) when CIB resource updates * are being lost. */ if (pcmk_is_set(data_set->flags, pe_flag_have_quorum) || (data_set->no_quorum_policy == no_quorum_ignore)) { crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)", action->node->details->unclean? "fence" : "shut down", action->node->details->uname, action->rsc->id, pcmk_is_set(action->rsc->flags, pe_rsc_managed)? " blocked" : " unmanaged", pcmk_is_set(action->rsc->flags, pe_rsc_failed)? " failed" : "", action->uuid); } } pcmk__add_action_to_graph(action, data_set); } crm_log_xml_trace(data_set->graph, "graph"); } diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c index 4cf27868cf..bcfb3efa74 100644 --- a/lib/pacemaker/pcmk_sched_native.c +++ b/lib/pacemaker/pcmk_sched_native.c @@ -1,2119 +1,2091 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include #include #include #include "libpacemaker_private.h" // The controller removes the resource from the CIB, making this redundant // #define DELETE_THEN_REFRESH 1 #define VARIANT_NATIVE 1 #include extern bool pcmk__is_daemon; static void Recurring(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, pe_working_set_t *data_set); static void RecurringOp(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, xmlNode *operation, pe_working_set_t *data_set); static void Recurring_Stopped(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, pe_working_set_t *data_set); static void RecurringOp_Stopped(pe_resource_t *rsc, pe_action_t *start, pe_node_t *node, xmlNode *operation, pe_working_set_t *data_set); gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set); gboolean StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); /* This array says what the *next* role should be when transitioning from one * role to another. For example going from Stopped to Promoted, the next role is * RSC_ROLE_UNPROMOTED, because the resource must be started before being promoted. * The current state then becomes Started, which is fed into this array again, * giving a next role of RSC_ROLE_PROMOTED. 
*/
static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* Current state     Next state */
/*                   Unknown           Stopped              Started              Unpromoted           Promoted */
/* Unknown */      { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED },
/* Stopped */      { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED },
/* Started */      { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED },
/* Unpromoted */   { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED },
/* Promoted */     { RSC_ROLE_STOPPED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_UNPROMOTED, RSC_ROLE_PROMOTED },
};

// Signature shared by every entry of rsc_action_matrix
typedef gboolean (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *next,
                                      gboolean optional,
                                      pe_working_set_t *data_set);

// This array picks the function needed to transition from one role to another
static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* Current state     Next state */
/*                   Unknown    Stopped    Started    Unpromoted Promoted */
/* Unknown */      { RoleError, StopRsc, RoleError, RoleError, RoleError, },
/* Stopped */      { RoleError, NullOp, StartRsc, StartRsc, RoleError, },
/* Started */      { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, },
/* Unpromoted */   { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, },
/* Promoted */     { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp, },
};

/*!
 * \internal
 * \brief Choose a node for a primitive resource and assign the resource to it
 *
 * If the resource is no longer provisional, nothing is chosen. Otherwise the
 * allowed nodes are sorted, and either the preferred node (when it is allowed,
 * scores at least as well as the best node, and is available) or the best node
 * is passed to pcmk__assign_primitive().
 *
 * \param[in,out] rsc       Resource to choose a node for
 * \param[in]     prefer    Node to prefer, if any (a pointer to this argument
 *                          is handed to pcmk__ban_insufficient_capacity())
 * \param[in]     data_set  Cluster working set (not referenced in the body)
 *
 * \return Whether \p rsc already has a node if it is not provisional,
 *         otherwise the result of pcmk__assign_primitive()
 */
static bool
native_choose_node(pe_resource_t * rsc, pe_node_t * prefer, pe_working_set_t * data_set)
{
    GList *nodes = NULL;
    pe_node_t *chosen = NULL;
    pe_node_t *best = NULL;
    int multiple = 1;   // Number of equally scored nodes considered
    int length = 0;     // Number of allowed nodes
    bool result = false;

    pcmk__ban_insufficient_capacity(rsc, &prefer);

    if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
        return rsc->allocated_to != NULL;
    }

    // Sort allowed nodes by weight
    if (rsc->allowed_nodes) {
        length = g_hash_table_size(rsc->allowed_nodes);
    }
    if (length > 0) {
        nodes = g_hash_table_get_values(rsc->allowed_nodes);
        nodes = pcmk__sort_nodes(nodes, pe__current_node(rsc));

        // First node in sorted list has the best score
        best = g_list_nth_data(nodes, 0);
    }

    if (prefer && nodes) {
        // Use this resource's own copy of the preferred node, if it has one
        chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);

        if (chosen == NULL) {
            pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
                         prefer->details->uname, rsc->id);

        /* Favor the preferred node as long as its weight is at least as good as
         * the best allowed node's.
         *
         * An alternative would be to favor the preferred node even if the best
         * node is better, when the best node's weight is less than INFINITY.
         */
        } else if (chosen->weight < best->weight) {
            pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
                         chosen->details->uname, rsc->id);
            chosen = NULL;

        } else if (!pcmk__node_available(chosen, true, false)) {
            pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
                         chosen->details->uname, rsc->id);
            chosen = NULL;

        } else {
            pe_rsc_trace(rsc,
                         "Chose preferred node %s for %s (ignoring %d candidates)",
                         chosen->details->uname, rsc->id, length);
        }
    }

    if ((chosen == NULL) && nodes) {
        /* Either there is no preferred node, or the preferred node is not
         * available, but there are other nodes allowed to run the resource.
         */

        chosen = best;
        pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates",
                     chosen ? chosen->details->uname : "", rsc->id, length);

        if (!pe_rsc_is_unique_clone(rsc->parent)
            && (chosen != NULL) && (chosen->weight > 0) // Zero not acceptable
            && pcmk__node_available(chosen, false, false)) {
            /* If the resource is already running on a node, prefer that node if
             * it is just as good as the chosen node.
             *
             * We don't do this for unique clone instances, because
             * distribute_children() has already assigned instances to their
             * running nodes when appropriate, and if we get here, we don't want
             * remaining unallocated instances to prefer a node that's already
             * running another instance.
             */
            pe_node_t *running = pe__current_node(rsc);

            if ((running != NULL)
                && !pcmk__node_available(running, true, false)) {
                pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources",
                             rsc->id, running->details->uname);

            } else if (running) {
                // Walk the remaining nodes that tie with the best score
                for (GList *iter = nodes->next; iter; iter = iter->next) {
                    pe_node_t *tmp = (pe_node_t *) iter->data;

                    if (tmp->weight != chosen->weight) {
                        // The nodes are sorted by weight, so no more are equal
                        break;
                    }
                    if (tmp->details == running->details) {
                        // Scores are equal, so prefer the current node
                        chosen = tmp;
                    }
                    multiple++;
                }
            }
        }
    }

    // multiple exceeds 1 only if the tie loop above ran, so chosen is set here
    if (multiple > 1) {
        do_crm_log(((chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO),
                   "Chose node %s for %s from %d nodes with score %s",
                   chosen->details->uname, rsc->id, multiple,
                   pcmk_readable_score(chosen->weight));
    }

    result = pcmk__assign_primitive(rsc, chosen, false);

    // Frees only the list cells; the nodes themselves are not freed here
    g_list_free(nodes);
    return result;
}

/*!
 * \internal
 * \brief Allocate a primitive resource to a node
 *
 * Escalates to the parent if the parent is not already allocating, applies
 * colocation constraints in both directions to the allowed node scores, and
 * then assigns the resource according to its managed/stop-everything state or
 * via native_choose_node().
 *
 * \param[in,out] rsc     Resource to allocate
 * \param[in]     prefer  Node to prefer, if any
 *
 * \return \c rsc->allocated_to as it stands when the function returns, or NULL
 *         if \p rsc is already flagged as allocating (dependency loop)
 */
pe_node_t *
pcmk__native_allocate(pe_resource_t *rsc, pe_node_t *prefer)
{
    GList *gIter = NULL;

    if (rsc->parent && !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) {
        /* never allocate children on their own */
        pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id,
                     rsc->parent->id);
        rsc->parent->cmds->allocate(rsc->parent, prefer);
    }

    if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
        return rsc->allocated_to;
    }

    if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
        pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
        return NULL;
    }

    pe__set_resource_flags(rsc, pe_rsc_allocating);
    pe__show_node_weights(true, rsc, "Pre-alloc", rsc->allowed_nodes,
                          rsc->cluster);

    // Colocations where this resource is the dependent
    for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
        pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;

        GHashTable *archive = NULL;
        pe_resource_t *primary = constraint->primary;

        /* Keep a copy of the current scores for promoted-role constraints and
         * for finite negative scores, so they can be restored below.
         */
        if ((constraint->dependent_role >= RSC_ROLE_PROMOTED)
            || (constraint->score < 0 && constraint->score > -INFINITY)) {
            archive = pcmk__copy_node_table(rsc->allowed_nodes);
        }

        pe_rsc_trace(rsc,
                     "%s: Allocating %s first (constraint=%s score=%d role=%s)",
                     rsc->id, primary->id, constraint->id,
                     constraint->score, role2text(constraint->dependent_role));
        primary->cmds->allocate(primary, NULL);
        rsc->cmds->apply_coloc_score(rsc, primary, constraint, true);

        // Undo the constraint if it left no node available
        if (archive && !pcmk__any_node_available(rsc->allowed_nodes)) {
            pe_rsc_info(rsc, "%s: Rolling back scores from %s",
                        rsc->id, primary->id);
            g_hash_table_destroy(rsc->allowed_nodes);
            rsc->allowed_nodes = archive;
            archive = NULL;
        }
        if (archive) {
            g_hash_table_destroy(archive);
        }
    }

    pe__show_node_weights(true, rsc, "Post-coloc", rsc->allowed_nodes,
                          rsc->cluster);

    // Colocations where this resource is the primary
    for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
        pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
        pe_resource_t *dependent = constraint->dependent;
        const float factor = constraint->score / (float) INFINITY;

        if (!pcmk__colocation_has_influence(constraint, NULL)) {
            continue;
        }
        pe_rsc_trace(rsc, "Merging score of '%s' constraint (%s with %s)",
                     constraint->id, constraint->dependent->id,
                     constraint->primary->id);
        dependent->cmds->add_colocated_node_scores(dependent, rsc->id,
                                                   &rsc->allowed_nodes,
                                                   constraint->node_attribute,
                                                   factor,
                                                   pcmk__coloc_select_active);
    }

    if (rsc->next_role == RSC_ROLE_STOPPED) {
        pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id);
        /* make sure it doesn't come up again */
        resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE,
                          rsc->cluster);

    } else if(rsc->next_role > rsc->role
              && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_quorum)
              && rsc->cluster->no_quorum_policy == no_quorum_freeze) {
        // Without quorum and with the freeze policy, keep the current role
        crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze",
                   rsc->id, role2text(rsc->role), role2text(rsc->next_role));
        pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze");
    }

    pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
                          rsc, __func__, rsc->allowed_nodes, rsc->cluster);

    // With fencing enabled but no fencing resource, treat rsc as unmanaged
    if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)
        && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_stonith_resource)) {
        pe__clear_resource_flags(rsc, pe_rsc_managed);
    }

    if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
        // Unmanaged: keep the current role and the current node (if any)
        const char *reason = NULL;
        pe_node_t *assign_to = NULL;

        pe__set_next_role(rsc, rsc->role, "unmanaged");
        assign_to = pe__current_node(rsc);
        if (assign_to == NULL) {
            reason = "inactive";
        } else if (rsc->role == RSC_ROLE_PROMOTED) {
            reason = "promoted";
        } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
            reason = "failed";
        } else {
            reason = "active";
        }
        pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id,
                    (assign_to? assign_to->details->uname : "no node"), reason);
        pcmk__assign_primitive(rsc, assign_to, true);

    } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) {
        pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id);
        pcmk__assign_primitive(rsc, NULL, true);

    } else if (pcmk_is_set(rsc->flags, pe_rsc_provisional)
               && native_choose_node(rsc, prefer, rsc->cluster)) {
        pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id,
                     rsc->allocated_to->details->uname);

    } else if (rsc->allocated_to == NULL) {
        if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
            pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
        } else if (rsc->running_on != NULL) {
            pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
        }

    } else {
        pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id,
                     rsc->allocated_to->details->uname);
    }

    pe__clear_resource_flags(rsc, pe_rsc_allocating);

    if (rsc->is_remote_node) {
        // Look up the node whose name matches this resource's ID
        pe_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id);

        CRM_ASSERT(remote_node != NULL);
        if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) {
            crm_trace("Setting Pacemaker Remote node %s to ONLINE",
                      remote_node->details->id);
            remote_node->details->online = TRUE;
            /* We shouldn't consider an unseen remote-node unclean if we are going
             * to try and connect to it.
Otherwise we get an unnecessary fence */ if (remote_node->details->unseen == TRUE) { remote_node->details->unclean = FALSE; } } else { crm_trace("Setting Pacemaker Remote node %s to SHUTDOWN (next role %s, %sallocated)", remote_node->details->id, role2text(rsc->next_role), (rsc->allocated_to? "" : "un")); remote_node->details->shutdown = TRUE; } } return rsc->allocated_to; } static gboolean is_op_dup(pe_resource_t *rsc, const char *name, guint interval_ms) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xmlNode *operation = NULL; guint interval2_ms = 0; CRM_ASSERT(rsc); for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { value = crm_element_value(operation, "name"); if (!pcmk__str_eq(value, name, pcmk__str_casei)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval2_ms = crm_parse_interval_spec(value); if (interval_ms != interval2_ms) { continue; } if (id == NULL) { id = ID(operation); } else { pcmk__config_err("Operation %s is duplicate of %s (do not use " "same name and interval combination more " "than once per resource)", ID(operation), id); dup = TRUE; } } } return dup; } static bool op_cannot_recur(const char *name) { return pcmk__strcase_any_of(name, RSC_STOP, RSC_START, RSC_DEMOTE, RSC_PROMOTE, NULL); } static void RecurringOp(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval_spec = NULL; const char *node_uname = node? 
node->details->uname : "n/a"; guint interval_ms = 0; pe_action_t *mon = NULL; gboolean is_optional = TRUE; GList *possible_matches = NULL; CRM_ASSERT(rsc); /* Only process for the operations without role="Stopped" */ role = crm_element_value(operation, "role"); if (role && text2role(role) == RSC_ROLE_STOPPED) { return; } interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval_ms)) { crm_trace("Not creating duplicate recurring action %s for %dms %s", ID(operation), interval_ms, name); return; } if (op_cannot_recur(name)) { pcmk__config_err("Ignoring %s because action '%s' cannot be recurring", ID(operation), name); return; } key = pcmk__op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { crm_trace("Not creating recurring action %s for disabled resource %s", ID(operation), rsc->id); free(key); return; } pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s", ID(operation), rsc->id, role2text(rsc->next_role), node_uname); if (start != NULL) { pe_rsc_trace(rsc, "Marking %s %s due to %s", key, pcmk_is_set(start->flags, pe_action_optional)? 
"optional" : "mandatory", start->uuid); is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional); } else { pe_rsc_trace(rsc, "Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches == NULL) { is_optional = FALSE; pe_rsc_trace(rsc, "Marking %s mandatory: not active", key); } else { GList *gIter = NULL; for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; if (pcmk_is_set(op->flags, pe_action_reschedule)) { is_optional = FALSE; break; } } g_list_free(possible_matches); } if (((rsc->next_role == RSC_ROLE_PROMOTED) && (role == NULL)) || (role != NULL && text2role(role) != rsc->next_role)) { int log_level = LOG_TRACE; const char *result = "Ignoring"; if (is_optional) { char *after_key = NULL; pe_action_t *cancel_op = NULL; // It's running, so cancel it log_level = LOG_INFO; result = "Cancelling"; cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node); switch (rsc->role) { case RSC_ROLE_UNPROMOTED: case RSC_ROLE_STARTED: if (rsc->next_role == RSC_ROLE_PROMOTED) { after_key = promote_key(rsc); } else if (rsc->next_role == RSC_ROLE_STOPPED) { after_key = stop_key(rsc); } break; case RSC_ROLE_PROMOTED: after_key = demote_key(rsc); break; default: break; } if (after_key) { pcmk__new_ordering(rsc, NULL, cancel_op, rsc, after_key, NULL, pe_order_runnable_left, data_set); } } do_crm_log(log_level, "%s action %s (%s vs. %s)", result, key, role ? 
role : role2text(RSC_ROLE_UNPROMOTED), role2text(rsc->next_role)); free(key); return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if (is_optional) { pe_rsc_trace(rsc, "%s\t %s (optional)", node_uname, mon->uuid); } if ((start == NULL) || !pcmk_is_set(start->flags, pe_action_runnable)) { pe_rsc_debug(rsc, "%s\t %s (cancelled : start un-runnable)", node_uname, mon->uuid); pe__clear_action_flags(mon, pe_action_runnable); } else if (node == NULL || node->details->online == FALSE || node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", node_uname, mon->uuid); pe__clear_action_flags(mon, pe_action_runnable); } else if (!pcmk_is_set(mon->flags, pe_action_optional)) { pe_rsc_info(rsc, " Start recurring %s (%us) for %s on %s", mon->task, interval_ms / 1000, rsc->id, node_uname); } if (rsc->next_role == RSC_ROLE_PROMOTED) { char *running_promoted = pcmk__itoa(PCMK_OCF_RUNNING_PROMOTED); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_promoted); free(running_promoted); } if ((node == NULL) || pcmk_is_set(rsc->flags, pe_rsc_managed)) { pcmk__new_ordering(rsc, start_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then|pe_order_runnable_left, data_set); pcmk__new_ordering(rsc, reload_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then|pe_order_runnable_left, data_set); if (rsc->next_role == RSC_ROLE_PROMOTED) { pcmk__new_ordering(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, data_set); } else if (rsc->role == RSC_ROLE_PROMOTED) { pcmk__new_ordering(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, data_set); } } } static void Recurring(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, pe_working_set_t * data_set) { if (!pcmk_is_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = 
pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { RecurringOp(rsc, start, node, operation, data_set); } } } } static void RecurringOp_Stopped(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval_spec = NULL; const char *node_uname = node? node->details->uname : "n/a"; guint interval_ms = 0; GList *possible_matches = NULL; GList *gIter = NULL; /* Only process for the operations with role="Stopped" */ role = crm_element_value(operation, "role"); if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) { return; } interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval_ms)) { crm_trace("Not creating duplicate recurring action %s for %dms %s", ID(operation), interval_ms, name); return; } if (op_cannot_recur(name)) { pcmk__config_err("Ignoring %s because action '%s' cannot be recurring", ID(operation), name); return; } key = pcmk__op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { crm_trace("Not creating recurring action %s for disabled resource %s", ID(operation), rsc->id); free(key); return; } // @TODO add support if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { crm_notice("Ignoring %s (recurring monitors for Stopped role are " "not supported for anonymous clones)", ID(operation)); return; } pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on nodes where it should not be running", ID(operation), rsc->id, role2text(rsc->next_role)); /* if the monitor exists on the node where the resource will be running, cancel it */ if (node != NULL) { possible_matches = find_actions_exact(rsc->actions, 
key, node); if (possible_matches) { pe_action_t *cancel_op = NULL; g_list_free(possible_matches); cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node); if ((rsc->next_role == RSC_ROLE_STARTED) || (rsc->next_role == RSC_ROLE_UNPROMOTED)) { /* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */ /* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */ pcmk__new_ordering(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pe_order_runnable_left, data_set); } pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s", key, role, role2text(rsc->next_role), node_uname); } } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *stop_node = (pe_node_t *) gIter->data; const char *stop_node_uname = stop_node->details->uname; gboolean is_optional = TRUE; gboolean probe_is_optional = TRUE; gboolean stop_is_optional = TRUE; pe_action_t *stopped_mon = NULL; char *rc_inactive = NULL; GList *stop_ops = NULL; GList *local_gIter = NULL; if (node && pcmk__str_eq(stop_node_uname, node_uname, pcmk__str_casei)) { continue; } pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s", ID(operation), rsc->id, pcmk__s(stop_node_uname, "unknown node")); /* start a monitor for an already stopped resource */ possible_matches = find_actions_exact(rsc->actions, key, stop_node); if (possible_matches == NULL) { pe_rsc_trace(rsc, "Marking %s mandatory on %s: not active", key, pcmk__s(stop_node_uname, "unknown node")); is_optional = FALSE; } else { pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key, pcmk__s(stop_node_uname, "unknown node")); is_optional = TRUE; g_list_free(possible_matches); } stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set); rc_inactive = pcmk__itoa(PCMK_OCF_NOT_RUNNING); add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); free(rc_inactive); if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { GList *probes = 
pe__resource_actions(rsc, stop_node, RSC_STATUS, FALSE); GList *pIter = NULL; for (pIter = probes; pIter != NULL; pIter = pIter->next) { pe_action_t *probe = (pe_action_t *) pIter->data; order_actions(probe, stopped_mon, pe_order_runnable_left); crm_trace("%s then %s on %s", probe->uuid, stopped_mon->uuid, stop_node->details->uname); } g_list_free(probes); } stop_ops = pe__resource_actions(rsc, stop_node, RSC_STOP, TRUE); for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) { pe_action_t *stop = (pe_action_t *) local_gIter->data; if (!pcmk_is_set(stop->flags, pe_action_optional)) { stop_is_optional = FALSE; } if (!pcmk_is_set(stop->flags, pe_action_runnable)) { crm_debug("%s\t %s (cancelled : stop un-runnable)", pcmk__s(stop_node_uname, ""), stopped_mon->uuid); pe__clear_action_flags(stopped_mon, pe_action_runnable); } if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { pcmk__new_ordering(rsc, stop_key(rsc), stop, NULL, strdup(key), stopped_mon, pe_order_implies_then|pe_order_runnable_left, data_set); } } if (stop_ops) { g_list_free(stop_ops); } if (is_optional == FALSE && probe_is_optional && stop_is_optional && !pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged", key, pcmk__s(stop_node_uname, "unknown node")); pe__set_action_flags(stopped_mon, pe_action_optional); } if (pcmk_is_set(stopped_mon->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s\t %s (optional)", pcmk__s(stop_node_uname, ""), stopped_mon->uuid); } if (stop_node->details->online == FALSE || stop_node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", pcmk__s(stop_node_uname, ""), stopped_mon->uuid); pe__clear_action_flags(stopped_mon, pe_action_runnable); } if (pcmk_is_set(stopped_mon->flags, pe_action_runnable) && !pcmk_is_set(stopped_mon->flags, pe_action_optional)) { crm_notice(" Start recurring %s (%us) for %s on %s", stopped_mon->task, interval_ms / 1000, rsc->id, 
pcmk__s(stop_node_uname, "unknown node"));
        }
    }

    // key was only ever copied (strdup) for the created actions
    free(key);
}

/*!
 * \internal
 * \brief Process each configured operation with RecurringOp_Stopped()
 *
 * Nothing is done if the resource is in maintenance mode or the given node is
 * in maintenance mode.
 *
 * \param[in,out] rsc       Resource whose operations should be processed
 * \param[in]     start     Passed through to RecurringOp_Stopped()
 * \param[in]     node      Passed through to RecurringOp_Stopped() (may be NULL)
 * \param[in,out] data_set  Cluster working set
 */
static void
Recurring_Stopped(pe_resource_t * rsc, pe_action_t * start, pe_node_t * node, pe_working_set_t * data_set)
{
    if (!pcmk_is_set(rsc->flags, pe_rsc_maintenance) &&
        (node == NULL || node->details->maintenance == FALSE)) {
        xmlNode *operation = NULL;

        for (operation = pcmk__xe_first_child(rsc->ops_xml);
             operation != NULL; operation = pcmk__xe_next(operation)) {

            // Only "op" elements describe operations
            if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
                RecurringOp_Stopped(rsc, start, node, operation, data_set);
            }
        }
    }
}

/*!
 * \internal
 * \brief Create the actions and orderings needed to migrate a resource
 *
 * A migrate_to action on the current node is created only when there is no
 * partial migration target; a migrate_from action on the chosen node is
 * always created. Both \p current and \p chosen are dereferenced
 * unconditionally.
 *
 * \param[in,out] rsc       Resource being migrated
 * \param[in]     current   Node the resource is migrating from
 * \param[in]     chosen    Node the resource is migrating to
 * \param[in,out] data_set  Cluster working set
 */
static void
handle_migration_actions(pe_resource_t * rsc, pe_node_t *current, pe_node_t *chosen, pe_working_set_t * data_set)
{
    pe_action_t *migrate_to = NULL;
    pe_action_t *migrate_from = NULL;
    pe_action_t *start = NULL;
    pe_action_t *stop = NULL;
    gboolean partial = rsc->partial_migration_target ? TRUE : FALSE;

    pe_rsc_trace(rsc, "Processing migration actions %s moving from %s to %s . partial migration = %s",
                 rsc->id, current->details->id, chosen->details->id,
                 partial ? "TRUE" : "FALSE");
    start = start_action(rsc, chosen, TRUE);
    stop = stop_action(rsc, current, TRUE);

    // A partial migration has no migrate_to action, only migrate_from
    if (partial == FALSE) {
        migrate_to = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0),
                                   RSC_MIGRATE, current, TRUE, TRUE, data_set);
    }

    migrate_from = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
                                 RSC_MIGRATED, chosen, TRUE, TRUE, data_set);

    if ((migrate_to && migrate_from) || (migrate_from && partial)) {

        pe__set_action_flags(start, pe_action_migrate_runnable);
        pe__set_action_flags(stop, pe_action_migrate_runnable);

        // This is easier than trying to delete it from the graph
        pe__set_action_flags(start, pe_action_pseudo);

        /* order probes before migrations */
        if (partial) {
            pe__set_action_flags(migrate_from, pe_action_migrate_runnable);
            migrate_from->needs = start->needs;

            pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL,
                               rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
                               NULL, pe_order_optional, data_set);

        } else {
            pe__set_action_flags(migrate_from, pe_action_migrate_runnable);
            pe__set_action_flags(migrate_to, pe_action_migrate_runnable);
            migrate_to->needs = start->needs;

            pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0), NULL,
                               rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0),
                               NULL, pe_order_optional, data_set);
            // migrate_to is ordered before migrate_from
            pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATE, 0), NULL,
                               rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
                               NULL,
                               pe_order_optional|pe_order_implies_first_migratable,
                               data_set);
        }

        // migrate_from is ordered before both the stop and the (pseudo) start
        pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL,
                           rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
                           pe_order_optional|pe_order_implies_first_migratable,
                           data_set);
        pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL,
                           rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
                           pe_order_optional|pe_order_implies_first_migratable|pe_order_pseudo_left,
                           data_set);
    }

    // Record the source and target node names in the action meta-attributes
    if (migrate_to) {
        add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_SOURCE,
                       current->details->uname);
        add_hash_param(migrate_to->meta,
XML_LRM_ATTR_MIGRATE_TARGET,
                       chosen->details->uname);

        /* Pacemaker Remote connections don't require pending to be recorded in
         * the CIB. We can reduce CIB writes by not setting PENDING for them.
         */
        if (rsc->is_remote_node == FALSE) {
            /* migrate_to takes place on the source node, but can
             * have an effect on the target node depending on how
             * the agent is written. Because of this, we have to maintain
             * a record that the migrate_to occurred, in case the source node
             * loses membership while the migrate_to action is still in-flight.
             */
            add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true");
        }
    }

    if (migrate_from) {
        add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_SOURCE,
                       current->details->uname);
        add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_TARGET,
                       chosen->details->uname);
    }
}

/*!
 * \internal
 * \brief Schedule actions to bring resource down and back to current role
 *
 * The resource is flagged as restarting while the actions are created, and
 * the flag is cleared again before returning. Either phase ends early if the
 * transition function selected from rsc_action_matrix returns false.
 *
 * \param[in,out] rsc           Resource to restart
 * \param[in]     current       Node that resource should be brought down on
 * \param[in]     chosen        Node that resource should be brought up on
 * \param[in]     need_stop     Whether the resource must be stopped
 * \param[in]     need_promote  Whether the resource must be promoted
 */
static void
schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
                         pe_node_t *chosen, bool need_stop, bool need_promote)
{
    enum rsc_role_e role = rsc->role;
    enum rsc_role_e next_role;

    pe__set_resource_flags(rsc, pe_rsc_restarting);

    // Bring resource down to a stop on its current node
    while (role != RSC_ROLE_STOPPED) {
        next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
        pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
                     (need_stop? "required" : "optional"), rsc->id,
                     role2text(role), role2text(next_role));
        // The third argument is "optional", hence the negation
        if (!rsc_action_matrix[role][next_role](rsc, current, !need_stop,
                                                rsc->cluster)) {
            break;
        }
        role = next_role;
    }

    /* Bring resource back up to its current role (rsc->role) on the chosen
     * node. Nothing is scheduled if the current role is above the next role or
     * the resource is blocked.
     */
    while ((rsc->role <= rsc->next_role) && (role != rsc->role)
            && !pcmk_is_set(rsc->flags, pe_rsc_block)) {
        bool required = need_stop;

        next_role = rsc_state_matrix[role][rsc->role];
        // A needed promotion makes the promote step required on its own
        if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
            required = true;
        }
        pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
                     (required? "required" : "optional"), rsc->id,
                     role2text(role), role2text(next_role));
        if (!rsc_action_matrix[role][next_role](rsc, chosen, !required,
                                                rsc->cluster)) {
            break;
        }
        role = next_role;
    }

    pe__clear_resource_flags(rsc, pe_rsc_restarting);
}

void
native_create_actions(pe_resource_t *rsc)
{
    pe_action_t *start = NULL;
    pe_node_t *chosen = NULL;
    pe_node_t *current = NULL;
    gboolean need_stop = FALSE;
    bool need_promote = FALSE;
    gboolean is_moving = FALSE;
    gboolean allow_migrate = FALSE;

    GList *gIter = NULL;
    unsigned int num_all_active = 0;
    unsigned int num_clean_active = 0;
    bool multiply_active = FALSE;
    enum rsc_role_e role = RSC_ROLE_UNKNOWN;
    enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;

    CRM_ASSERT(rsc != NULL);
    allow_migrate = pcmk_is_set(rsc->flags, pe_rsc_allow_migrate)? TRUE : FALSE;

    chosen = rsc->allocated_to;
    next_role = rsc->next_role;
    if (next_role == RSC_ROLE_UNKNOWN) {
        pe__set_next_role(rsc,
                          (chosen == NULL)? RSC_ROLE_STOPPED : RSC_ROLE_STARTED,
                          "allocation");
    }
    pe_rsc_trace(rsc,
                 "Creating all actions for %s transition from %s to %s (%s) on %s",
                 rsc->id, role2text(rsc->role), role2text(rsc->next_role),
                 ((next_role == RSC_ROLE_UNKNOWN)? "implicit" : "explicit"),
                 ((chosen == NULL)?
"no node" : chosen->details->uname)); current = pe__find_active_on(rsc, &num_all_active, &num_clean_active); for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { pe_node_t *dangling_source = (pe_node_t *) gIter->data; pe_action_t *stop = NULL; pe_rsc_trace(rsc, "Creating stop action %sfor %s on %s due to dangling migration", pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop)? "and cleanup " : "", rsc->id, dangling_source->details->uname); stop = stop_action(rsc, dangling_source, FALSE); pe__set_action_flags(stop, pe_action_dangle); if (pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, dangling_source, FALSE, rsc->cluster); } } if ((num_all_active == 2) && (num_clean_active == 2) && chosen && rsc->partial_migration_source && rsc->partial_migration_target && (current->details == rsc->partial_migration_source->details) && (chosen->details == rsc->partial_migration_target->details)) { /* The chosen node is still the migration target from a partial * migration. Attempt to continue the migration instead of recovering * by stopping the resource everywhere and starting it on a single node. */ pe_rsc_trace(rsc, "Will attempt to continue with partial migration " "to target %s from %s", rsc->partial_migration_target->details->id, rsc->partial_migration_source->details->id); } else if (!pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) { /* If a resource has "requires" set to nothing or quorum, don't consider * it active on unclean nodes (similar to how all resources behave when * stonith-enabled is false). We can start such resources elsewhere * before fencing completes, and if we considered the resource active on * the failed node, we would attempt recovery for being active on * multiple nodes. 
*/ multiply_active = (num_clean_active > 1); } else { multiply_active = (num_all_active > 1); } if (multiply_active) { if (rsc->partial_migration_target && rsc->partial_migration_source) { // Migration was in progress, but we've chosen a different target crm_notice("Resource %s can no longer migrate from %s to %s " "(will stop on both nodes)", rsc->id, rsc->partial_migration_source->details->uname, rsc->partial_migration_target->details->uname); multiply_active = false; } else { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); // Resource was (possibly) incorrectly multiply active pe_proc_err("%s resource %s might be active on %u nodes (%s)", pcmk__s(class, "Untyped"), rsc->id, num_all_active, recovery2text(rsc->recovery_type)); crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information"); } switch (rsc->recovery_type) { case recovery_stop_start: need_stop = TRUE; break; case recovery_stop_unexpected: need_stop = TRUE; // StopRsc() will skip expected node pe__set_resource_flags(rsc, pe_rsc_stop_unexpected); break; default: break; } /* If by chance a partial migration is in process, but the migration * target is not chosen still, clear all partial migration data. 
*/ rsc->partial_migration_source = rsc->partial_migration_target = NULL; allow_migrate = FALSE; } if (!multiply_active) { pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected); } if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) { pe_rsc_trace(rsc, "Creating start action for %s to represent already pending start", rsc->id); start = start_action(rsc, chosen, TRUE); pe__set_action_flags(start, pe_action_print_always); } if (current && chosen && current->details != chosen->details) { pe_rsc_trace(rsc, "Moving %s from %s to %s", rsc->id, pcmk__s(current->details->uname, "unknown node"), pcmk__s(chosen->details->uname, "unknown node")); is_moving = TRUE; need_stop = TRUE; } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { if (pcmk_is_set(rsc->flags, pe_rsc_stop)) { need_stop = TRUE; pe_rsc_trace(rsc, "Recovering %s", rsc->id); } else { pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id); if (rsc->next_role == RSC_ROLE_PROMOTED) { need_promote = TRUE; } } } else if (pcmk_is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id); need_stop = TRUE; } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { pe_rsc_trace(rsc, "Creating start action for promoted resource %s", rsc->id); start = start_action(rsc, chosen, TRUE); if (!pcmk_is_set(start->flags, pe_action_optional)) { // Recovery of a promoted resource pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id); need_stop = TRUE; } } /* Create any additional actions required when bringing resource down and * back up to same level. 
*/ schedule_restart_actions(rsc, current, chosen, need_stop, need_promote); /* Required steps from this role to the next */ role = rsc->role; while (role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; pe_rsc_trace(rsc, "Creating action to take %s from %s to %s (ending at %s)", rsc->id, role2text(role), role2text(next_role), role2text(rsc->next_role)); if (!rsc_action_matrix[role][next_role](rsc, chosen, FALSE, rsc->cluster)) { break; } role = next_role; } if (pcmk_is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Not creating recurring monitors for blocked resource %s", rsc->id); } else if ((rsc->next_role != RSC_ROLE_STOPPED) || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Creating recurring monitors for %s resource %s", ((rsc->next_role == RSC_ROLE_STOPPED)? "unmanaged" : "active"), rsc->id); start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, rsc->cluster); Recurring_Stopped(rsc, start, chosen, rsc->cluster); } else { pe_rsc_trace(rsc, "Creating recurring monitors for inactive resource %s", rsc->id); Recurring_Stopped(rsc, NULL, NULL, rsc->cluster); } /* if we are stuck in a partial migration, where the target * of the partial migration no longer matches the chosen target. 
* A full stop/start is required */ if (rsc->partial_migration_target && (chosen == NULL || rsc->partial_migration_target->details != chosen->details)) { pe_rsc_trace(rsc, "Not allowing partial migration of %s to continue", rsc->id); allow_migrate = FALSE; } else if (!is_moving || !pcmk_is_set(rsc->flags, pe_rsc_managed) || pcmk_any_flags_set(rsc->flags, pe_rsc_failed|pe_rsc_start_pending) || (current && current->details->unclean) || rsc->next_role < RSC_ROLE_STARTED) { allow_migrate = FALSE; } if (allow_migrate) { handle_migration_actions(rsc, current, chosen, rsc->cluster); } } static void rsc_avoids_remote_nodes(pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc) { node->weight = -INFINITY; } } } /*! * \internal * \brief Return allowed nodes as (possibly sorted) list * * Convert a resource's hash table of allowed nodes to a list. If printing to * stdout, sort the list, to keep action ID numbers consistent for regression * test output (while avoiding the performance hit on a live cluster). * * \param[in] rsc Resource to check for allowed nodes * \param[in] data_set Cluster working set * * \return List of resource's allowed nodes * \note Callers should take care not to rely on the list being sorted. 
*/
static GList *
allowed_nodes_as_list(pe_resource_t *rsc, pe_working_set_t *data_set)
{
    GList *allowed_nodes = NULL;

    // A NULL table yields an empty (NULL) list
    if (rsc->allowed_nodes) {
        allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
    }

    // Sort only when not running as a daemon
    if (!pcmk__is_daemon) {
        allowed_nodes = g_list_sort(allowed_nodes, sort_node_uname);
    }

    return allowed_nodes;
}

/*!
 * \brief Create the implicit constraints that apply to a primitive resource
 *
 * For a managed resource, create orderings among its own actions (stop before
 * start, demote before stop and start before promote where relevant, history
 * deletion versus probe), orderings relative to unfencing of each allowed
 * node when unfencing is required, utilization constraints when applicable,
 * and ordering/colocation (or node bans) relative to the resource's
 * container, if any. Unmanaged resources are skipped entirely.
 *
 * \param[in,out] rsc  Primitive resource to create implicit constraints for
 */
void
native_internal_constraints(pe_resource_t *rsc)
{
    /* This function is on the critical path and worth optimizing as much as possible */

    pe_resource_t *top = NULL;
    GList *allowed_nodes = NULL;    // Built only if one of the checks below needs it
    bool check_unfencing = FALSE;
    bool check_utilization = false;

    if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
        pe_rsc_trace(rsc,
                     "Skipping native constraints for unmanaged resource: %s",
                     rsc->id);
        return;
    }

    top = uber_parent(rsc);

    // Whether resource requires unfencing
    check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device)
                      && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing)
                      && pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing);

    // Whether a non-default placement strategy is used
    check_utilization = (g_hash_table_size(rsc->utilization) > 0)
                        && !pcmk__str_eq(rsc->cluster->placement_strategy,
                                         "default", pcmk__str_casei);

    // Order stops before starts (i.e. restart)
    pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
                       rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
                       pe_order_optional|pe_order_implies_then|pe_order_restart,
                       rsc->cluster);

    // Promotable ordering: demote before stop, start before promote
    if (pcmk_is_set(top->flags, pe_rsc_promotable)
        || (rsc->role > RSC_ROLE_UNPROMOTED)) {

        pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL,
                           rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
                           pe_order_promoted_implies_first, rsc->cluster);

        pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
                           rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL,
                           pe_order_runnable_left, rsc->cluster);
    }

    // Don't clear resource history if probing on same node
    pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0),
                       NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0),
                       NULL, pe_order_same_node|pe_order_then_cancels_first,
                       rsc->cluster);

    // Certain checks need allowed nodes
    if (check_unfencing || check_utilization || rsc->container) {
        allowed_nodes = allowed_nodes_as_list(rsc, rsc->cluster);
    }

    if (check_unfencing) {
        /* Check if the node needs to be unfenced first */

        for (GList *item = allowed_nodes; item; item = item->next) {
            pe_node_t *node = item->data;
            pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE,
                                               rsc->cluster);

            crm_debug("Ordering any stops of %s before %s, and any starts after",
                      rsc->id, unfence->uuid);

            /*
             * It would be more efficient to order clone resources once,
             * rather than order each instance, but ordering the instance
             * allows us to avoid unnecessary dependencies that might conflict
             * with user constraints.
             *
             * @TODO: This constraint can still produce a transition loop if the
             * resource has a stop scheduled on the node being unfenced, and
             * there is a user ordering constraint to start some other resource
             * (which will be ordered after the unfence) before stopping this
             * resource. An example is "start some slow-starting cloned service
             * before stopping an associated virtual IP that may be moving to
             * it":
             *       stop this -> unfencing -> start that -> stop this
             */
            pcmk__new_ordering(rsc, stop_key(rsc), NULL,
                               NULL, strdup(unfence->uuid), unfence,
                               pe_order_optional|pe_order_same_node,
                               rsc->cluster);

            pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
                               rsc, start_key(rsc), NULL,
                               pe_order_implies_then_on_node|pe_order_same_node,
                               rsc->cluster);
        }
    }

    if (check_utilization) {
        pcmk__create_utilization_constraints(rsc, allowed_nodes);
    }

    if (rsc->container) {
        /* Set only when rsc is not itself a remote connection and its
         * container is, or contains, a Pacemaker Remote node
         */
        pe_resource_t *remote_rsc = NULL;

        if (rsc->is_remote_node) {
            // rsc is the implicit remote connection for a guest or bundle node

            /* Do not allow a guest resource to live on a Pacemaker Remote node,
             * to avoid nesting remotes. However, allow bundles to run on remote
             * nodes.
             */
            if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
                rsc_avoids_remote_nodes(rsc->container);
            }

            /* If someone cleans up a guest or bundle node's container, we will
             * likely schedule a (re-)probe of the container and recovery of the
             * connection. Order the connection stop after the container probe,
             * so that if we detect the container running, we will trigger a new
             * transition and avoid the unnecessary recovery.
             */
            pcmk__order_resource_actions(rsc->container, RSC_STATUS,
                                         rsc, RSC_STOP, pe_order_optional);

        /* A user can specify that a resource must start on a Pacemaker Remote
         * node by explicitly configuring it with the container=NODENAME
         * meta-attribute. This is of questionable merit, since location
         * constraints can accomplish the same thing. But we support it, so here
         * we check whether a resource (that is not itself a remote connection)
         * has container set to a remote node or guest node resource.
         */
        } else if (rsc->container->is_remote_node) {
            remote_rsc = rsc->container;
        } else {
            remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
                                                          rsc->container);
        }

        if (remote_rsc) {
            /* Force the resource on the Pacemaker Remote node instead of
             * colocating the resource with the container resource.
             */
            for (GList *item = allowed_nodes; item; item = item->next) {
                pe_node_t *node = item->data;

                if (node->details->remote_rsc != remote_rsc) {
                    node->weight = -INFINITY;
                }
            }

        } else {
            /* This resource is either a filler for a container that does NOT
             * represent a Pacemaker Remote node, or a Pacemaker Remote
             * connection resource for a guest node or bundle.
             */
            int score;

            crm_trace("Order and colocate %s relative to its container %s",
                      rsc->id, rsc->container->id);

            // Start the container before the resource
            pcmk__new_ordering(rsc->container,
                               pcmk__op_key(rsc->container->id, RSC_START, 0),
                               NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0),
                               NULL,
                               pe_order_implies_then|pe_order_runnable_left,
                               rsc->cluster);

            // Stop the resource before the container
            pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
                               rsc->container,
                               pcmk__op_key(rsc->container->id, RSC_STOP, 0),
                               NULL, pe_order_implies_first, rsc->cluster);

            if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
                score = 10000;    /* Highly preferred but not essential */
            } else {
                score = INFINITY; /* Force them to run on the same host */
            }
            pcmk__new_colocation("resource-with-container", NULL, score, rsc,
                                 rsc->container, NULL, NULL, true,
                                 rsc->cluster);
        }
    }

    if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) {
        /* don't allow remote nodes to run stonith devices
         * or remote connection resources.*/
        rsc_avoids_remote_nodes(rsc);
    }

    // Frees only the list cells, not the nodes they point to
    g_list_free(allowed_nodes);
}

/*!
 * \internal
 * \brief Apply a colocation's score to node weights or resource priority
 *
 * Given a colocation constraint, apply its score to the dependent's
 * allowed node weights (if we are still placing resources) or priority (if
 * we are choosing promotable clone instance roles).
*
 * \param[in] dependent      Dependent resource in colocation
 * \param[in] primary        Primary resource in colocation
 * \param[in] colocation     Colocation constraint to apply
 * \param[in] for_dependent  true if called on behalf of dependent
 */
void
pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
                                  pe_resource_t *primary,
                                  pcmk__colocation_t *colocation,
                                  bool for_dependent)
{
    enum pcmk__coloc_affects filter_results;

    // Do nothing (beyond what CRM_CHECK itself does) if any argument is NULL
    CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
              return);

    if (for_dependent) {
        // Always process on behalf of primary resource
        primary->cmds->apply_coloc_score(dependent, primary, colocation, false);
        return;
    }

    filter_results = pcmk__colocation_affects(dependent, primary, colocation,
                                              false);
    pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
                 ((colocation->score > 0)? "Colocating" : "Anti-colocating"),
                 dependent->id, primary->id, colocation->id, colocation->score,
                 filter_results);

    // Apply the score to whichever aspect of the dependent is affected
    switch (filter_results) {
        case pcmk__coloc_affects_role:
            pcmk__apply_coloc_to_priority(dependent, primary, colocation);
            break;
        case pcmk__coloc_affects_location:
            pcmk__apply_coloc_to_weights(dependent, primary, colocation);
            break;
        default: // pcmk__coloc_affects_nothing
            return;
    }
}

/*!
 * \brief Return the flags of an action of a primitive resource
 *
 * \param[in] action  Action to get flags for
 * \param[in] node    Not used by this implementation
 *
 * \return \p action's flags, unmodified
 */
enum pe_action_flags
native_action_flags(pe_action_t * action, pe_node_t * node)
{
    return action->flags;
}

/*!
 * \brief Apply a location constraint to a primitive resource
 *
 * This simply passes both arguments to pcmk__apply_location().
 *
 * \param[in,out] rsc         Resource to apply constraint to
 * \param[in,out] constraint  Location constraint to apply
 */
void
native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
    pcmk__apply_location(constraint, rsc);
}

-/*!
- * \internal
- * \brief Add a resource's actions to the transition graph
- *
- * \param[in] rsc  Resource whose actions should be added
- */
-void
-native_expand(pe_resource_t *rsc)
-{
-    GList *gIter = NULL;
-
-    CRM_ASSERT(rsc);
-    pe_rsc_trace(rsc, "Processing actions from %s", rsc->id);
-
-    for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
-        pe_action_t *action = (pe_action_t *) gIter->data;
-
-        crm_trace("processing action %d for rsc=%s", action->id, rsc->id);
-        pcmk__add_action_to_graph(action, rsc->cluster);
-    }
-
-    for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
-        pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
-
-        child_rsc->cmds->add_actions_to_graph(child_rsc);
-    }
-}
-
/*!
 * \internal
 * \brief Check whether a node is a multiply active resource's expected node
 *
 * \param[in] rsc  Resource to check
 * \param[in] node  Node to check
 *
 * \return true if \p rsc is multiply active with multiple-active set to
 *         stop_unexpected, and \p node is the node where it will remain active
 * \note This assumes that the resource's next role cannot be changed to stopped
 *       after this is called, which should be reasonable if status has already
 *       been unpacked and resources have been assigned to nodes.
*/ static bool is_expected_node(const pe_resource_t *rsc, const pe_node_t *node) { return pcmk_all_flags_set(rsc->flags, pe_rsc_stop_unexpected|pe_rsc_restarting) && (rsc->next_role > RSC_ROLE_STOPPED) && (rsc->allocated_to != NULL) && (node != NULL) && (rsc->allocated_to->details == node->details); } gboolean StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GList *gIter = NULL; CRM_ASSERT(rsc); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; pe_action_t *stop; if (is_expected_node(rsc, current)) { /* We are scheduling restart actions for a multiply active resource * with multiple-active=stop_unexpected, and this is where it should * not be stopped. */ pe_rsc_trace(rsc, "Skipping stop of multiply active resource %s " "on expected node %s", rsc->id, current->details->uname); continue; } if (rsc->partial_migration_target) { if (rsc->partial_migration_target->details == current->details // Only if the allocated node still is the migration target. && rsc->allocated_to && rsc->allocated_to->details == rsc->partial_migration_target->details) { pe_rsc_trace(rsc, "Skipping stop of %s on %s " "because migration to %s in progress", rsc->id, current->details->uname, next->details->uname); continue; } else { pe_rsc_trace(rsc, "Forcing stop of %s on %s " "because migration target changed", rsc->id, current->details->uname); optional = FALSE; } } pe_rsc_trace(rsc, "Scheduling stop of %s on %s", rsc->id, current->details->uname); stop = stop_action(rsc, current, optional); if(rsc->allocated_to == NULL) { pe_action_set_reason(stop, "node availability", TRUE); } else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting |pe_rsc_stop_unexpected)) { /* We are stopping a multiply active resource on a node that is * not its expected node, and we are still scheduling restart * actions, so the stop is for being multiply active. 
*/ pe_action_set_reason(stop, "being multiply active", TRUE); } if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { pe__clear_action_flags(stop, pe_action_runnable); } if (pcmk_is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set); } if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) { pe_action_t *unfence = pe_fence_op(current, "on", TRUE, NULL, FALSE, data_set); order_actions(stop, unfence, pe_order_implies_first); if (!pcmk__node_unfenced(current)) { pe_proc_err("Stopping %s until %s can be unfenced", rsc->id, current->details->uname); } } } return TRUE; } gboolean StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { pe_action_t *start = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (weight=%d)", (optional? "optional" : "required"), rsc->id, ((next == NULL)? "N/A" : next->details->uname), ((next == NULL)? 0 : next->weight)); start = start_action(rsc, next, TRUE); pcmk__order_vs_unfence(rsc, next, start, pe_order_implies_then); if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) { pe__clear_action_flags(start, pe_action_optional); } if (is_expected_node(rsc, next)) { /* This could be a problem if the start becomes necessary for other * reasons later. 
*/ pe_rsc_trace(rsc, "Start of multiply active resouce %s " "on expected node %s will be a pseudo-action", rsc->id, next->details->uname); pe__set_action_flags(start, pe_action_pseudo); } return TRUE; } gboolean PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GList *gIter = NULL; gboolean runnable = TRUE; GList *action_list = NULL; CRM_ASSERT(rsc); CRM_CHECK(next != NULL, return FALSE); pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname); action_list = pe__resource_actions(rsc, next, RSC_START, TRUE); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *start = (pe_action_t *) gIter->data; if (!pcmk_is_set(start->flags, pe_action_runnable)) { runnable = FALSE; } } g_list_free(action_list); if (runnable) { pe_action_t *promote = promote_action(rsc, next, optional); if (is_expected_node(rsc, next)) { /* This could be a problem if the promote becomes necessary for * other reasons later. */ pe_rsc_trace(rsc, "Promotion of multiply active resouce %s " "on expected node %s will be a pseudo-action", rsc->id, next->details->uname); pe__set_action_flags(promote, pe_action_pseudo); } return TRUE; } pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id); action_list = pe__resource_actions(rsc, next, RSC_PROMOTE, TRUE); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { pe_action_t *promote = (pe_action_t *) gIter->data; pe__clear_action_flags(promote, pe_action_runnable); } g_list_free(action_list); return TRUE; } gboolean DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { GList *gIter = NULL; CRM_ASSERT(rsc); if (is_expected_node(rsc, next)) { pe_rsc_trace(rsc, "Skipping demote of multiply active resource %s " "on expected node %s", rsc->id, next->details->uname); return TRUE; } pe_rsc_trace(rsc, "%s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_UNPROMOTED, return FALSE); */ for (gIter = 
rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *current = (pe_node_t *) gIter->data; pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A"); demote_action(rsc, current, optional); } return TRUE; } gboolean RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); crm_err("%s on %s", rsc->id, next ? next->details->uname : "N/A"); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); return FALSE; } gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set) { if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if (node == NULL) { pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if (node->details->unclean || node->details->online == FALSE) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete_action(rsc, node, optional); pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_DELETE, optional? pe_order_implies_then : pe_order_optional); pcmk__order_resource_actions(rsc, RSC_DELETE, rsc, RSC_START, optional? 
pe_order_implies_then : pe_order_optional); return TRUE; } gboolean native_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete, gboolean force) { enum pe_ordering flags = pe_order_optional; char *key = NULL; pe_action_t *probe = NULL; pe_node_t *running = NULL; pe_node_t *allowed = NULL; pe_resource_t *top = uber_parent(rsc); static const char *rc_promoted = NULL; static const char *rc_inactive = NULL; if (rc_inactive == NULL) { rc_inactive = pcmk__itoa(PCMK_OCF_NOT_RUNNING); rc_promoted = pcmk__itoa(PCMK_OCF_RUNNING_PROMOTED); } CRM_CHECK(node != NULL, return FALSE); if (!force && !pcmk_is_set(rsc->cluster->flags, pe_flag_startup_probes)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id); return FALSE; } if (pe__is_guest_or_remote_node(node)) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because Pacemaker Remote nodes cannot run stonith agents", rsc->id, node->details->id); return FALSE; } else if (pe__is_guest_node(node) && pe__resource_contains_guest_node(rsc->cluster, rsc)) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because guest nodes cannot run resources containing guest nodes", rsc->id, node->details->id); return FALSE; } else if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Skipping probe for %s on %s because Pacemaker Remote nodes cannot host remote connections", rsc->id, node->details->id); return FALSE; } } if (rsc->children) { GList *gIter = NULL; gboolean any_created = FALSE; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force) || any_created; } return any_created; } else if ((rsc->container) && (!rsc->is_remote_node)) { pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id); 
return FALSE; } if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id); return FALSE; } // Check whether resource is already known on node if (!force && g_hash_table_lookup(rsc->known_on, node->details->id)) { pe_rsc_trace(rsc, "Skipping known: %s on %s", rsc->id, node->details->uname); return FALSE; } allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (rsc->exclusive_discover || top->exclusive_discover) { if (allowed == NULL) { /* exclusive discover is enabled and this node is not in the allowed list. */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, A", rsc->id, node->details->id); return FALSE; } else if (allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, B", rsc->id, node->details->id); return FALSE; } } if(allowed == NULL && node->rsc_discover_mode == pe_discover_never) { /* If this node was allowed to host this resource it would * have been explicitly added to the 'allowed_nodes' list. * However it wasn't and the node has discovery disabled, so * no need to probe for this resource. */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, C", rsc->id, node->details->id); return FALSE; } if (allowed && allowed->rsc_discover_mode == pe_discover_never) { /* this resource is marked as not needing to be discovered on this node */ pe_rsc_trace(rsc, "Skipping probe for %s on node %s, discovery mode", rsc->id, node->details->id); return FALSE; } if (pe__is_guest_node(node)) { pe_resource_t *remote = node->details->remote_rsc->container; if(remote->role == RSC_ROLE_STOPPED) { /* If the container is stopped, then we know anything that * might have been inside it is also stopped and there is * no need to probe. 
* * If we don't know the container's state on the target * either: * * - the container is running, the transition will abort * and we'll end up in a different case next time, or * * - the container is stopped * * Either way there is no need to probe. * */ if(remote->allocated_to && g_hash_table_lookup(remote->known_on, remote->allocated_to->details->id) == NULL) { /* For safety, we order the 'rsc' start after 'remote' * has been probed. * * Using 'top' helps for groups, but we may need to * follow the start's ordering chain backwards. */ pcmk__new_ordering(remote, pcmk__op_key(remote->id, RSC_STATUS, 0), NULL, top, pcmk__op_key(top->id, RSC_START, 0), NULL, pe_order_optional, rsc->cluster); } pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopped", rsc->id, node->details->id, remote->id); return FALSE; /* Here we really we want to check if remote->stop is required, * but that information doesn't exist yet */ } else if(node->details->remote_requires_reset || node->details->unclean || pcmk_is_set(remote->flags, pe_rsc_failed) || remote->next_role == RSC_ROLE_STOPPED || (remote->allocated_to && pe_find_node(remote->running_on, remote->allocated_to->details->uname) == NULL) ) { /* The container is stopping or restarting, don't start * 'rsc' until 'remote' stops as this also implies that * 'rsc' is stopped - avoiding the need to probe */ pcmk__new_ordering(remote, pcmk__op_key(remote->id, RSC_STOP, 0), NULL, top, pcmk__op_key(top->id, RSC_START, 0), NULL, pe_order_optional, rsc->cluster); pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopping, restarting or moving", rsc->id, node->details->id, remote->id); return FALSE; /* } else { * The container is running so there is no problem probing it */ } } key = pcmk__op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, rsc->cluster); pe__clear_action_flags(probe, pe_action_optional); pcmk__order_vs_unfence(rsc, node, probe, pe_order_optional); /* * We need to 
know if it's running_on (not just known_on) this node * to correctly determine the target rc. */ running = pe_find_node_id(rsc->running_on, node->details->id); if (running == NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); } else if (rsc->role == RSC_ROLE_PROMOTED) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_promoted); } crm_debug("Probing %s on %s (%s) %d %p", rsc->id, node->details->uname, role2text(rsc->role), pcmk_is_set(probe->flags, pe_action_runnable), rsc->running_on); if ((pcmk_is_set(rsc->flags, pe_rsc_fence_device) && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing)) || !pe_rsc_is_clone(top)) { top = rsc; } else { crm_trace("Probing %s on %s (%s) as %s", rsc->id, node->details->uname, role2text(rsc->role), top->id); } if (!pcmk_is_set(probe->flags, pe_action_runnable) && (rsc->running_on == NULL)) { /* Prevent the start from occurring if rsc isn't active, but * don't cause it to stop if it was active already */ pe__set_order_flags(flags, pe_order_runnable_left); } pcmk__new_ordering(rsc, NULL, probe, top, pcmk__op_key(top->id, RSC_START, 0), NULL, flags, rsc->cluster); // Order the probe before any agent reload pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL, pe_order_optional, rsc->cluster); return TRUE; } void native_append_meta(pe_resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); pe_resource_t *parent; if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE); if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE); crm_xml_add(xml, name, value); free(name); } for (parent = rsc; parent != NULL; parent = parent->parent) { if (parent->container) { crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, parent->container->id); } } } // Primitive implementation of 
resource_alloc_functions_t:add_utilization() void pcmk__primitive_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { return; } pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization", orig_rsc->id, rsc->id); pcmk__release_node_capacity(utilization, rsc); } /*! * \internal * \brief Get epoch time of node's shutdown attribute (or now if none) * * \param[in] node Node to check * \param[in] data_set Cluster working set * * \return Epoch time corresponding to shutdown attribute if set or now if not */ static time_t shutdown_time(pe_node_t *node, pe_working_set_t *data_set) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); time_t result = 0; if (shutdown != NULL) { long long result_ll; if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) { result = (time_t) result_ll; } } return (result == 0)? get_effective_time(data_set) : result; } // Primitive implementation of resource_alloc_functions_t:shutdown_lock() void pcmk__primitive_shutdown_lock(pe_resource_t *rsc) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); // Fence devices and remote connections can't be locked if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches) || pe__resource_is_remote_conn(rsc, rsc->cluster)) { return; } if (rsc->lock_node != NULL) { // The lock was obtained from resource history if (rsc->running_on != NULL) { /* The resource was started elsewhere even though it is now * considered locked. This shouldn't be possible, but as a * failsafe, we don't want to disturb the resource now. 
*/ pe_rsc_info(rsc, "Cancelling shutdown lock because %s is already active", rsc->id); pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster); rsc->lock_node = NULL; rsc->lock_time = 0; } // Only a resource active on exactly one node can be locked } else if (pcmk__list_of_1(rsc->running_on)) { pe_node_t *node = rsc->running_on->data; if (node->details->shutdown) { if (node->details->unclean) { pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown", rsc->id, node->details->uname); } else { rsc->lock_node = node; rsc->lock_time = shutdown_time(node, rsc->cluster); } } } if (rsc->lock_node == NULL) { // No lock needed return; } if (rsc->cluster->shutdown_lock > 0) { time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock; pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)", rsc->id, rsc->lock_node->details->uname, (long long) lock_expiration); pe__update_recheck_time(++lock_expiration, rsc->cluster); } else { pe_rsc_info(rsc, "Locking %s to %s due to shutdown", rsc->id, rsc->lock_node->details->uname); } // If resource is locked to one node, ban it from all other nodes for (GList *item = rsc->cluster->nodes; item != NULL; item = item->next) { pe_node_t *node = item->data; if (strcmp(node->details->uname, rsc->lock_node->details->uname)) { resource_location(rsc, node, -CRM_SCORE_INFINITY, XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster); } } }