Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4624338
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
100 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
index 92a4056446..05e2e14230 100644
--- a/lib/pacemaker/pcmk_graph_producer.c
+++ b/lib/pacemaker/pcmk_graph_producer.c
@@ -1,1121 +1,1114 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <glib.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pe_action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->details->uname)
/*!
* \internal
* \brief Add an XML node tag for a specified ID
*
* \param[in] id Node UUID to add
* \param[in,out] xml Parent XML tag to add to
*/
static xmlNode*
add_node_to_xml_by_id(const char *id, xmlNode *xml)
{
xmlNode *node_xml;
node_xml = create_xml_node(xml, XML_CIB_TAG_NODE);
crm_xml_add(node_xml, XML_ATTR_UUID, id);
return node_xml;
}
/*!
* \internal
* \brief Add an XML node tag for a specified node
*
* \param[in] node Node to add
* \param[in,out] xml XML to add node to
*/
static void
add_node_to_xml(const pe_node_t *node, void *xml)
{
add_node_to_xml_by_id(node->details->id, (xmlNode *) xml);
}
/*!
* \internal
* \brief Add XML with nodes that need an update of their maintenance state
*
* \param[in,out] xml Parent XML tag to add to
* \param[in] data_set Working set for cluster
*/
static int
add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set)
{
GList *gIter = NULL;
xmlNode *maintenance =
xml?create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE):NULL;
int count = 0;
for (gIter = data_set->nodes; gIter != NULL;
gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
struct pe_node_shared_s *details = node->details;
if (!pe__is_guest_or_remote_node(node)) {
continue; /* just remote nodes need to know atm */
}
if (details->maintenance != details->remote_maintenance) {
if (maintenance) {
crm_xml_add(
add_node_to_xml_by_id(node->details->id, maintenance),
XML_NODE_IS_MAINTENANCE, details->maintenance?"1":"0");
}
count++;
}
}
crm_trace("%s %d nodes to adjust maintenance-mode "
"to transition", maintenance?"Added":"Counted", count);
return count;
}
/*!
* \internal
* \brief Add pseudo action with nodes needing maintenance state update
*
* \param[in,out] data_set Working set for cluster
*/
static void
add_maintenance_update(pe_working_set_t *data_set)
{
pe_action_t *action = NULL;
if (add_maintenance_nodes(NULL, data_set)) {
crm_trace("adding maintenance state update pseudo action");
action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set);
pe__set_action_flags(action, pe_action_print_always);
}
}
/*!
* \internal
* \brief Add XML with nodes that an action is expected to bring down
*
* If a specified action is expected to bring any nodes down, add an XML block
* with their UUIDs. When a node is lost, this allows the controller to
* determine whether it was expected.
*
* \param[in,out] xml Parent XML tag to add to
* \param[in] action Action to check for downed nodes
* \param[in] data_set Working set for cluster
*/
static void
add_downed_nodes(xmlNode *xml, const pe_action_t *action,
const pe_working_set_t *data_set)
{
CRM_CHECK(xml && action && action->node && data_set, return);
if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
/* Shutdown makes the action's node down */
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->node->details->id, downed);
} else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
/* Fencing makes the action's node and any hosted guest nodes down */
const char *fence = g_hash_table_lookup(action->meta, "stonith_action");
if (pcmk__is_fencing_action(fence)) {
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->node->details->id, downed);
pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed);
}
} else if (action->rsc && action->rsc->is_remote_node
&& pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) {
/* Stopping a remote connection resource makes connected node down,
* unless it's part of a migration
*/
GList *iter;
pe_action_t *input;
gboolean migrating = FALSE;
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
input = ((pe_action_wrapper_t *) iter->data)->action;
if (input->rsc && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_casei)
&& pcmk__str_eq(input->task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
migrating = TRUE;
break;
}
}
if (!migrating) {
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->rsc->id, downed);
}
}
}
/*!
* \internal
* \brief Create a transition graph operation key for a clone action
*
* \param[in] action Clone action
* \param[in] interval_ms Action interval in milliseconds
*
* \return Newly allocated string with transition graph operation key
*/
static char *
clone_op_key(pe_action_t *action, guint interval_ms)
{
if (pcmk__str_eq(action->task, RSC_NOTIFY, pcmk__str_none)) {
const char *n_type = g_hash_table_lookup(action->meta, "notify_type");
const char *n_task = g_hash_table_lookup(action->meta,
"notify_operation");
CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL));
return pcmk__notify_key(action->rsc->clone_name, n_type, n_task);
} else if (action->cancel_task != NULL) {
return pcmk__op_key(action->rsc->clone_name, action->cancel_task,
interval_ms);
} else {
return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms);
}
}
/*!
* \internal
* \brief Add node details to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in] xml Transition graph action XML for \p action
*/
static void
add_node_details(pe_action_t *action, xmlNode *xml)
{
pe_node_t *router_node = pcmk__connection_host_for_action(action);
crm_xml_add(xml, XML_LRM_ATTR_TARGET, action->node->details->uname);
crm_xml_add(xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id);
if (router_node != NULL) {
crm_xml_add(xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname);
}
}
/*!
* \internal
* \brief Add resource details to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in] action_xml Transition graph action XML for \p action
*/
static void
add_resource_details(pe_action_t *action, xmlNode *action_xml)
{
xmlNode *rsc_xml = NULL;
const char *attr_list[] = {
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER,
XML_ATTR_TYPE
};
/* If a resource is locked to a node via shutdown-lock, mark its actions
* so the controller can preserve the lock when the action completes.
*/
if (pcmk__action_locks_rsc_to_node(action)) {
crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
(long long) action->rsc->lock_time);
}
// List affected resource
rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml));
if (pcmk_is_set(action->rsc->flags, pe_rsc_orphan)
&& (action->rsc->clone_name != NULL)) {
/* Use the numbered instance name here, because if there is more
* than one instance on a node, we need to make sure the command
* goes to the right one.
*
* This is important even for anonymous clones, because the clone's
* unique meta-attribute might have just been toggled from on to
* off.
*/
crm_debug("Using orphan clone name %s instead of %s",
action->rsc->id, action->rsc->clone_name);
crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name);
crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
} else if (!pcmk_is_set(action->rsc->flags, pe_rsc_unique)) {
const char *xml_id = ID(action->rsc->xml);
crm_debug("Using anonymous clone name %s for %s (aka %s)",
xml_id, action->rsc->id, action->rsc->clone_name);
/* ID is what we'd like client to use
* ID_LONG is what they might know it as instead
*
* ID_LONG is only strictly needed /here/ during the
* transition period until all nodes in the cluster
* are running the new software /and/ have rebooted
* once (meaning that they've only ever spoken to a DC
* supporting this feature).
*
* If anyone toggles the unique flag to 'on', the
* 'instance free' name will correspond to an orphan
* and fall into the clause above instead
*/
crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id);
if ((action->rsc->clone_name != NULL)
&& !pcmk__str_eq(xml_id, action->rsc->clone_name,
pcmk__str_none)) {
crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name);
} else {
crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
}
} else {
CRM_ASSERT(action->rsc->clone_name == NULL);
crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id);
}
for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) {
crm_xml_add(rsc_xml, attr_list[lpc],
g_hash_table_lookup(action->rsc->meta, attr_list[lpc]));
}
}
/*!
* \internal
* \brief Add action attributes to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in] action_xml Transition graph action XML for \p action
*/
static void
add_action_attributes(pe_action_t *action, xmlNode *action_xml)
{
xmlNode *args_xml = NULL;
/* We create free-standing XML to start, so we can sort the attributes
* before adding it to action_xml, which keeps the scheduler regression
* test graphs comparable.
*/
args_xml = create_xml_node(NULL, XML_TAG_ATTRS);
crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
g_hash_table_foreach(action->extra, hash2field, args_xml);
if ((action->rsc != NULL) && (action->node != NULL)) {
// Get the resource instance attributes, evaluated properly for node
GHashTable *params = pe_rsc_params(action->rsc, action->node,
action->rsc->cluster);
pcmk__substitute_remote_addr(action->rsc, params);
g_hash_table_foreach(params, hash2smartfield, args_xml);
#if ENABLE_VERSIONED_ATTRS
{
xmlNode *versioned_parameters = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS);
pe_get_versioned_attributes(versioned_parameters, action->rsc,
action->node, action->rsc->cluster);
if (xml_has_children(versioned_parameters)) {
add_node_copy(action_xml, versioned_parameters);
}
free_xml(versioned_parameters);
}
#endif
} else if ((action->rsc != NULL) && (action->rsc->variant <= pe_native)) {
GHashTable *params = pe_rsc_params(action->rsc, NULL,
action->rsc->cluster);
g_hash_table_foreach(params, hash2smartfield, args_xml);
#if ENABLE_VERSIONED_ATTRS
if (xml_has_children(action->rsc->versioned_parameters)) {
add_node_copy(action_xml, action->rsc->versioned_parameters);
}
#endif
}
#if ENABLE_VERSIONED_ATTRS
if (rsc_details != NULL) {
if (xml_has_children(rsc_details->versioned_parameters)) {
add_node_copy(action_xml, rsc_details->versioned_parameters);
}
if (xml_has_children(rsc_details->versioned_meta)) {
add_node_copy(action_xml, rsc_details->versioned_meta);
}
}
#endif
g_hash_table_foreach(action->meta, hash2metafield, args_xml);
if (action->rsc != NULL) {
- const char *value = g_hash_table_lookup(action->rsc->meta,
- "external-ip");
pe_resource_t *parent = action->rsc;
while (parent != NULL) {
parent->cmds->add_graph_meta(parent, args_xml);
parent = parent->parent;
}
- if (value != NULL) {
- hash2smartfield((gpointer) "pcmk_external_ip", (gpointer) value,
- (gpointer) args_xml);
- }
-
pcmk__add_bundle_meta_to_xml(args_xml, action);
} else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_none)
&& (action->node != NULL)) {
/* Pass the node's attributes as meta-attributes.
*
* @TODO: Determine whether it is still necessary to do this. It was
* added in 33d99707, probably for the libfence-based implementation in
* c9a90bd, which is no longer used.
*/
g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml);
}
sorted_xml(args_xml, action_xml, FALSE);
free_xml(args_xml);
}
/*!
* \internal
* \brief Create the transition graph XML for a scheduled action
*
* \param[in] parent Parent XML element to add action to
* \param[in] action Scheduled action
* \param[in] skip_details If false, add action details as sub-elements
* \param[in] data_set Cluster working set
*/
static void
create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details,
pe_working_set_t *data_set)
{
bool needs_node_info = true;
bool needs_maintenance_info = false;
xmlNode *action_xml = NULL;
#if ENABLE_VERSIONED_ATTRS
pe_rsc_action_details_t *rsc_details = NULL;
#endif
if ((action == NULL) || (data_set == NULL)) {
return;
}
// Create the top-level element based on task
if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
/* All fences need node info; guest node fences are pseudo-events */
action_xml = create_xml_node(parent,
pcmk_is_set(action->flags, pe_action_pseudo)?
XML_GRAPH_TAG_PSEUDO_EVENT :
XML_GRAPH_TAG_CRM_EVENT);
} else if (pcmk__str_any_of(action->task,
CRM_OP_SHUTDOWN,
CRM_OP_CLEAR_FAILCOUNT, NULL)) {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
} else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
// CIB-only clean-up for shutdown locks
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB);
} else if (pcmk_is_set(action->flags, pe_action_pseudo)) {
if (pcmk__str_eq(action->task, CRM_OP_MAINTENANCE_NODES,
pcmk__str_none)) {
needs_maintenance_info = true;
}
action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT);
needs_node_info = false;
} else {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP);
#if ENABLE_VERSIONED_ATTRS
rsc_details = pe_rsc_action_details(action);
#endif
}
crm_xml_add_int(action_xml, XML_ATTR_ID, action->id);
crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task);
if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) {
char *clone_key = NULL;
guint interval_ms;
if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0,
&interval_ms) != pcmk_rc_ok) {
interval_ms = 0;
}
clone_key = clone_op_key(action, interval_ms);
crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key);
crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid);
free(clone_key);
} else {
crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid);
}
if (needs_node_info && (action->node != NULL)) {
add_node_details(action, action_xml);
g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET),
strdup(action->node->details->uname));
g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID),
strdup(action->node->details->id));
}
if (skip_details) {
return;
}
if ((action->rsc != NULL)
&& !pcmk_is_set(action->flags, pe_action_pseudo)) {
// This is a real resource action, so add resource details
add_resource_details(action, action_xml);
}
/* List any attributes in effect */
add_action_attributes(action, action_xml);
/* List any nodes this action is expected to make down */
if (needs_node_info && (action->node != NULL)) {
add_downed_nodes(action_xml, action, data_set);
}
if (needs_maintenance_info) {
add_maintenance_nodes(action_xml, data_set);
}
}
/*!
* \internal
* \brief Check whether an action should be added to the transition graph
*
* \param[in] action Action to check
*
* \return true if action should be added to graph, otherwise false
*/
static bool
should_add_action_to_graph(pe_action_t *action)
{
if (!pcmk_is_set(action->flags, pe_action_runnable)) {
crm_trace("Ignoring action %s (%d): unrunnable",
action->uuid, action->id);
return false;
}
if (pcmk_is_set(action->flags, pe_action_optional)
&& !pcmk_is_set(action->flags, pe_action_print_always)) {
crm_trace("Ignoring action %s (%d): optional",
action->uuid, action->id);
return false;
}
/* Actions for unmanaged resources should be excluded from the graph,
* with the exception of monitors and cancellation of recurring monitors.
*/
if ((action->rsc != NULL)
&& !pcmk_is_set(action->rsc->flags, pe_rsc_managed)
&& !pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_none)) {
const char *interval_ms_s;
/* A cancellation of a recurring monitor will get here because the task
* is cancel rather than monitor, but the interval can still be used to
* recognize it. The interval has been normalized to milliseconds by
* this point, so a string comparison is sufficient.
*/
interval_ms_s = g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS);
if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) {
crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)",
action->uuid, action->id, action->rsc->id);
return false;
}
}
/* Always add pseudo-actions, fence actions, and shutdown actions (already
* determined to be required and runnable by this point)
*/
if (pcmk_is_set(action->flags, pe_action_pseudo)
|| pcmk__strcase_any_of(action->task, CRM_OP_FENCE, CRM_OP_SHUTDOWN,
NULL)) {
return true;
}
if (action->node == NULL) {
pe_err("Skipping action %s (%d) "
"because it was not allocated to a node (bug?)",
action->uuid, action->id);
pcmk__log_action("Unallocated", action, false);
return false;
}
if (pcmk_is_set(action->flags, pe_action_dc)) {
crm_trace("Action %s (%d) should be dumped: "
"can run on DC instead of %s",
action->uuid, action->id, pe__node_name(action->node));
} else if (pe__is_guest_node(action->node)
&& !action->node->details->remote_requires_reset) {
crm_trace("Action %s (%d) should be dumped: "
"assuming will be runnable on guest %s",
action->uuid, action->id, pe__node_name(action->node));
} else if (!action->node->details->online) {
pe_err("Skipping action %s (%d) "
"because it was scheduled for offline node (bug?)",
action->uuid, action->id);
pcmk__log_action("Offline node", action, false);
return false;
} else if (action->node->details->unclean) {
pe_err("Skipping action %s (%d) "
"because it was scheduled for unclean node (bug?)",
action->uuid, action->id);
pcmk__log_action("Unclean node", action, false);
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether an ordering's flags can change an action
*
* \param[in] ordering Ordering to check
*
* \return true if ordering has flags that can change an action, false otherwise
*/
static bool
ordering_can_change_actions(pe_action_wrapper_t *ordering)
{
return pcmk_any_flags_set(ordering->type, ~(pe_order_implies_first_printed
|pe_order_implies_then_printed
|pe_order_optional));
}
/*!
* \internal
* \brief Check whether an action input should be in the transition graph
*
* \param[in] action Action to check
* \param[in,out] input Action input to check
*
* \return true if input should be in graph, false otherwise
* \note This function may not only check an input, but disable it under certian
* circumstances (load or anti-colocation orderings that are not needed).
*/
static bool
should_add_input_to_graph(pe_action_t *action, pe_action_wrapper_t *input)
{
if (input->state == pe_link_dumped) {
return true;
}
if (input->type == pe_order_none) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"ordering disabled",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (!pcmk_is_set(input->action->flags, pe_action_runnable)
&& !ordering_can_change_actions(input)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional and input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (!pcmk_is_set(input->action->flags, pe_action_runnable)
&& pcmk_is_set(input->type, pe_order_one_or_more)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"one-or-more and input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (pcmk_is_set(input->type, pe_order_implies_first_migratable)
&& !pcmk_is_set(input->action->flags, pe_action_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"implies input migratable but input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (pcmk_is_set(input->type, pe_order_apply_first_non_migratable)
&& pcmk_is_set(input->action->flags, pe_action_migrate_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"only if input unmigratable but input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if ((input->type == pe_order_optional)
&& pcmk_is_set(input->action->flags, pe_action_migrate_runnable)
&& pcmk__ends_with(input->action->uuid, "_stop_0")) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional but stop in migration",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (input->type == pe_order_load) {
pe_node_t *input_node = input->action->node;
// load orderings are relevant only if actions are for same node
if (action->rsc && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei)) {
pe_node_t *allocated = action->rsc->allocated_to;
/* For load_stopped -> migrate_to orderings, we care about where it
* has been allocated to, not where it will be executed.
*/
if ((input_node == NULL) || (allocated == NULL)
|| (input_node->details != allocated->details)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"load ordering node mismatch %s vs %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
(allocated? allocated->details->uname : "<none>"),
(input_node? input_node->details->uname : "<none>"));
input->type = pe_order_none;
return false;
}
} else if ((input_node == NULL) || (action->node == NULL)
|| (input_node->details != action->node->details)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"load ordering node mismatch %s vs %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
(action->node? action->node->details->uname : "<none>"),
(input_node? input_node->details->uname : "<none>"));
input->type = pe_order_none;
return false;
} else if (pcmk_is_set(input->action->flags, pe_action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"load ordering input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->type = pe_order_none;
return false;
}
} else if (input->type == pe_order_anti_colocation) {
if (input->action->node && action->node
&& (input->action->node->details != action->node->details)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"anti-colocation node mismatch %s vs %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
pe__node_name(action->node),
pe__node_name(input->action->node));
input->type = pe_order_none;
return false;
} else if (pcmk_is_set(input->action->flags, pe_action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"anti-colocation input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->type = pe_order_none;
return false;
}
} else if (input->action->rsc
&& input->action->rsc != action->rsc
&& pcmk_is_set(input->action->rsc->flags, pe_rsc_failed)
&& !pcmk_is_set(input->action->rsc->flags, pe_rsc_managed)
&& pcmk__ends_with(input->action->uuid, "_stop_0")
&& action->rsc && pe_rsc_is_clone(action->rsc)) {
crm_warn("Ignoring requirement that %s complete before %s:"
" unmanaged failed resources cannot prevent clone shutdown",
input->action->uuid, action->uuid);
return false;
} else if (pcmk_is_set(input->action->flags, pe_action_optional)
&& !pcmk_any_flags_set(input->action->flags,
pe_action_print_always|pe_action_dumped)
&& !should_add_action_to_graph(input->action)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
}
crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x",
action->uuid, action->id, action_type_str(input->action->flags),
input->action->uuid, input->action->id,
action_node_str(input->action),
action_runnable_str(input->action->flags),
action_optional_str(input->action->flags), input->type);
return true;
}
/*!
* \internal
* \brief Check whether an ordering creates an ordering loop
*
* \param[in] init_action "First" action in ordering
* \param[in] action Callers should always set this the same as
* \p init_action (this function may use a different
* value for recursive calls)
* \param[in] input Action wrapper for "then" action in ordering
*
* \return true if the ordering creates a loop, otherwise false
*/
bool
pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action,
pe_action_wrapper_t *input)
{
bool has_loop = false;
if (pcmk_is_set(input->action->flags, pe_action_tracking)) {
crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
action->uuid,
action->node? action->node->details->uname : "",
input->type);
return false;
}
// Don't need to check inputs that won't be used
if (!should_add_input_to_graph(action, input)) {
return false;
}
if (input->action == init_action) {
crm_debug("Input loop found in %s@%s ->...-> %s@%s",
action->uuid,
action->node? action->node->details->uname : "",
init_action->uuid,
init_action->node? init_action->node->details->uname : "");
return true;
}
pe__set_action_flags(input->action, pe_action_tracking);
crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)"
"for graph loop with %s@%s ",
action->uuid,
action->node? action->node->details->uname : "",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
input->type,
init_action->uuid,
init_action->node? init_action->node->details->uname : "");
// Recursively check input itself for loops
for (GList *iter = input->action->actions_before;
iter != NULL; iter = iter->next) {
if (pcmk__graph_has_loop(init_action, input->action,
(pe_action_wrapper_t *) iter->data)) {
// Recursive call already logged a debug message
has_loop = true;
break;
}
}
pe__clear_action_flags(input->action, pe_action_tracking);
if (!has_loop) {
crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
action->uuid,
action->node? action->node->details->uname : "",
input->type);
}
return has_loop;
}
/*!
* \internal
* \brief Create a synapse XML element for a transition graph
*
* \param[in] action Action that synapse is for
* \param[in] data_set Cluster working set containing graph
*
* \return Newly added XML element for new graph synapse
*/
static xmlNode *
create_graph_synapse(pe_action_t *action, pe_working_set_t *data_set)
{
int synapse_priority = 0;
xmlNode *syn = create_xml_node(data_set->graph, "synapse");
crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse);
data_set->num_synapse++;
if (action->rsc != NULL) {
synapse_priority = action->rsc->priority;
}
if (action->priority > synapse_priority) {
synapse_priority = action->priority;
}
if (synapse_priority > 0) {
crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority);
}
return syn;
}
/*!
* \internal
* \brief Add an action to the transition graph XML if appropriate
*
* \param[in] data Action to possibly add
* \param[in] user_data Cluster working set
*
* \note This will de-duplicate the action inputs, meaning that the
* pe_action_wrapper_t:type flags can no longer be relied on to retain
* their original settings. That means this MUST be called after
* pcmk__apply_orderings() is complete, and nothing after this should rely
* on those type flags. (For example, some code looks for type equal to
* some flag rather than whether the flag is set, and some code looks for
* particular combinations of flags -- such code must be done before
* pcmk__create_graph().)
*/
static void
add_action_to_graph(gpointer data, gpointer user_data)
{
pe_action_t *action = (pe_action_t *) data;
pe_working_set_t *data_set = (pe_working_set_t *) user_data;
xmlNode *syn = NULL;
xmlNode *set = NULL;
xmlNode *in = NULL;
/* If we haven't already, de-duplicate inputs (even if we won't be adding
* the action to the graph, so that crm_simulate's dot graphs don't have
* duplicates).
*/
if (!pcmk_is_set(action->flags, pe_action_dedup)) {
pcmk__deduplicate_action_inputs(action);
pe__set_action_flags(action, pe_action_dedup);
}
if (pcmk_is_set(action->flags, pe_action_dumped) // Already added, or
|| !should_add_action_to_graph(action)) { // shouldn't be added
return;
}
pe__set_action_flags(action, pe_action_dumped);
crm_trace("Adding action %d (%s%s%s) to graph",
action->id, action->uuid,
((action->node == NULL)? "" : " on "),
((action->node == NULL)? "" : action->node->details->uname));
syn = create_graph_synapse(action, data_set);
set = create_xml_node(syn, "action_set");
in = create_xml_node(syn, "inputs");
create_graph_action(set, action, false, data_set);
for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *input = (pe_action_wrapper_t *) lpc->data;
if (should_add_input_to_graph(action, input)) {
xmlNode *input_xml = create_xml_node(in, "trigger");
input->state = pe_link_dumped;
create_graph_action(input_xml, input->action, true, data_set);
}
}
}
static int transition_id = -1;
/*!
* \internal
* \brief Log a message after calculating a transition
*
* \param[in] filename Where transition input is stored
*/
void
pcmk__log_transition_summary(const char *filename)
{
if (was_processing_error) {
crm_err("Calculated transition %d (with errors)%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
} else if (was_processing_warning) {
crm_warn("Calculated transition %d (with warnings)%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
} else {
crm_notice("Calculated transition %d%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
}
if (crm_config_error) {
crm_notice("Configuration errors found during scheduler processing,"
" please run \"crm_verify -L\" to identify issues");
}
}
/*!
* \internal
* \brief Add a resource's actions to the transition graph
*
* \param[in] rsc Resource whose actions should be added
*/
void
pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc)
{
GList *iter = NULL;
CRM_ASSERT(rsc != NULL);
pe_rsc_trace(rsc, "Adding actions for %s to graph", rsc->id);
// First add the resource's own actions
g_list_foreach(rsc->actions, add_action_to_graph, rsc->cluster);
// Then recursively add its children's actions (appropriate to variant)
for (iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
child_rsc->cmds->add_actions_to_graph(child_rsc);
}
}
/*!
* \internal
* \brief Create a transition graph with all cluster actions needed
*
* \param[in] data_set Cluster working set
*/
void
pcmk__create_graph(pe_working_set_t *data_set)
{
GList *iter = NULL;
const char *value = NULL;
long long limit = 0LL;
transition_id++;
crm_trace("Creating transition graph %d", transition_id);
data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
value = pe_pref(data_set->config_hash, "cluster-delay");
crm_xml_add(data_set->graph, "cluster-delay", value);
value = pe_pref(data_set->config_hash, "stonith-timeout");
crm_xml_add(data_set->graph, "stonith-timeout", value);
crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) {
crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
} else {
crm_xml_add(data_set->graph, "failed-start-offset", "1");
}
value = pe_pref(data_set->config_hash, "batch-limit");
crm_xml_add(data_set->graph, "batch-limit", value);
crm_xml_add_int(data_set->graph, "transition_id", transition_id);
value = pe_pref(data_set->config_hash, "migration-limit");
if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) {
crm_xml_add(data_set->graph, "migration-limit", value);
}
if (data_set->recheck_by > 0) {
char *recheck_epoch = NULL;
recheck_epoch = crm_strdup_printf("%llu",
(long long) data_set->recheck_by);
crm_xml_add(data_set->graph, "recheck-by", recheck_epoch);
free(recheck_epoch);
}
/* The following code will de-duplicate action inputs, so nothing past this
* should rely on the action input type flags retaining their original
* values.
*/
// Add resource actions to graph
for (iter = data_set->resources; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
pe_rsc_trace(rsc, "Processing actions for %s", rsc->id);
rsc->cmds->add_actions_to_graph(rsc);
}
// Add pseudo-action for list of nodes with maintenance state update
add_maintenance_update(data_set);
// Add non-resource (node) actions
for (iter = data_set->actions; iter != NULL; iter = iter->next) {
pe_action_t *action = (pe_action_t *) iter->data;
if ((action->rsc != NULL)
&& (action->node != NULL)
&& action->node->details->shutdown
&& !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance)
&& !pcmk_any_flags_set(action->flags,
pe_action_optional|pe_action_runnable)
&& pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none)) {
/* Eventually we should just ignore the 'fence' case, but for now
* it's the best way to detect (in CTS) when CIB resource updates
* are being lost.
*/
if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)
|| (data_set->no_quorum_policy == no_quorum_ignore)) {
crm_crit("Cannot %s %s because of %s:%s%s (%s)",
action->node->details->unclean? "fence" : "shut down",
pe__node_name(action->node), action->rsc->id,
pcmk_is_set(action->rsc->flags, pe_rsc_managed)? " blocked" : " unmanaged",
pcmk_is_set(action->rsc->flags, pe_rsc_failed)? " failed" : "",
action->uuid);
}
}
add_action_to_graph((gpointer) action, (gpointer) data_set);
}
crm_log_xml_trace(data_set->graph, "graph");
}
diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c
index 05bfc76c62..72b1aadc49 100644
--- a/lib/pacemaker/pcmk_sched_primitive.c
+++ b/lib/pacemaker/pcmk_sched_primitive.c
@@ -1,1534 +1,1543 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void promote_resource(pe_resource_t *rsc, pe_node_t *node,
bool optional);
static void assert_role_error(pe_resource_t *rsc, pe_node_t *node,
bool optional);
static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* This array lists the immediate next role when transitioning from one role
* to a target role. For example, when going from Stopped to Promoted, the
* next role is Unpromoted, because the resource must be started before it
* can be promoted. The current state then becomes Started, which is fed
* into this array again, giving a next role of Promoted.
*
* Current role Immediate next role Final target role
* ------------ ------------------- -----------------
*/
/* Unknown */ { RSC_ROLE_UNKNOWN, /* Unknown */
RSC_ROLE_STOPPED, /* Stopped */
RSC_ROLE_STOPPED, /* Started */
RSC_ROLE_STOPPED, /* Unpromoted */
RSC_ROLE_STOPPED, /* Promoted */
},
/* Stopped */ { RSC_ROLE_STOPPED, /* Unknown */
RSC_ROLE_STOPPED, /* Stopped */
RSC_ROLE_STARTED, /* Started */
RSC_ROLE_UNPROMOTED, /* Unpromoted */
RSC_ROLE_UNPROMOTED, /* Promoted */
},
/* Started */ { RSC_ROLE_STOPPED, /* Unknown */
RSC_ROLE_STOPPED, /* Stopped */
RSC_ROLE_STARTED, /* Started */
RSC_ROLE_UNPROMOTED, /* Unpromoted */
RSC_ROLE_PROMOTED, /* Promoted */
},
/* Unpromoted */ { RSC_ROLE_STOPPED, /* Unknown */
RSC_ROLE_STOPPED, /* Stopped */
RSC_ROLE_STOPPED, /* Started */
RSC_ROLE_UNPROMOTED, /* Unpromoted */
RSC_ROLE_PROMOTED, /* Promoted */
},
/* Promoted */ { RSC_ROLE_STOPPED, /* Unknown */
RSC_ROLE_UNPROMOTED, /* Stopped */
RSC_ROLE_UNPROMOTED, /* Started */
RSC_ROLE_UNPROMOTED, /* Unpromoted */
RSC_ROLE_PROMOTED, /* Promoted */
},
};
/*!
* \internal
* \brief Function to schedule actions needed for a role change
*
* \param[in,out] rsc Resource whose role is changing
* \param[in] node Node where resource will be in its next role
* \param[in] optional Whether scheduled actions should be optional
*/
typedef void (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *node,
bool optional);
static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* This array lists the function needed to transition directly from one role
* to another. NULL indicates that nothing is needed.
*
* Current role Transition function Next role
* ------------ ------------------- ----------
*/
/* Unknown */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
assert_role_error, /* Started */
assert_role_error, /* Unpromoted */
assert_role_error, /* Promoted */
},
/* Stopped */ { assert_role_error, /* Unknown */
NULL, /* Stopped */
start_resource, /* Started */
start_resource, /* Unpromoted */
assert_role_error, /* Promoted */
},
/* Started */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
NULL, /* Started */
NULL, /* Unpromoted */
promote_resource, /* Promoted */
},
/* Unpromoted */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
stop_resource, /* Started */
NULL, /* Unpromoted */
promote_resource, /* Promoted */
},
/* Promoted */ { assert_role_error, /* Unknown */
demote_resource, /* Stopped */
demote_resource, /* Started */
demote_resource, /* Unpromoted */
NULL, /* Promoted */
},
};
/*!
* \internal
* \brief Get a list of a resource's allowed nodes sorted by node weight
*
* \param[in] rsc Resource to check
*
* \return List of allowed nodes sorted by node weight
*/
static GList *
sorted_allowed_nodes(const pe_resource_t *rsc)
{
if (rsc->allowed_nodes != NULL) {
GList *nodes = g_hash_table_get_values(rsc->allowed_nodes);
if (nodes != NULL) {
return pcmk__sort_nodes(nodes, pe__current_node(rsc));
}
}
return NULL;
}
/*!
* \internal
* \brief Assign a resource to its best allowed node, if possible
*
* \param[in] rsc Resource to choose a node for
* \param[in] prefer If not NULL, prefer this node when all else equal
*
* \return true if \p rsc could be assigned to a node, otherwise false
*/
static bool
assign_best_node(pe_resource_t *rsc, pe_node_t *prefer)
{
GList *nodes = NULL;
pe_node_t *chosen = NULL;
pe_node_t *best = NULL;
bool result = false;
pcmk__ban_insufficient_capacity(rsc, &prefer);
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
// We've already finished assignment of resources to nodes
return rsc->allocated_to != NULL;
}
// Sort allowed nodes by weight
nodes = sorted_allowed_nodes(rsc);
if (nodes != NULL) {
best = (pe_node_t *) nodes->data; // First node has best score
}
if ((prefer != NULL) && (nodes != NULL)) {
// Get the allowed node version of prefer
chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (chosen == NULL) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
pe__node_name(prefer), rsc->id);
/* Favor the preferred node as long as its weight is at least as good as
* the best allowed node's.
*
* An alternative would be to favor the preferred node even if the best
* node is better, when the best node's weight is less than INFINITY.
*/
} else if (chosen->weight < best->weight) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
pe__node_name(chosen), rsc->id);
chosen = NULL;
} else if (!pcmk__node_available(chosen, true, false)) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
pe__node_name(chosen), rsc->id);
chosen = NULL;
} else {
pe_rsc_trace(rsc,
"Chose preferred node %s for %s (ignoring %d candidates)",
pe__node_name(chosen), rsc->id, g_list_length(nodes));
}
}
if ((chosen == NULL) && (best != NULL)) {
/* Either there is no preferred node, or the preferred node is not
* suitable, but another node is allowed to run the resource.
*/
chosen = best;
if (!pe_rsc_is_unique_clone(rsc->parent)
&& (chosen->weight > 0) // Zero not acceptable
&& pcmk__node_available(chosen, false, false)) {
/* If the resource is already running on a node, prefer that node if
* it is just as good as the chosen node.
*
* We don't do this for unique clone instances, because
* distribute_children() has already assigned instances to their
* running nodes when appropriate, and if we get here, we don't want
* remaining unassigned instances to prefer a node that's already
* running another instance.
*/
pe_node_t *running = pe__current_node(rsc);
if (running == NULL) {
// Nothing to do
} else if (!pcmk__node_available(running, true, false)) {
pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources",
rsc->id, pe__node_name(running));
} else {
int nodes_with_best_score = 1;
for (GList *iter = nodes->next; iter; iter = iter->next) {
pe_node_t *allowed = (pe_node_t *) iter->data;
if (allowed->weight != chosen->weight) {
// The nodes are sorted by weight, so no more are equal
break;
}
if (allowed->details == running->details) {
// Scores are equal, so prefer the current node
chosen = allowed;
}
nodes_with_best_score++;
}
if (nodes_with_best_score > 1) {
do_crm_log(((chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO),
"Chose %s for %s from %d nodes with score %s",
pe__node_name(chosen), rsc->id,
nodes_with_best_score,
pcmk_readable_score(chosen->weight));
}
}
}
pe_rsc_trace(rsc, "Chose %s for %s from %d candidates",
pe__node_name(chosen), rsc->id, g_list_length(nodes));
}
result = pcmk__finalize_assignment(rsc, chosen, false);
g_list_free(nodes);
return result;
}
/*!
* \internal
* \brief Apply a "this with" colocation to a node's allowed node scores
*
* \param[in] data Colocation to apply
* \param[in] user_data Resource being assigned
*/
static void
apply_this_with(void *data, void *user_data)
{
pcmk__colocation_t *colocation = (pcmk__colocation_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
GHashTable *archive = NULL;
pe_resource_t *other = colocation->primary;
// In certain cases, we will need to revert the node scores
if ((colocation->dependent_role >= RSC_ROLE_PROMOTED)
|| ((colocation->score < 0) && (colocation->score > -INFINITY))) {
archive = pcmk__copy_node_table(rsc->allowed_nodes);
}
pe_rsc_trace(rsc,
"%s: Assigning colocation %s primary %s first"
"(score=%d role=%s)",
rsc->id, colocation->id, other->id,
colocation->score, role2text(colocation->dependent_role));
other->cmds->assign(other, NULL);
// Apply the colocation score to this resource's allowed node scores
rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
if ((archive != NULL)
&& !pcmk__any_node_available(rsc->allowed_nodes)) {
pe_rsc_info(rsc,
"%s: Reverting scores from colocation with %s "
"because no nodes allowed",
rsc->id, other->id);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = archive;
archive = NULL;
}
if (archive != NULL) {
g_hash_table_destroy(archive);
}
}
/*!
* \internal
* \brief Apply a "with this" colocation to a node's allowed node scores
*
* \param[in] data Colocation to apply
* \param[in] user_data Resource being assigned
*/
static void
apply_with_this(void *data, void *user_data)
{
pcmk__colocation_t *colocation = (pcmk__colocation_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
pe_resource_t *other = colocation->dependent;
const float factor = colocation->score / (float) INFINITY;
if (!pcmk__colocation_has_influence(colocation, NULL)) {
return;
}
pe_rsc_trace(rsc,
"%s: Incorporating attenuated %s assignment scores due "
"to colocation %s", rsc->id, other->id, colocation->id);
other->cmds->add_colocated_node_scores(other, rsc->id,
&rsc->allowed_nodes,
colocation->node_attribute,
factor, pcmk__coloc_select_active);
}
/*!
* \internal
* \brief Update a Pacemaker Remote node once its connection has been assigned
*
* \param[in] connection Connection resource that has been assigned
*/
static void
remote_connection_assigned(pe_resource_t *connection)
{
pe_node_t *remote_node = pe_find_node(connection->cluster->nodes,
connection->id);
CRM_CHECK(remote_node != NULL, return);
if ((connection->allocated_to != NULL)
&& (connection->next_role != RSC_ROLE_STOPPED)) {
crm_trace("Pacemaker Remote node %s will be online",
remote_node->details->id);
remote_node->details->online = TRUE;
if (remote_node->details->unseen) {
// Avoid unnecessary fence, since we will attempt connection
remote_node->details->unclean = FALSE;
}
} else {
crm_trace("Pacemaker Remote node %s will be shut down "
"(%sassigned connection's next role is %s)",
remote_node->details->id,
((connection->allocated_to == NULL)? "un" : ""),
role2text(connection->next_role));
remote_node->details->shutdown = TRUE;
}
}
/*!
* \internal
* \brief Assign a primitive resource to a node
*
* \param[in] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*/
pe_node_t *
pcmk__primitive_assign(pe_resource_t *rsc, pe_node_t *prefer)
{
CRM_ASSERT(rsc != NULL);
// Never assign a child without parent being assigned first
if ((rsc->parent != NULL)
&& !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "%s: Assigning parent %s first",
rsc->id, rsc->parent->id);
rsc->parent->cmds->assign(rsc->parent, prefer);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->allocated_to; // Assignment has already been done
}
// Ensure we detect assignment loops
if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
return NULL;
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
pe__show_node_weights(true, rsc, "Pre-assignment", rsc->allowed_nodes,
rsc->cluster);
g_list_foreach(rsc->rsc_cons, apply_this_with, rsc);
pe__show_node_weights(true, rsc, "Post-this-with", rsc->allowed_nodes,
rsc->cluster);
g_list_foreach(rsc->rsc_cons_lhs, apply_with_this, rsc);
if (rsc->next_role == RSC_ROLE_STOPPED) {
pe_rsc_trace(rsc,
"Banning %s from all nodes because it will be stopped",
rsc->id);
resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE,
rsc->cluster);
} else if ((rsc->next_role > rsc->role)
&& !pcmk_is_set(rsc->cluster->flags, pe_flag_have_quorum)
&& (rsc->cluster->no_quorum_policy == no_quorum_freeze)) {
crm_notice("Resource %s cannot be elevated from %s to %s due to "
"no-quorum-policy=freeze",
rsc->id, role2text(rsc->role), role2text(rsc->next_role));
pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze");
}
pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
// Unmanage resource if fencing is enabled but no device is configured
if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)
&& !pcmk_is_set(rsc->cluster->flags, pe_flag_have_stonith_resource)) {
pe__clear_resource_flags(rsc, pe_rsc_managed);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
// Unmanaged resources stay on their current node
const char *reason = NULL;
pe_node_t *assign_to = NULL;
pe__set_next_role(rsc, rsc->role, "unmanaged");
assign_to = pe__current_node(rsc);
if (assign_to == NULL) {
reason = "inactive";
} else if (rsc->role == RSC_ROLE_PROMOTED) {
reason = "promoted";
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
reason = "failed";
} else {
reason = "active";
}
pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
(assign_to? assign_to->details->uname : "no node"), reason);
pcmk__finalize_assignment(rsc, assign_to, true);
} else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) {
pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", rsc->id);
pcmk__finalize_assignment(rsc, NULL, true);
} else if (pcmk_is_set(rsc->flags, pe_rsc_provisional)
&& assign_best_node(rsc, prefer)) {
// Assignment successful
} else if (rsc->allocated_to == NULL) {
if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
} else if (rsc->running_on != NULL) {
pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
}
} else {
pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id,
pe__node_name(rsc->allocated_to));
}
pe__clear_resource_flags(rsc, pe_rsc_allocating);
if (rsc->is_remote_node) {
remote_connection_assigned(rsc);
}
return rsc->allocated_to;
}
/*!
* \internal
* \brief Schedule actions to bring resource down and back to current role
*
* \param[in,out] rsc Resource to restart
* \param[in] current Node that resource should be brought down on
* \param[in] need_stop Whether the resource must be stopped
* \param[in] need_promote Whether the resource must be promoted
*
* \return Role that resource would have after scheduled actions are taken
*/
static void
schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
bool need_stop, bool need_promote)
{
enum rsc_role_e role = rsc->role;
enum rsc_role_e next_role;
rsc_transition_fn fn = NULL;
pe__set_resource_flags(rsc, pe_rsc_restarting);
// Bring resource down to a stop on its current node
while (role != RSC_ROLE_STOPPED) {
next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
(need_stop? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, current, !need_stop);
role = next_role;
}
// Bring resource up to its next role on its next node
while ((rsc->role <= rsc->next_role) && (role != rsc->role)
&& !pcmk_is_set(rsc->flags, pe_rsc_block)) {
bool required = need_stop;
next_role = rsc_state_matrix[role][rsc->role];
if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
required = true;
}
pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
(required? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, rsc->allocated_to, !required);
role = next_role;
}
pe__clear_resource_flags(rsc, pe_rsc_restarting);
}
/*!
* \internal
* \brief If a resource's next role is not explicitly specified, set a default
*
* \param[in,out] rsc Resource to set next role for
*
* \return "explicit" if next role was explicitly set, otherwise "implicit"
*/
static const char *
set_default_next_role(pe_resource_t *rsc)
{
if (rsc->next_role != RSC_ROLE_UNKNOWN) {
return "explicit";
}
if (rsc->allocated_to == NULL) {
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "assignment");
} else {
pe__set_next_role(rsc, RSC_ROLE_STARTED, "assignment");
}
return "implicit";
}
/*!
* \internal
* \brief Create an action to represent an already pending start
*
* \param[in,out] rsc Resource to create start action for
*/
static void
create_pending_start(pe_resource_t *rsc)
{
pe_action_t *start = NULL;
pe_rsc_trace(rsc,
"Creating action for %s to represent already pending start",
rsc->id);
start = start_action(rsc, rsc->allocated_to, TRUE);
pe__set_action_flags(start, pe_action_print_always);
}
/*!
* \internal
* \brief Schedule actions needed to take a resource to its next role
*
* \param[in,out] rsc Resource to schedule actions for
*/
static void
schedule_role_transition_actions(pe_resource_t *rsc)
{
enum rsc_role_e role = rsc->role;
while (role != rsc->next_role) {
enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role];
rsc_transition_fn fn = NULL;
pe_rsc_trace(rsc,
"Creating action to take %s from %s to %s (ending at %s)",
rsc->id, role2text(role), role2text(next_role),
role2text(rsc->next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, rsc->allocated_to, false);
role = next_role;
}
}
/*!
* \internal
* \brief Create all actions needed for a given primitive resource
*
* \param[in,out] rsc Primitive resource to create actions for
*/
void
pcmk__primitive_create_actions(pe_resource_t *rsc)
{
bool need_stop = false;
bool need_promote = false;
bool is_moving = false;
bool allow_migrate = false;
bool multiply_active = false;
pe_node_t *current = NULL;
unsigned int num_all_active = 0;
unsigned int num_clean_active = 0;
const char *next_role_source = NULL;
CRM_ASSERT(rsc != NULL);
next_role_source = set_default_next_role(rsc);
pe_rsc_trace(rsc,
"Creating all actions for %s transition from %s to %s "
"(%s) on %s",
rsc->id, role2text(rsc->role), role2text(rsc->next_role),
next_role_source, pe__node_name(rsc->allocated_to));
current = pe__find_active_on(rsc, &num_all_active, &num_clean_active);
g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration,
rsc);
if ((current != NULL) && (rsc->allocated_to != NULL)
&& (current->details != rsc->allocated_to->details)
&& (rsc->next_role >= RSC_ROLE_STARTED)) {
pe_rsc_trace(rsc, "Moving %s from %s to %s",
rsc->id, pe__node_name(current),
pe__node_name(rsc->allocated_to));
is_moving = true;
allow_migrate = pcmk__rsc_can_migrate(rsc, current);
// This is needed even if migrating (though I'm not sure why ...)
need_stop = true;
}
// Check whether resource is partially migrated and/or multiply active
if ((rsc->partial_migration_source != NULL)
&& (rsc->partial_migration_target != NULL)
&& allow_migrate && (num_all_active == 2)
&& (current->details == rsc->partial_migration_source->details)
&& (rsc->allocated_to->details == rsc->partial_migration_target->details)) {
/* A partial migration is in progress, and the migration target remains
* the same as when the migration began.
*/
pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
} else if ((rsc->partial_migration_source != NULL)
|| (rsc->partial_migration_target != NULL)) {
// A partial migration is in progress but can't be continued
if (num_all_active > 2) {
// The resource is migrating *and* multiply active!
crm_notice("Forcing recovery of %s because it is migrating "
"from %s to %s and possibly active elsewhere",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
} else {
// The migration source or target isn't available
crm_notice("Forcing recovery of %s because it can no longer "
"migrate from %s to %s",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
}
need_stop = true;
rsc->partial_migration_source = rsc->partial_migration_target = NULL;
allow_migrate = false;
} else if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
multiply_active = (num_all_active > 1);
} else {
/* If a resource has "requires" set to nothing or quorum, don't consider
* it active on unclean nodes (similar to how all resources behave when
* stonith-enabled is false). We can start such resources elsewhere
* before fencing completes, and if we considered the resource active on
* the failed node, we would attempt recovery for being active on
* multiple nodes.
*/
multiply_active = (num_clean_active > 1);
}
if (multiply_active) {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Resource was (possibly) incorrectly multiply active
pe_proc_err("%s resource %s might be active on %u nodes (%s)",
pcmk__s(class, "Untyped"), rsc->id, num_all_active,
recovery2text(rsc->recovery_type));
crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ"
"#Resource_is_Too_Active for more information");
switch (rsc->recovery_type) {
case recovery_stop_start:
need_stop = true;
break;
case recovery_stop_unexpected:
need_stop = true; // stop_resource() will skip expected node
pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
break;
default:
break;
}
} else {
pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
}
if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
create_pending_start(rsc);
}
if (is_moving) {
// Remaining tests are only for resources staying where they are
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
if (pcmk_is_set(rsc->flags, pe_rsc_stop)) {
need_stop = true;
pe_rsc_trace(rsc, "Recovering %s", rsc->id);
} else {
pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
if (rsc->next_role == RSC_ROLE_PROMOTED) {
need_promote = true;
}
}
} else if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
need_stop = true;
} else if ((rsc->role > RSC_ROLE_STARTED) && (current != NULL)
&& (rsc->allocated_to != NULL)) {
pe_action_t *start = NULL;
pe_rsc_trace(rsc, "Creating start action for promoted resource %s",
rsc->id);
start = start_action(rsc, rsc->allocated_to, TRUE);
if (!pcmk_is_set(start->flags, pe_action_optional)) {
// Recovery of a promoted resource
pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
need_stop = true;
}
}
// Create any actions needed to bring resource down and back up to same role
schedule_restart_actions(rsc, current, need_stop, need_promote);
// Create any actions needed to take resource from this role to the next
schedule_role_transition_actions(rsc);
pcmk__create_recurring_actions(rsc);
if (allow_migrate) {
pcmk__create_migration_actions(rsc, current);
}
}
/*!
* \internal
* \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
*
* \param[in] rsc Resource to check
*/
static void
rsc_avoids_remote_nodes(const pe_resource_t *rsc)
{
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (node->details->remote_rsc != NULL) {
node->weight = -INFINITY;
}
}
}
/*!
* \internal
* \brief Return allowed nodes as (possibly sorted) list
*
* Convert a resource's hash table of allowed nodes to a list. If printing to
* stdout, sort the list, to keep action ID numbers consistent for regression
* test output (while avoiding the performance hit on a live cluster).
*
* \param[in] rsc Resource to check for allowed nodes
* \param[in] data_set Cluster working set
*
* \return List of resource's allowed nodes
* \note Callers should take care not to rely on the list being sorted.
*/
static GList *
allowed_nodes_as_list(pe_resource_t *rsc, pe_working_set_t *data_set)
{
GList *allowed_nodes = NULL;
if (rsc->allowed_nodes) {
allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
}
if (!pcmk__is_daemon) {
allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
}
return allowed_nodes;
}
/*!
* \internal
* \brief Create implicit constraints needed for a primitive resource
*
* \param[in,out] rsc Primitive resource to create implicit constraints for
*/
void
pcmk__primitive_internal_constraints(pe_resource_t *rsc)
{
pe_resource_t *top = NULL;
GList *allowed_nodes = NULL;
bool check_unfencing = false;
bool check_utilization = false;
CRM_ASSERT(rsc != NULL);
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc,
"Skipping implicit constraints for unmanaged resource %s",
rsc->id);
return;
}
top = uber_parent(rsc);
// Whether resource requires unfencing
check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device)
&& pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing)
&& pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing);
// Whether a non-default placement strategy is used
check_utilization = (g_hash_table_size(rsc->utilization) > 0)
&& !pcmk__str_eq(rsc->cluster->placement_strategy,
"default", pcmk__str_casei);
// Order stops before starts (i.e. restart)
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
pe_order_optional|pe_order_implies_then|pe_order_restart,
rsc->cluster);
// Promotable ordering: demote before stop, start before promote
if (pcmk_is_set(top->flags, pe_rsc_promotable)
|| (rsc->role > RSC_ROLE_UNPROMOTED)) {
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
pe_order_promoted_implies_first, rsc->cluster);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL,
pe_order_runnable_left, rsc->cluster);
}
// Don't clear resource history if probing on same node
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0),
NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0),
NULL, pe_order_same_node|pe_order_then_cancels_first,
rsc->cluster);
// Certain checks need allowed nodes
if (check_unfencing || check_utilization || (rsc->container != NULL)) {
allowed_nodes = allowed_nodes_as_list(rsc, rsc->cluster);
}
if (check_unfencing) {
// Check whether the node needs to be unfenced
for (GList *item = allowed_nodes; item; item = item->next) {
pe_node_t *node = item->data;
pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE,
rsc->cluster);
crm_debug("Ordering any stops of %s before %s, and any starts after",
rsc->id, unfence->uuid);
/*
* It would be more efficient to order clone resources once,
* rather than order each instance, but ordering the instance
* allows us to avoid unnecessary dependencies that might conflict
* with user constraints.
*
* @TODO: This constraint can still produce a transition loop if the
* resource has a stop scheduled on the node being unfenced, and
* there is a user ordering constraint to start some other resource
* (which will be ordered after the unfence) before stopping this
* resource. An example is "start some slow-starting cloned service
* before stopping an associated virtual IP that may be moving to
* it":
* stop this -> unfencing -> start that -> stop this
*/
pcmk__new_ordering(rsc, stop_key(rsc), NULL,
NULL, strdup(unfence->uuid), unfence,
pe_order_optional|pe_order_same_node,
rsc->cluster);
pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
rsc, start_key(rsc), NULL,
pe_order_implies_then_on_node|pe_order_same_node,
rsc->cluster);
}
}
if (check_utilization) {
pcmk__create_utilization_constraints(rsc, allowed_nodes);
}
if (rsc->container != NULL) {
pe_resource_t *remote_rsc = NULL;
if (rsc->is_remote_node) {
// rsc is the implicit remote connection for a guest or bundle node
/* Guest resources are not allowed to run on Pacemaker Remote nodes,
* to avoid nesting remotes. However, bundles are allowed.
*/
if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
rsc_avoids_remote_nodes(rsc->container);
}
/* If someone cleans up a guest or bundle node's container, we will
* likely schedule a (re-)probe of the container and recovery of the
* connection. Order the connection stop after the container probe,
* so that if we detect the container running, we will trigger a new
* transition and avoid the unnecessary recovery.
*/
pcmk__order_resource_actions(rsc->container, RSC_STATUS, rsc,
RSC_STOP, pe_order_optional);
/* A user can specify that a resource must start on a Pacemaker Remote
* node by explicitly configuring it with the container=NODENAME
* meta-attribute. This is of questionable merit, since location
* constraints can accomplish the same thing. But we support it, so here
* we check whether a resource (that is not itself a remote connection)
* has container set to a remote node or guest node resource.
*/
} else if (rsc->container->is_remote_node) {
remote_rsc = rsc->container;
} else {
remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
rsc->container);
}
if (remote_rsc != NULL) {
/* Force the resource on the Pacemaker Remote node instead of
* colocating the resource with the container resource.
*/
for (GList *item = allowed_nodes; item; item = item->next) {
pe_node_t *node = item->data;
if (node->details->remote_rsc != remote_rsc) {
node->weight = -INFINITY;
}
}
} else {
/* This resource is either a filler for a container that does NOT
* represent a Pacemaker Remote node, or a Pacemaker Remote
* connection resource for a guest node or bundle.
*/
int score;
crm_trace("Order and colocate %s relative to its container %s",
rsc->id, rsc->container->id);
pcmk__new_ordering(rsc->container,
pcmk__op_key(rsc->container->id, RSC_START, 0),
NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0),
NULL,
pe_order_implies_then|pe_order_runnable_left,
rsc->cluster);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
rsc->container,
pcmk__op_key(rsc->container->id, RSC_STOP, 0),
NULL, pe_order_implies_first, rsc->cluster);
if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
score = 10000; /* Highly preferred but not essential */
} else {
score = INFINITY; /* Force them to run on the same host */
}
pcmk__new_colocation("resource-with-container", NULL, score, rsc,
rsc->container, NULL, NULL, true,
rsc->cluster);
}
}
if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) {
/* Remote connections and fencing devices are not allowed to run on
* Pacemaker Remote nodes
*/
rsc_avoids_remote_nodes(rsc);
}
g_list_free(allowed_nodes);
}
/*!
* \internal
* \brief Apply a colocation's score to node weights or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node weights (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *colocation,
bool for_dependent)
{
enum pcmk__coloc_affects filter_results;
CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
return);
if (for_dependent) {
// Always process on behalf of primary resource
primary->cmds->apply_coloc_score(dependent, primary, colocation, false);
return;
}
filter_results = pcmk__colocation_affects(dependent, primary, colocation,
false);
pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
((colocation->score > 0)? "Colocating" : "Anti-colocating"),
dependent->id, primary->id, colocation->id, colocation->score,
filter_results);
switch (filter_results) {
case pcmk__coloc_affects_role:
pcmk__apply_coloc_to_priority(dependent, primary, colocation);
break;
case pcmk__coloc_affects_location:
pcmk__apply_coloc_to_weights(dependent, primary, colocation);
break;
default: // pcmk__coloc_affects_nothing
return;
}
}
/*!
* \internal
* \brief Return action flags for a given primitive resource action
*
* \param[in,out] action Action to get flags for
* \param[in] node If not NULL, limit effects to this node (ignored)
*
* \return Flags appropriate to \p action on \p node
*/
enum pe_action_flags
pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node)
{
CRM_ASSERT(action != NULL);
return action->flags;
}
/*!
* \internal
* \brief Check whether a node is a multiply active resource's expected node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p rsc is multiply active with multiple-active set to
* stop_unexpected, and \p node is the node where it will remain active
* \note This assumes that the resource's next role cannot be changed to stopped
* after this is called, which should be reasonable if status has already
* been unpacked and resources have been assigned to nodes.
*/
static bool
is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
{
return pcmk_all_flags_set(rsc->flags,
pe_rsc_stop_unexpected|pe_rsc_restarting)
&& (rsc->next_role > RSC_ROLE_STOPPED)
&& (rsc->allocated_to != NULL) && (node != NULL)
&& (rsc->allocated_to->details == node->details);
}
/*!
* \internal
* \brief Schedule actions needed to stop a resource wherever it is active
*
* \param[in,out] rsc Resource being stopped
* \param[in] node Node where resource is being stopped (ignored)
* \param[in] optional Whether actions should be optional
*/
static void
stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *current = (pe_node_t *) iter->data;
pe_action_t *stop = NULL;
if (is_expected_node(rsc, current)) {
/* We are scheduling restart actions for a multiply active resource
* with multiple-active=stop_unexpected, and this is where it should
* not be stopped.
*/
pe_rsc_trace(rsc,
"Skipping stop of multiply active resource %s "
"on expected node %s",
rsc->id, pe__node_name(current));
continue;
}
if (rsc->partial_migration_target != NULL) {
if ((rsc->partial_migration_target->details == current->details)
// Only if the allocated node still is the migration target
&& (rsc->allocated_to != NULL)
&& rsc->allocated_to->details == rsc->partial_migration_target->details) {
pe_rsc_trace(rsc,
"Skipping stop of %s on %s "
"because partial migration there will continue",
rsc->id, pe__node_name(current));
continue;
} else {
pe_rsc_trace(rsc,
"Forcing stop of %s on %s "
"because migration target changed",
rsc->id, pe__node_name(current));
optional = false;
}
}
pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
rsc->id, pe__node_name(current));
stop = stop_action(rsc, current, optional);
if (rsc->allocated_to == NULL) {
pe_action_set_reason(stop, "node availability", true);
} else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting
|pe_rsc_stop_unexpected)) {
/* We are stopping a multiply active resource on a node that is
* not its expected node, and we are still scheduling restart
* actions, so the stop is for being multiply active.
*/
pe_action_set_reason(stop, "being multiply active", true);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe__clear_action_flags(stop, pe_action_runnable);
}
if (pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop)) {
pcmk__schedule_cleanup(rsc, current, optional);
}
if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) {
pe_action_t *unfence = pe_fence_op(current, "on", true, NULL, false,
rsc->cluster);
order_actions(stop, unfence, pe_order_implies_first);
if (!pcmk__node_unfenced(current)) {
pe_proc_err("Stopping %s until %s can be unfenced",
rsc->id, pe__node_name(current));
}
}
}
}
/*!
* \internal
* \brief Schedule actions needed to start a resource on a node
*
* \param[in,out] rsc Resource being started
* \param[in] node Node where resource should be started
* \param[in] optional Whether actions should be optional
*/
static void
start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
pe_action_t *start = NULL;
CRM_ASSERT(node != NULL);
pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
(optional? "optional" : "required"), rsc->id,
pe__node_name(node), node->weight);
start = start_action(rsc, node, TRUE);
pcmk__order_vs_unfence(rsc, node, start, pe_order_implies_then);
if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) {
pe__clear_action_flags(start, pe_action_optional);
}
if (is_expected_node(rsc, node)) {
/* This could be a problem if the start becomes necessary for other
* reasons later.
*/
pe_rsc_trace(rsc,
"Start of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pe__node_name(node));
pe__set_action_flags(start, pe_action_pseudo);
}
}
/*!
* \internal
* \brief Schedule actions needed to promote a resource on a node
*
* \param[in,out] rsc Resource being promoted
* \param[in] node Node where resource should be promoted
* \param[in] optional Whether actions should be optional
*/
static void
promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
GList *iter = NULL;
GList *action_list = NULL;
bool runnable = true;
CRM_ASSERT(node != NULL);
// Any start must be runnable for promotion to be runnable
action_list = pe__resource_actions(rsc, node, RSC_START, true);
for (iter = action_list; iter != NULL; iter = iter->next) {
pe_action_t *start = (pe_action_t *) iter->data;
if (!pcmk_is_set(start->flags, pe_action_runnable)) {
runnable = false;
}
}
g_list_free(action_list);
if (runnable) {
pe_action_t *promote = promote_action(rsc, node, optional);
pe_rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
(optional? "optional" : "required"), rsc->id,
pe__node_name(node));
if (is_expected_node(rsc, node)) {
/* This could be a problem if the promote becomes necessary for
* other reasons later.
*/
pe_rsc_trace(rsc,
"Promotion of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pe__node_name(node));
pe__set_action_flags(promote, pe_action_pseudo);
}
} else {
pe_rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
rsc->id, pe__node_name(node));
action_list = pe__resource_actions(rsc, node, RSC_PROMOTE, true);
for (iter = action_list; iter != NULL; iter = iter->next) {
pe_action_t *promote = (pe_action_t *) iter->data;
pe__clear_action_flags(promote, pe_action_runnable);
}
g_list_free(action_list);
}
}
/*!
* \internal
* \brief Schedule actions needed to demote a resource wherever it is active
*
* \param[in,out] rsc Resource being demoted
* \param[in] node Node where resource should be demoted (ignored)
* \param[in] optional Whether actions should be optional
*/
static void
demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
/* Since this will only be called for a primitive (possibly as an instance
* of a collective resource), the resource is multiply active if it is
* running on more than one node, so we want to demote on all of them as
* part of recovery, regardless of which one is the desired node.
*/
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *current = (pe_node_t *) iter->data;
if (is_expected_node(rsc, current)) {
pe_rsc_trace(rsc,
"Skipping demote of multiply active resource %s "
"on expected node %s",
rsc->id, pe__node_name(current));
} else {
pe_rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
(optional? "optional" : "required"), rsc->id,
pe__node_name(current));
demote_action(rsc, current, optional);
}
}
}
static void
assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
CRM_ASSERT(false);
}
/*!
* \internal
* \brief Schedule cleanup of a resource
*
* \param[in,out] rsc Resource to clean up
* \param[in] node Node to clean up on
* \param[in] optional Whether clean-up should be optional
*/
void
pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional)
{
/* If the cleanup is required, its orderings are optional, because they're
* relevant only if both actions are required. Conversely, if the cleanup is
* optional, the orderings make the then action required if the first action
* becomes required.
*/
enum pe_ordering flag = optional? pe_order_implies_then : pe_order_optional;
CRM_CHECK((rsc != NULL) && (node != NULL), return);
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
rsc->id, pe__node_name(node));
return;
}
if (node->details->unclean || !node->details->online) {
pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
rsc->id, pe__node_name(node));
return;
}
crm_notice("Scheduling clean-up of %s on %s", rsc->id, pe__node_name(node));
delete_action(rsc, node, optional);
// stop -> clean-up -> start
pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_DELETE, flag);
pcmk__order_resource_actions(rsc, RSC_DELETE, rsc, RSC_START, flag);
}
/*!
* \internal
* \brief Add primitive meta-attributes relevant to graph actions to XML
*
* \param[in] rsc Primitive resource whose meta-attributes should be added
* \param[in,out] xml Transition graph action attributes XML to add to
*/
void
pcmk__primitive_add_graph_meta(pe_resource_t *rsc, xmlNode *xml)
{
char *name = NULL;
char *value = NULL;
pe_resource_t *parent = NULL;
CRM_ASSERT((rsc != NULL) && (xml != NULL));
/* Clone instance numbers get set internally as meta-attributes, and are
* needed in the transition graph (for example, to tell unique clone
* instances apart).
*/
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
if (value != NULL) {
name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
crm_xml_add(xml, name, value);
free(name);
}
// Not sure if this one is really needed ...
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
if (value != NULL) {
name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
crm_xml_add(xml, name, value);
free(name);
}
/* The container meta-attribute can be set on the primitive itself or one of
* its parents (for example, a group inside a container resource), so check
* them all, and keep the highest one found.
*/
for (parent = rsc; parent != NULL; parent = parent->parent) {
if (parent->container != NULL) {
crm_xml_add(xml, CRM_META "_" XML_RSC_ATTR_CONTAINER,
parent->container->id);
}
}
+
+ /* Bundle replica children will get their external-ip set internally as a
+ * meta-attribute. The graph action needs it, but under a different naming
+ * convention than other meta-attributes.
+ */
+ value = g_hash_table_lookup(rsc->meta, "external-ip");
+ if (value != NULL) {
+ crm_xml_add(xml, "pcmk_external_ip", value);
+ }
}
// Primitive implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__primitive_add_utilization(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *all_rscs, GHashTable *utilization)
{
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return;
}
pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization",
orig_rsc->id, rsc->id);
pcmk__release_node_capacity(utilization, rsc);
}
/*!
* \internal
* \brief Get epoch time of node's shutdown attribute (or now if none)
*
* \param[in] node Node to check
* \param[in] data_set Cluster working set
*
* \return Epoch time corresponding to shutdown attribute if set or now if not
*/
static time_t
shutdown_time(pe_node_t *node, pe_working_set_t *data_set)
{
const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
time_t result = 0;
if (shutdown != NULL) {
long long result_ll;
if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
result = (time_t) result_ll;
}
}
return (result == 0)? get_effective_time(data_set) : result;
}
// Primitive implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__primitive_shutdown_lock(pe_resource_t *rsc)
{
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Fence devices and remote connections can't be locked
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
|| pe__resource_is_remote_conn(rsc, rsc->cluster)) {
return;
}
if (rsc->lock_node != NULL) {
// The lock was obtained from resource history
if (rsc->running_on != NULL) {
/* The resource was started elsewhere even though it is now
* considered locked. This shouldn't be possible, but as a
* failsafe, we don't want to disturb the resource now.
*/
pe_rsc_info(rsc,
"Cancelling shutdown lock because %s is already active",
rsc->id);
pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster);
rsc->lock_node = NULL;
rsc->lock_time = 0;
}
// Only a resource active on exactly one node can be locked
} else if (pcmk__list_of_1(rsc->running_on)) {
pe_node_t *node = rsc->running_on->data;
if (node->details->shutdown) {
if (node->details->unclean) {
pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
rsc->id, pe__node_name(node));
} else {
rsc->lock_node = node;
rsc->lock_time = shutdown_time(node, rsc->cluster);
}
}
}
if (rsc->lock_node == NULL) {
// No lock needed
return;
}
if (rsc->cluster->shutdown_lock > 0) {
time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
rsc->id, pe__node_name(rsc->lock_node),
(long long) lock_expiration);
pe__update_recheck_time(++lock_expiration, rsc->cluster);
} else {
pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
rsc->id, pe__node_name(rsc->lock_node));
}
// If resource is locked to one node, ban it from all other nodes
for (GList *item = rsc->cluster->nodes; item != NULL; item = item->next) {
pe_node_t *node = item->data;
if (strcmp(node->details->uname, rsc->lock_node->details->uname)) {
resource_location(rsc, node, -CRM_SCORE_INFINITY,
XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster);
}
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:15 PM (21 h, 58 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2002543
Default Alt Text
(100 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment