Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/include/crm/common/action_relation_internal.h b/include/crm/common/action_relation_internal.h
index 4a423459cf..d3aaecb497 100644
--- a/include/crm/common/action_relation_internal.h
+++ b/include/crm/common/action_relation_internal.h
@@ -1,103 +1,106 @@
/*
* Copyright 2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_ACTION_RELATION_INTERNAL__H
# define PCMK__CRM_COMMON_ACTION_RELATION_INTERNAL__H
/*!
* Flags to indicate the relationship between two actions
*
* @COMPAT The values and semantics of these flags should not be changed until
* the deprecated enum pe_ordering is dropped from the public API.
*/
enum pcmk__action_relation_flags {
//! No relation (compare with equality rather than bit set)
pcmk__ar_none = 0U,
//! Actions are ordered (optionally, if no other flags are set)
pcmk__ar_ordered = (1U << 0),
//! Relation applies only if 'first' cannot be part of a live migration
pcmk__ar_if_first_unmigratable = (1U << 1),
/*!
* If 'then' is required, 'first' becomes required (and becomes unmigratable
* if 'then' is); also, if 'first' is a stop of a blocked resource, 'then'
* becomes unrunnable
*/
pcmk__ar_then_implies_first = (1U << 4),
/*!
* If 'first' is required, 'then' becomes required; if 'first' is a stop of
* a blocked resource, 'then' becomes unrunnable
*/
pcmk__ar_first_implies_then = (1U << 5),
/*!
* If 'then' is required and for a promoted instance, 'first' becomes
* required (and becomes unmigratable if 'then' is)
*/
pcmk__ar_promoted_then_implies_first = (1U << 6),
/*!
* 'first' is runnable only if 'then' is both runnable and migratable,
* and 'first' becomes required if 'then' is
*/
pcmk__ar_unmigratable_then_blocks = (1U << 7),
//! 'then' is runnable (and migratable) only if 'first' is runnable
pcmk__ar_unrunnable_first_blocks = (1U << 8),
//! If 'first' is unrunnable, 'then' becomes a real, unmigratable action
pcmk__ar_first_else_then = (1U << 9),
//! If 'first' is required, 'then' action for instance on same node is
pcmk__ar_first_implies_same_node_then = (1U << 10),
/*!
* Disable relation if 'first' is unrunnable and for an active resource,
* otherwise order actions and make 'then' unrunnable if 'first' is.
*
* This is used to order a bundle replica's start of its container before a
* probe of its remote connection resource, in case the connection uses the
* REMOTE_CONTAINER_HACK to replace the connection address with where the
* container is running.
*/
pcmk__ar_nested_remote_probe = (1U << 11),
/*!
* If 'first' is for a blocked resource, make 'then' unrunnable.
*
* If 'then' is required, make 'first' required, make 'first' unmigratable
* if 'then' is unmigratable, and make 'then' unrunnable if 'first' is
* unrunnable.
*
* If 'then' is unrunnable and for the same resource as 'first', make
* 'first' required if it is runnable, and make 'first' unmigratable if
* 'then' is unmigratable.
*
* This is used for "stop then start primitive" (restarts) and
* "stop group member then stop previous member".
*/
pcmk__ar_intermediate_stop = (1U << 12),
/*!
* The actions must be serialized if in the same transition but can be in
* either order. (In practice, we always arrange them as 'first' then
* 'then', so they end up being essentially the same as optional orderings.)
*
* @TODO Handle more intelligently -- for example, we could schedule the
* action with the fewest inputs first, so we're more likely to execute at
* least one if there is a failure during the transition. Or, we could
* prefer certain action types over others, or base it on resource priority.
*/
pcmk__ar_serialize = (1U << 14),
+
+ //! Relation applies only if actions are on same node
+ pcmk__ar_if_on_same_node = (1U << 15),
};
#endif // PCMK__CRM_COMMON_ACTION_RELATION_INTERNAL__H
diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c
index 5113ec674b..9fb4fba76d 100644
--- a/lib/pacemaker/pcmk_sched_actions.c
+++ b/lib/pacemaker/pcmk_sched_actions.c
@@ -1,1935 +1,1935 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <sys/param.h>
#include <glib.h>
#include <crm/lrmd_internal.h>
#include <crm/common/scheduler_internal.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Get the action flags relevant to ordering constraints
*
* \param[in,out] action Action to check
* \param[in] node Node that *other* action in the ordering is on
* (used only for clone resource actions)
*
* \return Action flags that should be used for orderings
*/
static uint32_t
action_flags_for_ordering(pe_action_t *action, const pe_node_t *node)
{
bool runnable = false;
uint32_t flags;
// For non-resource actions, return the action flags
if (action->rsc == NULL) {
return action->flags;
}
/* For non-clone resources, or a clone action not assigned to a node,
* return the flags as determined by the resource method without a node
* specified.
*/
flags = action->rsc->cmds->action_flags(action, NULL);
if ((node == NULL) || !pe_rsc_is_clone(action->rsc)) {
return flags;
}
/* Otherwise (i.e., for clone resource actions on a specific node), first
* remember whether the non-node-specific action is runnable.
*/
runnable = pcmk_is_set(flags, pcmk_action_runnable);
// Then recheck the resource method with the node
flags = action->rsc->cmds->action_flags(action, node);
/* For clones in ordering constraints, the node-specific "runnable" doesn't
* matter, just the non-node-specific setting (i.e., is the action runnable
* anywhere).
*
* This applies only to runnable, and only for ordering constraints. This
* function shouldn't be used for other types of constraints without
* changes. Not very satisfying, but it's logical and appears to work well.
*/
if (runnable && !pcmk_is_set(flags, pcmk_action_runnable)) {
pe__set_raw_action_flags(flags, action->rsc->id, pcmk_action_runnable);
}
return flags;
}
/*!
* \internal
* \brief Get action UUID that should be used with a resource ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the UUID and resource of the first action in an
* ordering, this returns the UUID of the action that should actually be used
* for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0").
*
* \param[in] first_uuid UUID of first action in ordering
* \param[in] first_rsc Resource of first action in ordering
*
* \return Newly allocated copy of UUID to use with ordering
* \note It is the caller's responsibility to free the return value.
*/
static char *
action_uuid_for_ordering(const char *first_uuid, const pe_resource_t *first_rsc)
{
guint interval_ms = 0;
char *uuid = NULL;
char *rid = NULL;
char *first_task_str = NULL;
enum action_tasks first_task = pcmk_action_unspecified;
enum action_tasks remapped_task = pcmk_action_unspecified;
// Only non-notify actions for collective resources need remapping
if ((strstr(first_uuid, PCMK_ACTION_NOTIFY) != NULL)
|| (first_rsc->variant < pcmk_rsc_variant_group)) {
goto done;
}
// Only non-recurring actions need remapping
CRM_ASSERT(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms));
if (interval_ms > 0) {
goto done;
}
first_task = text2task(first_task_str);
switch (first_task) {
case pcmk_action_stop:
case pcmk_action_start:
case pcmk_action_notify:
case pcmk_action_promote:
case pcmk_action_demote:
remapped_task = first_task + 1;
break;
case pcmk_action_stopped:
case pcmk_action_started:
case pcmk_action_notified:
case pcmk_action_promoted:
case pcmk_action_demoted:
remapped_task = first_task;
break;
case pcmk_action_monitor:
case pcmk_action_shutdown:
case pcmk_action_fence:
break;
default:
crm_err("Unknown action '%s' in ordering", first_task_str);
break;
}
if (remapped_task != pcmk_action_unspecified) {
/* If a clone or bundle has notifications enabled, the ordering will be
* relative to when notifications have been sent for the remapped task.
*/
if (pcmk_is_set(first_rsc->flags, pcmk_rsc_notify)
&& (pe_rsc_is_clone(first_rsc) || pe_rsc_is_bundled(first_rsc))) {
uuid = pcmk__notify_key(rid, "confirmed-post",
task2text(remapped_task));
} else {
uuid = pcmk__op_key(rid, task2text(remapped_task), 0);
}
pe_rsc_trace(first_rsc,
"Remapped action UUID %s to %s for ordering purposes",
first_uuid, uuid);
}
done:
if (uuid == NULL) {
uuid = strdup(first_uuid);
CRM_ASSERT(uuid != NULL);
}
free(first_task_str);
free(rid);
return uuid;
}
/*!
* \internal
* \brief Get actual action that should be used with an ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the first action in an ordering, this returns the
* the action that should actually be used for ordering (for example, the
* started action instead of the start action).
*
* \param[in] action First action in an ordering
*
* \return Actual action that should be used for the ordering
*/
static pe_action_t *
action_for_ordering(pe_action_t *action)
{
pe_action_t *result = action;
pe_resource_t *rsc = action->rsc;
if ((rsc != NULL) && (rsc->variant >= pcmk_rsc_variant_group)
&& (action->uuid != NULL)) {
char *uuid = action_uuid_for_ordering(action->uuid, rsc);
result = find_first_action(rsc->actions, uuid, NULL, NULL);
if (result == NULL) {
crm_warn("Not remapping %s to %s because %s does not have "
"remapped action", action->uuid, uuid, rsc->id);
result = action;
}
free(uuid);
}
return result;
}
/*!
* \internal
* \brief Wrapper for update_ordered_actions() method for readability
*
* \param[in,out] rsc Resource to call method for
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this
* node (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates
* (may include pcmk_action_optional to affect only
* mandatory actions, and pe_action_runnable to
* affect only runnable actions)
* \param[in] type Group of enum pcmk__action_relation_flags to apply
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
static inline uint32_t
update(pe_resource_t *rsc, pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type,
pe_working_set_t *data_set)
{
return rsc->cmds->update_ordered_actions(first, then, node, flags, filter,
type, data_set);
}
/*!
* \internal
* \brief Update flags for ordering's actions appropriately for ordering's flags
*
* \param[in,out] first First action in an ordering
* \param[in,out] then Then action in an ordering
* \param[in] first_flags Action flags for \p first for ordering purposes
* \param[in] then_flags Action flags for \p then for ordering purposes
* \param[in,out] order Action wrapper for \p first in ordering
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags
*/
static uint32_t
update_action_for_ordering_flags(pe_action_t *first, pe_action_t *then,
uint32_t first_flags, uint32_t then_flags,
pe_action_wrapper_t *order,
pe_working_set_t *data_set)
{
uint32_t changed = pcmk__updated_none;
/* The node will only be used for clones. If interleaved, node will be NULL,
* otherwise the ordering scope will be limited to the node. Normally, the
* whole 'then' clone should restart if 'first' is restarted, so then->node
* is needed.
*/
pe_node_t *node = then->node;
if (pcmk_is_set(order->type, pcmk__ar_first_implies_same_node_then)) {
/* For unfencing, only instances of 'then' on the same node as 'first'
* (the unfencing operation) should restart, so reset node to
* first->node, at which point this case is handled like a normal
* pcmk__ar_first_implies_then.
*/
pe__clear_order_flags(order->type,
pcmk__ar_first_implies_same_node_then);
pe__set_order_flags(order->type, pcmk__ar_first_implies_then);
node = first->node;
pe_rsc_trace(then->rsc,
"%s then %s: mapped pcmk__ar_first_implies_same_node_then "
"to pcmk__ar_first_implies_then on %s",
first->uuid, then->uuid, pe__node_name(node));
}
if (pcmk_is_set(order->type, pcmk__ar_first_implies_then)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node,
first_flags & pcmk_action_optional,
pcmk_action_optional, pcmk__ar_first_implies_then,
data_set);
} else if (!pcmk_is_set(first_flags, pcmk_action_optional)
&& pcmk_is_set(then->flags, pcmk_action_optional)) {
pe__clear_action_flags(then, pcmk_action_optional);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
pe_rsc_trace(then->rsc,
"%s then %s: %s after pcmk__ar_first_implies_then",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pcmk__ar_intermediate_stop)
&& (then->rsc != NULL)) {
enum pe_action_flags restart = pcmk_action_optional
|pcmk_action_runnable;
changed |= update(then->rsc, first, then, node, first_flags, restart,
pcmk__ar_intermediate_stop, data_set);
pe_rsc_trace(then->rsc,
"%s then %s: %s after pcmk__ar_intermediate_stop",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pcmk__ar_then_implies_first)) {
if (first->rsc != NULL) {
changed |= update(first->rsc, first, then, node, first_flags,
pcmk_action_optional, pcmk__ar_then_implies_first,
data_set);
} else if (!pcmk_is_set(first_flags, pcmk_action_optional)
&& pcmk_is_set(first->flags, pcmk_action_runnable)) {
pe__clear_action_flags(first, pcmk_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_first);
}
pe_rsc_trace(then->rsc,
"%s then %s: %s after pcmk__ar_then_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pcmk__ar_promoted_then_implies_first)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node,
first_flags & pcmk_action_optional,
pcmk_action_optional,
pcmk__ar_promoted_then_implies_first, data_set);
}
pe_rsc_trace(then->rsc,
"%s then %s: %s after pcmk__ar_promoted_then_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_one_or_more)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pcmk_action_runnable, pe_order_one_or_more,
data_set);
} else if (pcmk_is_set(first_flags, pcmk_action_runnable)) {
// We have another runnable instance of "first"
then->runnable_before++;
/* Mark "then" as runnable if it requires a certain number of
* "before" instances to be runnable, and they now are.
*/
if ((then->runnable_before >= then->required_runnable_before)
&& !pcmk_is_set(then->flags, pcmk_action_runnable)) {
pe__set_action_flags(then, pcmk_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_one_or_more",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pcmk__ar_nested_remote_probe)
&& (then->rsc != NULL)) {
if (!pcmk_is_set(first_flags, pcmk_action_runnable)
&& (first->rsc->running_on != NULL)) {
pe_rsc_trace(then->rsc,
"%s then %s: ignoring because first is stopping",
first->uuid, then->uuid);
order->type = pcmk__ar_none;
} else {
changed |= update(then->rsc, first, then, node, first_flags,
pcmk_action_runnable,
pcmk__ar_unrunnable_first_blocks, data_set);
}
pe_rsc_trace(then->rsc,
"%s then %s: %s after pcmk__ar_nested_remote_probe",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pcmk__ar_unrunnable_first_blocks)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pcmk_action_runnable,
pcmk__ar_unrunnable_first_blocks, data_set);
} else if (!pcmk_is_set(first_flags, pcmk_action_runnable)
&& pcmk_is_set(then->flags, pcmk_action_runnable)) {
pe__clear_action_flags(then, pcmk_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
pe_rsc_trace(then->rsc,
"%s then %s: %s after pcmk__ar_unrunnable_first_blocks",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pcmk__ar_unmigratable_then_blocks)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pcmk_action_optional,
pcmk__ar_unmigratable_then_blocks, data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after "
"pcmk__ar_unmigratable_then_blocks",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pcmk__ar_first_else_then)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pcmk_action_optional, pcmk__ar_first_else_then,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_first_else_then",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pcmk__ar_ordered)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pcmk_action_runnable, pcmk__ar_ordered, data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_ordered",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_asymmetrical)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pcmk_action_runnable, pe_order_asymmetrical,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_asymmetrical",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(first->flags, pcmk_action_runnable)
&& pcmk_is_set(order->type, pe_order_implies_then_printed)
&& !pcmk_is_set(first_flags, pcmk_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
then->uuid, first->uuid);
pe__set_action_flags(then, pcmk_action_always_in_graph);
// Don't bother marking 'then' as changed just for this
}
if (pcmk_is_set(order->type, pe_order_implies_first_printed)
&& !pcmk_is_set(then_flags, pcmk_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
first->uuid, then->uuid);
pe__set_action_flags(first, pcmk_action_always_in_graph);
// Don't bother marking 'first' as changed just for this
}
if (pcmk_any_flags_set(order->type, pcmk__ar_first_implies_then
|pcmk__ar_then_implies_first
|pcmk__ar_intermediate_stop)
&& (first->rsc != NULL)
&& !pcmk_is_set(first->rsc->flags, pcmk_rsc_managed)
&& pcmk_is_set(first->rsc->flags, pcmk_rsc_blocked)
&& !pcmk_is_set(first->flags, pcmk_action_runnable)
&& pcmk__str_eq(first->task, PCMK_ACTION_STOP, pcmk__str_none)) {
if (pcmk_is_set(then->flags, pcmk_action_runnable)) {
pe__clear_action_flags(then, pcmk_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after checking whether first "
"is blocked, unmanaged, unrunnable stop",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
return changed;
}
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pcmk_action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pcmk_action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pcmk_action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->details->uname)
/*!
* \internal
* \brief Update an action's flags for all orderings where it is "then"
*
* \param[in,out] then Action to update
* \param[in,out] data_set Cluster working set
*/
void
pcmk__update_action_for_orderings(pe_action_t *then, pe_working_set_t *data_set)
{
GList *lpc = NULL;
uint32_t changed = pcmk__updated_none;
int last_flags = then->flags;
pe_rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s",
action_type_str(then->flags), then->uuid,
action_optional_str(then->flags),
action_runnable_str(then->flags), action_node_str(then));
if (pcmk_is_set(then->flags, pcmk_action_min_runnable)) {
/* Initialize current known "runnable before" actions. As
* update_action_for_ordering_flags() is called for each of then's
* before actions, this number will increment as runnable 'first'
* actions are encountered.
*/
then->runnable_before = 0;
if (then->required_runnable_before == 0) {
/* @COMPAT This ordering constraint uses the deprecated
* "require-all=false" attribute. Treat it like "clone-min=1".
*/
then->required_runnable_before = 1;
}
/* The pe_order_one_or_more clause of update_action_for_ordering_flags()
* (called below) will reset runnable if appropriate.
*/
pe__clear_action_flags(then, pcmk_action_runnable);
}
for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pe_action_t *first = other->action;
pe_node_t *then_node = then->node;
pe_node_t *first_node = first->node;
if ((first->rsc != NULL)
&& (first->rsc->variant == pcmk_rsc_variant_group)
&& pcmk__str_eq(first->task, PCMK_ACTION_START, pcmk__str_none)) {
first_node = first->rsc->fns->location(first->rsc, NULL, FALSE);
if (first_node != NULL) {
pe_rsc_trace(first->rsc, "Found %s for 'first' %s",
pe__node_name(first_node), first->uuid);
}
}
if ((then->rsc != NULL)
&& (then->rsc->variant == pcmk_rsc_variant_group)
&& pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none)) {
then_node = then->rsc->fns->location(then->rsc, NULL, FALSE);
if (then_node != NULL) {
pe_rsc_trace(then->rsc, "Found %s for 'then' %s",
pe__node_name(then_node), then->uuid);
}
}
// Disable constraint if it only applies when on same node, but isn't
- if (pcmk_is_set(other->type, pe_order_same_node)
+ if (pcmk_is_set(other->type, pcmk__ar_if_on_same_node)
&& (first_node != NULL) && (then_node != NULL)
&& !pe__same_node(first_node, then_node)) {
pe_rsc_trace(then->rsc,
"Disabled ordering %s on %s then %s on %s: "
"not same node",
other->action->uuid, pe__node_name(first_node),
then->uuid, pe__node_name(then_node));
other->type = pcmk__ar_none;
continue;
}
pcmk__clear_updated_flags(changed, then, pcmk__updated_first);
if ((first->rsc != NULL)
&& pcmk_is_set(other->type, pe_order_then_cancels_first)
&& !pcmk_is_set(then->flags, pcmk_action_optional)) {
/* 'then' is required, so we must abandon 'first'
* (e.g. a required stop cancels any agent reload).
*/
pe__set_action_flags(other->action, pcmk_action_optional);
if (!strcmp(first->task, PCMK_ACTION_RELOAD_AGENT)) {
pe__clear_resource_flags(first->rsc, pcmk_rsc_reload);
}
}
if ((first->rsc != NULL) && (then->rsc != NULL)
&& (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) {
first = action_for_ordering(first);
}
if (first != other->action) {
pe_rsc_trace(then->rsc, "Ordering %s after %s instead of %s",
then->uuid, first->uuid, other->action->uuid);
}
pe_rsc_trace(then->rsc,
"%s (%#.6x) then %s (%#.6x): type=%#.6x node=%s",
first->uuid, first->flags, then->uuid, then->flags,
other->type, action_node_str(first));
if (first == other->action) {
/* 'first' was not remapped (e.g. from 'start' to 'running'), which
* could mean it is a non-resource action, a primitive resource
* action, or already expanded.
*/
uint32_t first_flags, then_flags;
first_flags = action_flags_for_ordering(first, then_node);
then_flags = action_flags_for_ordering(then, first_node);
changed |= update_action_for_ordering_flags(first, then,
first_flags, then_flags,
other, data_set);
/* 'first' was for a complex resource (clone, group, etc),
* create a new dependency if necessary
*/
} else if (order_actions(first, then, other->type)) {
/* This was the first time 'first' and 'then' were associated,
* start again to get the new actions_before list
*/
pcmk__set_updated_flags(changed, then, pcmk__updated_then);
pe_rsc_trace(then->rsc,
"Disabled ordering %s then %s in favor of %s then %s",
other->action->uuid, then->uuid, first->uuid,
then->uuid);
other->type = pcmk__ar_none;
}
if (pcmk_is_set(changed, pcmk__updated_first)) {
crm_trace("Re-processing %s and its 'after' actions "
"because it changed", first->uuid);
for (GList *lpc2 = first->actions_after; lpc2 != NULL;
lpc2 = lpc2->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc2->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
pcmk__update_action_for_orderings(first, data_set);
}
}
if (pcmk_is_set(then->flags, pcmk_action_min_runnable)) {
if (last_flags == then->flags) {
pcmk__clear_updated_flags(changed, then, pcmk__updated_then);
} else {
pcmk__set_updated_flags(changed, then, pcmk__updated_then);
}
}
if (pcmk_is_set(changed, pcmk__updated_then)) {
crm_trace("Re-processing %s and its 'after' actions because it changed",
then->uuid);
if (pcmk_is_set(last_flags, pcmk_action_runnable)
&& !pcmk_is_set(then->flags, pcmk_action_runnable)) {
pcmk__block_colocation_dependents(then);
}
pcmk__update_action_for_orderings(then, data_set);
for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
}
}
static inline bool
is_primitive_action(const pe_action_t *action)
{
return (action != NULL) && (action->rsc != NULL)
&& (action->rsc->variant == pcmk_rsc_variant_primitive);
}
/*!
* \internal
* \brief Clear a single action flag and set reason text
*
* \param[in,out] action Action whose flag should be cleared
* \param[in] flag Action flag that should be cleared
* \param[in] reason Action that is the reason why flag is being cleared
*/
#define clear_action_flag_because(action, flag, reason) do { \
if (pcmk_is_set((action)->flags, (flag))) { \
pe__clear_action_flags(action, flag); \
if ((action)->rsc != (reason)->rsc) { \
char *reason_text = pe__action2reason((reason), (flag)); \
pe_action_set_reason((action), reason_text, false); \
free(reason_text); \
} \
} \
} while (0)
/*!
* \internal
* \brief Update actions in an asymmetric ordering
*
* If the "first" action in an asymmetric ordering is unrunnable, make the
* "second" action unrunnable as well, if appropriate.
*
* \param[in] first 'First' action in an asymmetric ordering
* \param[in,out] then 'Then' action in an asymmetric ordering
*/
static void
handle_asymmetric_ordering(const pe_action_t *first, pe_action_t *then)
{
/* Only resource actions after an unrunnable 'first' action need updates for
* asymmetric ordering.
*/
if ((then->rsc == NULL)
|| pcmk_is_set(first->flags, pcmk_action_runnable)) {
return;
}
// Certain optional 'then' actions are unaffected by unrunnable 'first'
if (pcmk_is_set(then->flags, pcmk_action_optional)) {
enum rsc_role_e then_rsc_role = then->rsc->fns->state(then->rsc, TRUE);
if ((then_rsc_role == pcmk_role_stopped)
&& pcmk__str_eq(then->task, PCMK_ACTION_STOP, pcmk__str_none)) {
/* If 'then' should stop after 'first' but is already stopped, the
* ordering is irrelevant.
*/
return;
} else if ((then_rsc_role >= pcmk_role_started)
&& pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none)
&& pe__rsc_running_on_only(then->rsc, then->node)) {
/* Similarly if 'then' should start after 'first' but is already
* started on a single node.
*/
return;
}
}
// 'First' can't run, so 'then' can't either
clear_action_flag_because(then, pcmk_action_optional, first);
clear_action_flag_because(then, pcmk_action_runnable, first);
}
/*!
* \internal
* \brief Set action bits appropriately when pe_restart_order is used
*
* \param[in,out] first 'First' action in an ordering with pe_restart_order
* \param[in,out] then 'Then' action in an ordering with pe_restart_order
* \param[in] filter What action flags to care about
*
* \note pe_restart_order is set for "stop resource before starting it" and
* "stop later group member before stopping earlier group member"
*/
static void
handle_restart_ordering(pe_action_t *first, pe_action_t *then, uint32_t filter)
{
const char *reason = NULL;
CRM_ASSERT(is_primitive_action(first));
CRM_ASSERT(is_primitive_action(then));
// We need to update the action in two cases:
// ... if 'then' is required
if (pcmk_is_set(filter, pcmk_action_optional)
&& !pcmk_is_set(then->flags, pcmk_action_optional)) {
reason = "restart";
}
/* ... if 'then' is unrunnable action on same resource (if a resource
* should restart but can't start, we still want to stop)
*/
if (pcmk_is_set(filter, pcmk_action_runnable)
&& !pcmk_is_set(then->flags, pcmk_action_runnable)
&& pcmk_is_set(then->rsc->flags, pcmk_rsc_managed)
&& (first->rsc == then->rsc)) {
reason = "stop";
}
if (reason == NULL) {
return;
}
pe_rsc_trace(first->rsc, "Handling %s -> %s for %s",
first->uuid, then->uuid, reason);
// Make 'first' required if it is runnable
if (pcmk_is_set(first->flags, pcmk_action_runnable)) {
clear_action_flag_because(first, pcmk_action_optional, then);
}
// Make 'first' required if 'then' is required
if (!pcmk_is_set(then->flags, pcmk_action_optional)) {
clear_action_flag_because(first, pcmk_action_optional, then);
}
// Make 'first' unmigratable if 'then' is unmigratable
if (!pcmk_is_set(then->flags, pcmk_action_migratable)) {
clear_action_flag_because(first, pcmk_action_migratable, then);
}
// Make 'then' unrunnable if 'first' is required but unrunnable
if (!pcmk_is_set(first->flags, pcmk_action_optional)
&& !pcmk_is_set(first->flags, pcmk_action_runnable)) {
clear_action_flag_because(then, pcmk_action_runnable, first);
}
}
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions' flags
* (and runnable_before members if appropriate) as appropriate for the ordering.
* Effects may cascade to other orderings involving the actions as well.
*
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (ignored)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pcmk_action_optional to affect only
* mandatory actions, and pcmk_action_runnable to
* affect only runnable actions)
* \param[in] type Group of enum pcmk__action_relation_flags to apply
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t
pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set)
{
uint32_t changed = pcmk__updated_none;
uint32_t then_flags = 0U;
uint32_t first_flags = 0U;
CRM_ASSERT((first != NULL) && (then != NULL) && (data_set != NULL));
then_flags = then->flags;
first_flags = first->flags;
if (pcmk_is_set(type, pe_order_asymmetrical)) {
handle_asymmetric_ordering(first, then);
}
if (pcmk_is_set(type, pcmk__ar_then_implies_first)
&& !pcmk_is_set(then_flags, pcmk_action_optional)) {
// Then is required, and implies first should be, too
if (pcmk_is_set(filter, pcmk_action_optional)
&& !pcmk_is_set(flags, pcmk_action_optional)
&& pcmk_is_set(first_flags, pcmk_action_optional)) {
clear_action_flag_because(first, pcmk_action_optional, then);
}
if (pcmk_is_set(flags, pcmk_action_migratable)
&& !pcmk_is_set(then->flags, pcmk_action_migratable)) {
clear_action_flag_because(first, pcmk_action_migratable, then);
}
}
if (pcmk_is_set(type, pcmk__ar_promoted_then_implies_first)
&& (then->rsc != NULL) && (then->rsc->role == pcmk_role_promoted)
&& pcmk_is_set(filter, pcmk_action_optional)
&& !pcmk_is_set(then->flags, pcmk_action_optional)) {
clear_action_flag_because(first, pcmk_action_optional, then);
if (pcmk_is_set(first->flags, pcmk_action_migratable)
&& !pcmk_is_set(then->flags, pcmk_action_migratable)) {
clear_action_flag_because(first, pcmk_action_migratable, then);
}
}
if (pcmk_is_set(type, pcmk__ar_unmigratable_then_blocks)
&& pcmk_is_set(filter, pcmk_action_optional)) {
if (!pcmk_all_flags_set(then->flags, pcmk_action_migratable
|pcmk_action_runnable)) {
clear_action_flag_because(first, pcmk_action_runnable, then);
}
if (!pcmk_is_set(then->flags, pcmk_action_optional)) {
clear_action_flag_because(first, pcmk_action_optional, then);
}
}
if (pcmk_is_set(type, pcmk__ar_first_else_then)
&& pcmk_is_set(filter, pcmk_action_optional)
&& !pcmk_is_set(first->flags, pcmk_action_runnable)) {
clear_action_flag_because(then, pcmk_action_migratable, first);
pe__clear_action_flags(then, pcmk_action_pseudo);
}
if (pcmk_is_set(type, pcmk__ar_unrunnable_first_blocks)
&& pcmk_is_set(filter, pcmk_action_runnable)
&& pcmk_is_set(then->flags, pcmk_action_runnable)
&& !pcmk_is_set(flags, pcmk_action_runnable)) {
clear_action_flag_because(then, pcmk_action_runnable, first);
clear_action_flag_because(then, pcmk_action_migratable, first);
}
if (pcmk_is_set(type, pcmk__ar_first_implies_then)
&& pcmk_is_set(filter, pcmk_action_optional)
&& pcmk_is_set(then->flags, pcmk_action_optional)
&& !pcmk_is_set(flags, pcmk_action_optional)
&& !pcmk_is_set(first->flags, pcmk_action_migratable)) {
clear_action_flag_because(then, pcmk_action_optional, first);
}
if (pcmk_is_set(type, pcmk__ar_intermediate_stop)) {
handle_restart_ordering(first, then, filter);
}
if (then_flags != then->flags) {
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
pe_rsc_trace(then->rsc,
"%s on %s: flags are now %#.6x (was %#.6x) "
"because of 'first' %s (%#.6x)",
then->uuid, pe__node_name(then->node),
then->flags, then_flags, first->uuid, first->flags);
if ((then->rsc != NULL) && (then->rsc->parent != NULL)) {
// Required to handle "X_stop then X_start" for cloned groups
pcmk__update_action_for_orderings(then, data_set);
}
}
if (first_flags != first->flags) {
pcmk__set_updated_flags(changed, first, pcmk__updated_first);
pe_rsc_trace(first->rsc,
"%s on %s: flags are now %#.6x (was %#.6x) "
"because of 'then' %s (%#.6x)",
first->uuid, pe__node_name(first->node),
first->flags, first_flags, then->uuid, then->flags);
}
return changed;
}
/*!
* \internal
* \brief Trace-log an action (optionally with its dependent actions)
*
* \param[in] pre_text If not NULL, prefix the log with this plus ": "
* \param[in] action Action to log
* \param[in] details If true, recursively log dependent actions
*/
void
pcmk__log_action(const char *pre_text, const pe_action_t *action, bool details)
{
const char *node_uname = NULL;
const char *node_uuid = NULL;
const char *desc = NULL;
CRM_CHECK(action != NULL, return);
if (!pcmk_is_set(action->flags, pcmk_action_pseudo)) {
if (action->node != NULL) {
node_uname = action->node->details->uname;
node_uuid = action->node->details->id;
} else {
node_uname = "<none>";
}
}
switch (text2task(action->task)) {
case pcmk_action_fence:
case pcmk_action_shutdown:
if (pcmk_is_set(action->flags, pcmk_action_pseudo)) {
desc = "Pseudo ";
} else if (pcmk_is_set(action->flags, pcmk_action_optional)) {
desc = "Optional ";
} else if (!pcmk_is_set(action->flags, pcmk_action_runnable)) {
desc = "!!Non-Startable!! ";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
default:
if (pcmk_is_set(action->flags, pcmk_action_optional)) {
desc = "Optional ";
} else if (pcmk_is_set(action->flags, pcmk_action_pseudo)) {
desc = "Pseudo ";
} else if (!pcmk_is_set(action->flags, pcmk_action_runnable)) {
desc = "!!Non-Startable!! ";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(action->rsc? action->rsc->id : "<none>"),
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
}
if (details) {
const GList *iter = NULL;
const pe_action_wrapper_t *other = NULL;
crm_trace("\t\t====== Preceding Actions");
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
other = (const pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== Subsequent Actions");
for (iter = action->actions_after; iter != NULL; iter = iter->next) {
other = (const pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== End");
} else {
crm_trace("\t\t(before=%d, after=%d)",
g_list_length(action->actions_before),
g_list_length(action->actions_after));
}
}
/*!
* \internal
* \brief Create a new shutdown action for a node
*
* \param[in,out] node Node being shut down
*
* \return Newly created shutdown action for \p node
*/
pe_action_t *
pcmk__new_shutdown_action(pe_node_t *node)
{
char *shutdown_id = NULL;
pe_action_t *shutdown_op = NULL;
CRM_ASSERT(node != NULL);
shutdown_id = crm_strdup_printf("%s-%s", PCMK_ACTION_DO_SHUTDOWN,
node->details->uname);
shutdown_op = custom_action(NULL, shutdown_id, PCMK_ACTION_DO_SHUTDOWN,
node, FALSE, TRUE, node->details->data_set);
pcmk__order_stops_before_shutdown(node, shutdown_op);
add_hash_param(shutdown_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
return shutdown_op;
}
/*!
* \internal
* \brief Calculate and add an operation digest to XML
*
* Calculate an operation digest, which enables us to later determine when a
* restart is needed due to the resource's parameters being changed, and add it
* to given XML.
*
* \param[in] op Operation result from executor
* \param[in,out] update XML to add digest to
*/
static void
add_op_digest_to_xml(const lrmd_event_data_t *op, xmlNode *update)
{
char *digest = NULL;
xmlNode *args_xml = NULL;
if (op->params == NULL) {
return;
}
args_xml = create_xml_node(NULL, XML_TAG_PARAMS);
g_hash_table_foreach(op->params, hash2field, args_xml);
pcmk__filter_op_for_digest(args_xml);
digest = calculate_operation_digest(args_xml, NULL);
crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest);
free_xml(args_xml);
free(digest);
}
#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
/*!
* \internal
* \brief Create XML for resource operation history update
*
* \param[in,out] parent Parent XML node to add to
* \param[in,out] op Operation event data
* \param[in] caller_version DC feature set
* \param[in] target_rc Expected result of operation
* \param[in] node Name of node on which operation was performed
* \param[in] origin Arbitrary description of update source
*
* \return Newly created XML node for history update
*/
xmlNode *
pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
const char *caller_version, int target_rc,
const char *node, const char *origin)
{
char *key = NULL;
char *magic = NULL;
char *op_id = NULL;
char *op_id_additional = NULL;
char *local_user_data = NULL;
const char *exit_reason = NULL;
xmlNode *xml_op = NULL;
const char *task = NULL;
CRM_CHECK(op != NULL, return NULL);
crm_trace("Creating history XML for %s-interval %s action for %s on %s "
"(DC version: %s, origin: %s)",
pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
((node == NULL)? "no node" : node), caller_version, origin);
task = op->op_type;
/* Record a successful agent reload as a start, and a failed one as a
* monitor, to make life easier for the scheduler when determining the
* current state.
*
* @COMPAT We should check "reload" here only if the operation was for a
* pre-OCF-1.1 resource agent, but we don't know that here, and we should
* only ever get results for actions scheduled by us, so we can reasonably
* assume any "reload" is actually a pre-1.1 agent reload.
*/
if (pcmk__str_any_of(task, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT,
NULL)) {
if (op->op_status == PCMK_EXEC_DONE) {
task = PCMK_ACTION_START;
} else {
task = PCMK_ACTION_MONITOR;
}
}
key = pcmk__op_key(op->rsc_id, task, op->interval_ms);
if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) {
const char *n_type = crm_meta_value(op->params, "notify_type");
const char *n_task = crm_meta_value(op->params, "notify_operation");
CRM_LOG_ASSERT(n_type != NULL);
CRM_LOG_ASSERT(n_task != NULL);
op_id = pcmk__notify_key(op->rsc_id, n_type, n_task);
if (op->op_status != PCMK_EXEC_PENDING) {
/* Ignore notify errors.
*
* @TODO It might be better to keep the correct result here, and
* ignore it in process_graph_event().
*/
lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
}
/* Migration history is preserved separately, which usually matters for
* multiple nodes and is important for future cluster transitions.
*/
} else if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
op_id = strdup(key);
} else if (did_rsc_op_fail(op, target_rc)) {
op_id = pcmk__op_key(op->rsc_id, "last_failure", 0);
if (op->interval_ms == 0) {
// Ensure 'last' gets updated, in case record-pending is true
op_id_additional = pcmk__op_key(op->rsc_id, "last", 0);
}
exit_reason = op->exit_reason;
} else if (op->interval_ms > 0) {
op_id = strdup(key);
} else {
op_id = pcmk__op_key(op->rsc_id, "last", 0);
}
again:
xml_op = pcmk__xe_match(parent, XML_LRM_TAG_RSC_OP, XML_ATTR_ID, op_id);
if (xml_op == NULL) {
xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP);
}
if (op->user_data == NULL) {
crm_debug("Generating fake transition key for: " PCMK__OP_FMT
" %d from %s", op->rsc_id, op->op_type, op->interval_ms,
op->call_id, origin);
local_user_data = pcmk__transition_key(-1, op->call_id, target_rc,
FAKE_TE_ID);
op->user_data = local_user_data;
}
if (magic == NULL) {
magic = crm_strdup_printf("%d:%d;%s", op->op_status, op->rc,
(const char *) op->user_data);
}
crm_xml_add(xml_op, XML_ATTR_ID, op_id);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task);
crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin);
crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic);
crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, pcmk__s(exit_reason, ""));
crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); // For context during triage
crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc);
crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status);
crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms);
if (compare_version("2.1", caller_version) <= 0) {
if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) {
crm_trace("Timing data (" PCMK__OP_FMT
"): last=%u change=%u exec=%u queue=%u",
op->rsc_id, op->op_type, op->interval_ms,
op->t_run, op->t_rcchange, op->exec_time, op->queue_time);
if ((op->interval_ms != 0) && (op->t_rcchange != 0)) {
// Recurring ops may have changed rc after initial run
crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
(long long) op->t_rcchange);
} else {
crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
(long long) op->t_run);
}
crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time);
crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time);
}
}
if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
/*
* Record migrate_source and migrate_target always for migrate ops.
*/
const char *name = XML_LRM_ATTR_MIGRATE_SOURCE;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
name = XML_LRM_ATTR_MIGRATE_TARGET;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
}
add_op_digest_to_xml(op, xml_op);
if (op_id_additional) {
free(op_id);
op_id = op_id_additional;
op_id_additional = NULL;
goto again;
}
if (local_user_data) {
free(local_user_data);
op->user_data = NULL;
}
free(magic);
free(op_id);
free(key);
return xml_op;
}
/*!
* \internal
* \brief Check whether an action shutdown-locks a resource to a node
*
* If the shutdown-lock cluster property is set, resources will not be recovered
* on a different node if cleanly stopped, and may start only on that same node.
* This function checks whether that applies to a given action, so that the
* transition graph can be marked appropriately.
*
* \param[in] action Action to check
*
* \return true if \p action locks its resource to the action's node,
* otherwise false
*/
bool
pcmk__action_locks_rsc_to_node(const pe_action_t *action)
{
// Only resource actions taking place on resource's lock node are locked
if ((action == NULL) || (action->rsc == NULL)
|| !pe__same_node(action->node, action->rsc->lock_node)) {
return false;
}
/* During shutdown, only stops are locked (otherwise, another action such as
* a demote would cause the controller to clear the lock)
*/
if (action->node->details->shutdown && (action->task != NULL)
&& (strcmp(action->task, PCMK_ACTION_STOP) != 0)) {
return false;
}
return true;
}
/* lowest to highest */
static gint
sort_action_id(gconstpointer a, gconstpointer b)
{
const pe_action_wrapper_t *action_wrapper2 = (const pe_action_wrapper_t *)a;
const pe_action_wrapper_t *action_wrapper1 = (const pe_action_wrapper_t *)b;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (action_wrapper1->action->id < action_wrapper2->action->id) {
return 1;
}
if (action_wrapper1->action->id > action_wrapper2->action->id) {
return -1;
}
return 0;
}
/*!
* \internal
* \brief Remove any duplicate action inputs, merging action flags
*
* \param[in,out] action Action whose inputs should be checked
*/
void
pcmk__deduplicate_action_inputs(pe_action_t *action)
{
GList *item = NULL;
GList *next = NULL;
pe_action_wrapper_t *last_input = NULL;
action->actions_before = g_list_sort(action->actions_before,
sort_action_id);
for (item = action->actions_before; item != NULL; item = next) {
pe_action_wrapper_t *input = (pe_action_wrapper_t *) item->data;
next = item->next;
if ((last_input != NULL)
&& (input->action->id == last_input->action->id)) {
crm_trace("Input %s (%d) duplicate skipped for action %s (%d)",
input->action->uuid, input->action->id,
action->uuid, action->id);
/* For the purposes of scheduling, the ordering flags no longer
* matter, but crm_simulate looks at certain ones when creating a
* dot graph. Combining the flags is sufficient for that purpose.
*/
last_input->type |= input->type;
if (input->state == pe_link_dumped) {
last_input->state = pe_link_dumped;
}
free(item->data);
action->actions_before = g_list_delete_link(action->actions_before,
item);
} else {
last_input = input;
input->state = pe_link_not_dumped;
}
}
}
/*!
* \internal
* \brief Output all scheduled actions
*
* \param[in,out] data_set Cluster working set
*/
void
pcmk__output_actions(pe_working_set_t *data_set)
{
pcmk__output_t *out = data_set->priv;
// Output node (non-resource) actions
for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) {
char *node_name = NULL;
char *task = NULL;
pe_action_t *action = (pe_action_t *) iter->data;
if (action->rsc != NULL) {
continue; // Resource actions will be output later
} else if (pcmk_is_set(action->flags, pcmk_action_optional)) {
continue; // This action was not scheduled
}
if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN,
pcmk__str_none)) {
task = strdup("Shutdown");
} else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH,
pcmk__str_none)) {
const char *op = g_hash_table_lookup(action->meta,
"stonith_action");
task = crm_strdup_printf("Fence (%s)", op);
} else {
continue; // Don't display other node action types
}
if (pe__is_guest_node(action->node)) {
const pe_resource_t *remote = action->node->details->remote_rsc;
node_name = crm_strdup_printf("%s (resource: %s)",
pe__node_name(action->node),
remote->container->id);
} else if (action->node != NULL) {
node_name = crm_strdup_printf("%s", pe__node_name(action->node));
}
out->message(out, "node-action", task, node_name, action->reason);
free(node_name);
free(task);
}
// Output resource actions
for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
rsc->cmds->output_actions(rsc);
}
}
/*!
* \internal
* \brief Check whether action from resource history is still in configuration
*
* \param[in] rsc Resource that action is for
* \param[in] task Action's name
* \param[in] interval_ms Action's interval (in milliseconds)
*
* \return true if action is still in resource configuration, otherwise false
*/
static bool
action_in_config(const pe_resource_t *rsc, const char *task, guint interval_ms)
{
char *key = pcmk__op_key(rsc->id, task, interval_ms);
bool config = (find_rsc_op_entry(rsc, key) != NULL);
free(key);
return config;
}
/*!
* \internal
* \brief Get action name needed to compare digest for configuration changes
*
* \param[in] task Action name from history
* \param[in] interval_ms Action interval (in milliseconds)
*
* \return Action name whose digest should be compared
*/
static const char *
task_for_digest(const char *task, guint interval_ms)
{
/* Certain actions need to be compared against the parameters used to start
* the resource.
*/
if ((interval_ms == 0)
&& pcmk__str_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_PROMOTE, NULL)) {
task = PCMK_ACTION_START;
}
return task;
}
/*!
* \internal
* \brief Check whether only sanitized parameters to an action changed
*
* When collecting CIB files for troubleshooting, crm_report will mask
* sensitive resource parameters. If simulations were run using that, affected
* resources would appear to need a restart, which would complicate
* troubleshooting. To avoid that, we save a "secure digest" of non-sensitive
* parameters. This function used that digest to check whether only masked
* parameters are different.
*
* \param[in] xml_op Resource history entry with secure digest
* \param[in] digest_data Operation digest information being compared
* \param[in] data_set Cluster working set
*
* \return true if only sanitized parameters changed, otherwise false
*/
static bool
only_sanitized_changed(const xmlNode *xml_op,
const op_digest_cache_t *digest_data,
const pe_working_set_t *data_set)
{
const char *digest_secure = NULL;
if (!pcmk_is_set(data_set->flags, pcmk_sched_sanitized)) {
// The scheduler is not being run as a simulation
return false;
}
digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
return (digest_data->rc != RSC_DIGEST_MATCH) && (digest_secure != NULL)
&& (digest_data->digest_secure_calc != NULL)
&& (strcmp(digest_data->digest_secure_calc, digest_secure) == 0);
}
/*!
* \internal
* \brief Force a restart due to a configuration change
*
* \param[in,out] rsc Resource that action is for
* \param[in] task Name of action whose configuration changed
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in,out] node Node where resource should be restarted
*/
static void
force_restart(pe_resource_t *rsc, const char *task, guint interval_ms,
pe_node_t *node)
{
char *key = pcmk__op_key(rsc->id, task, interval_ms);
pe_action_t *required = custom_action(rsc, key, task, NULL, FALSE, TRUE,
rsc->cluster);
pe_action_set_reason(required, "resource definition change", true);
trigger_unfencing(rsc, node, "Device parameters changed", NULL,
rsc->cluster);
}
/*!
* \internal
* \brief Schedule a reload of a resource on a node
*
* \param[in,out] data Resource to reload
* \param[in] user_data Where resource should be reloaded
*/
static void
schedule_reload(gpointer data, gpointer user_data)
{
pe_resource_t *rsc = data;
const pe_node_t *node = user_data;
pe_action_t *reload = NULL;
// For collective resources, just call recursively for children
if (rsc->variant > pcmk_rsc_variant_primitive) {
g_list_foreach(rsc->children, schedule_reload, user_data);
return;
}
// Skip the reload in certain situations
if ((node == NULL)
|| !pcmk_is_set(rsc->flags, pcmk_rsc_managed)
|| pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
pe_rsc_trace(rsc, "Skip reload of %s:%s%s %s",
rsc->id,
pcmk_is_set(rsc->flags, pcmk_rsc_managed)? "" : " unmanaged",
pcmk_is_set(rsc->flags, pcmk_rsc_failed)? " failed" : "",
(node == NULL)? "inactive" : node->details->uname);
return;
}
/* If a resource's configuration changed while a start was pending,
* force a full restart instead of a reload.
*/
if (pcmk_is_set(rsc->flags, pcmk_rsc_start_pending)) {
pe_rsc_trace(rsc, "%s: preventing agent reload because start pending",
rsc->id);
custom_action(rsc, stop_key(rsc), PCMK_ACTION_STOP, node, FALSE, TRUE,
rsc->cluster);
return;
}
// Schedule the reload
pe__set_resource_flags(rsc, pcmk_rsc_reload);
reload = custom_action(rsc, reload_key(rsc), PCMK_ACTION_RELOAD_AGENT, node,
FALSE, TRUE, rsc->cluster);
pe_action_set_reason(reload, "resource definition change", FALSE);
// Set orderings so that a required stop or demote cancels the reload
pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL,
pcmk__ar_ordered|pe_order_then_cancels_first,
rsc->cluster);
pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL,
pcmk__ar_ordered|pe_order_then_cancels_first,
rsc->cluster);
}
/*!
* \internal
* \brief Handle any configuration change for an action
*
* Given an action from resource history, if the resource's configuration
* changed since the action was done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, etc.).
*
* \param[in,out] rsc Resource that action is for
* \param[in,out] node Node that action was on
* \param[in] xml_op Action XML from resource history
*
* \return true if action configuration changed, otherwise false
*/
bool
pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *xml_op)
{
guint interval_ms = 0;
const char *task = NULL;
const op_digest_cache_t *digest_data = NULL;
CRM_CHECK((rsc != NULL) && (node != NULL) && (xml_op != NULL),
return false);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
CRM_CHECK(task != NULL, return false);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
// If this is a recurring action, check whether it has been orphaned
if (interval_ms > 0) {
if (action_in_config(rsc, task, interval_ms)) {
pe_rsc_trace(rsc, "%s-interval %s for %s on %s is in configuration",
pcmk__readable_interval(interval_ms), task, rsc->id,
pe__node_name(node));
} else if (pcmk_is_set(rsc->cluster->flags,
pcmk_sched_cancel_removed_actions)) {
pcmk__schedule_cancel(rsc,
crm_element_value(xml_op,
XML_LRM_ATTR_CALLID),
task, interval_ms, node, "orphan");
return true;
} else {
pe_rsc_debug(rsc, "%s-interval %s for %s on %s is orphaned",
pcmk__readable_interval(interval_ms), task, rsc->id,
pe__node_name(node));
return true;
}
}
crm_trace("Checking %s-interval %s for %s on %s for configuration changes",
pcmk__readable_interval(interval_ms), task, rsc->id,
pe__node_name(node));
task = task_for_digest(task, interval_ms);
digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster);
if (only_sanitized_changed(xml_op, digest_data, rsc->cluster)) {
if (!pcmk__is_daemon && (rsc->cluster->priv != NULL)) {
pcmk__output_t *out = rsc->cluster->priv;
out->info(out,
"Only 'private' parameters to %s-interval %s for %s "
"on %s changed: %s",
pcmk__readable_interval(interval_ms), task, rsc->id,
pe__node_name(node),
crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
}
return false;
}
switch (digest_data->rc) {
case RSC_DIGEST_RESTART:
crm_log_xml_debug(digest_data->params_restart, "params:restart");
force_restart(rsc, task, interval_ms, node);
return true;
case RSC_DIGEST_ALL:
case RSC_DIGEST_UNKNOWN:
// Changes that can potentially be handled by an agent reload
if (interval_ms > 0) {
/* Recurring actions aren't reloaded per se, they are just
* re-scheduled so the next run uses the new parameters.
* The old instance will be cancelled automatically.
*/
crm_log_xml_debug(digest_data->params_all, "params:reschedule");
pcmk__reschedule_recurring(rsc, task, interval_ms, node);
} else if (crm_element_value(xml_op,
XML_LRM_ATTR_RESTART_DIGEST) != NULL) {
// Agent supports reload, so use it
trigger_unfencing(rsc, node,
"Device parameters changed (reload)", NULL,
rsc->cluster);
crm_log_xml_debug(digest_data->params_all, "params:reload");
schedule_reload((gpointer) rsc, (gpointer) node);
} else {
pe_rsc_trace(rsc,
"Restarting %s "
"because agent doesn't support reload", rsc->id);
crm_log_xml_debug(digest_data->params_restart,
"params:restart");
force_restart(rsc, task, interval_ms, node);
}
return true;
default:
break;
}
return false;
}
/*!
* \internal
* \brief Create a list of resource's action history entries, sorted by call ID
*
* \param[in] rsc_entry Resource's <lrm_rsc_op> status XML
* \param[out] start_index Where to store index of start-like action, if any
* \param[out] stop_index Where to store index of stop action, if any
*/
static GList *
rsc_history_as_list(const xmlNode *rsc_entry, int *start_index, int *stop_index)
{
GList *ops = NULL;
for (xmlNode *rsc_op = first_named_child(rsc_entry, XML_LRM_TAG_RSC_OP);
rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
ops = g_list_prepend(ops, rsc_op);
}
ops = g_list_sort(ops, sort_op_by_callid);
calculate_active_ops(ops, start_index, stop_index);
return ops;
}
/*!
* \internal
* \brief Process a resource's action history from the CIB status
*
* Given a resource's action history, if the resource's configuration
* changed since the actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in] rsc_entry Resource's <lrm_rsc_op> status XML
* \param[in,out] rsc Resource whose history is being processed
* \param[in,out] node Node whose history is being processed
*/
static void
process_rsc_history(const xmlNode *rsc_entry, pe_resource_t *rsc,
pe_node_t *node)
{
int offset = -1;
int stop_index = 0;
int start_index = 0;
GList *sorted_op_list = NULL;
if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
if (pe_rsc_is_anon_clone(pe__const_top_resource(rsc, false))) {
pe_rsc_trace(rsc,
"Skipping configuration check "
"for orphaned clone instance %s",
rsc->id);
} else {
pe_rsc_trace(rsc,
"Skipping configuration check and scheduling clean-up "
"for orphaned resource %s", rsc->id);
pcmk__schedule_cleanup(rsc, node, false);
}
return;
}
if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, false)) {
pcmk__schedule_cleanup(rsc, node, false);
}
pe_rsc_trace(rsc,
"Skipping configuration check for %s "
"because no longer active on %s",
rsc->id, pe__node_name(node));
return;
}
pe_rsc_trace(rsc, "Checking for configuration changes for %s on %s",
rsc->id, pe__node_name(node));
if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, true)) {
pcmk__schedule_cleanup(rsc, node, false);
}
sorted_op_list = rsc_history_as_list(rsc_entry, &start_index, &stop_index);
if (start_index < stop_index) {
return; // Resource is stopped
}
for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
xmlNode *rsc_op = (xmlNode *) iter->data;
const char *task = NULL;
guint interval_ms = 0;
if (++offset < start_index) {
// Skip actions that happened before a start
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if ((interval_ms > 0)
&& (pcmk_is_set(rsc->flags, pcmk_rsc_maintenance)
|| node->details->maintenance)) {
// Maintenance mode cancels recurring operations
pcmk__schedule_cancel(rsc,
crm_element_value(rsc_op,
XML_LRM_ATTR_CALLID),
task, interval_ms, node, "maintenance mode");
} else if ((interval_ms > 0)
|| pcmk__strcase_any_of(task, PCMK_ACTION_MONITOR,
PCMK_ACTION_START,
PCMK_ACTION_PROMOTE,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
/* If a resource operation failed, and the operation's definition
* has changed, clear any fail count so they can be retried fresh.
*/
if (pe__bundle_needs_remote_name(rsc)) {
/* We haven't assigned resources to nodes yet, so if the
* REMOTE_CONTAINER_HACK is used, we may calculate the digest
* based on the literal "#uname" value rather than the properly
* substituted value. That would mistakenly make the action
* definition appear to have been changed. Defer the check until
* later in this case.
*/
pe__add_param_check(rsc_op, rsc, node, pcmk__check_active,
rsc->cluster);
} else if (pcmk__check_action_config(rsc, node, rsc_op)
&& (pe_get_failcount(node, rsc, NULL, pe_fc_effective,
NULL) != 0)) {
pe__clear_failcount(rsc, node, "action definition changed",
rsc->cluster);
}
}
}
g_list_free(sorted_op_list);
}
/*!
* \internal
* \brief Process a node's action history from the CIB status
*
* Given a node's resource history, if the resource's configuration changed
* since the actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in,out] node Node whose history is being processed
* \param[in] lrm_rscs Node's <lrm_resources> from CIB status XML
*/
static void
process_node_history(pe_node_t *node, const xmlNode *lrm_rscs)
{
crm_trace("Processing node history for %s", pe__node_name(node));
for (const xmlNode *rsc_entry = first_named_child(lrm_rscs,
XML_LRM_TAG_RESOURCE);
rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
if (rsc_entry->children != NULL) {
GList *result = pcmk__rscs_matching_id(ID(rsc_entry),
node->details->data_set);
for (GList *iter = result; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
if (rsc->variant == pcmk_rsc_variant_primitive) {
process_rsc_history(rsc_entry, rsc, node);
}
}
g_list_free(result);
}
}
}
// XPath to find a node's resource history
#define XPATH_NODE_HISTORY "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES
/*!
* \internal
* \brief Process any resource configuration changes in the CIB status
*
* Go through all nodes' resource history, and if a resource's configuration
* changed since its actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in,out] data_set Cluster working set
*/
void
pcmk__handle_rsc_config_changes(pe_working_set_t *data_set)
{
crm_trace("Check resource and action configuration for changes");
/* Rather than iterate through the status section, iterate through the nodes
* and search for the appropriate status subsection for each. This skips
* orphaned nodes and lets us eliminate some cases before searching the XML.
*/
for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
/* Don't bother checking actions for a node that can't run actions ...
* unless it's in maintenance mode, in which case we still need to
* cancel any existing recurring monitors.
*/
if (node->details->maintenance
|| pcmk__node_available(node, false, false)) {
char *xpath = NULL;
xmlNode *history = NULL;
xpath = crm_strdup_printf(XPATH_NODE_HISTORY, node->details->uname);
history = get_xpath_object(xpath, data_set->input, LOG_NEVER);
free(xpath);
process_node_history(node, history);
}
}
}
diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c
index c3a9a3313f..7a62a7c3ef 100644
--- a/lib/pacemaker/pcmk_sched_bundle.c
+++ b/lib/pacemaker/pcmk_sched_bundle.c
@@ -1,1055 +1,1055 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
struct assign_data {
const pe_node_t *prefer;
bool stop_if_fail;
};
/*!
* \internal
* \brief Assign a single bundle replica's resources (other than container)
*
* \param[in,out] replica Replica to assign
* \param[in] user_data Preferred node, if any
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
assign_replica(pe__bundle_replica_t *replica, void *user_data)
{
pe_node_t *container_host = NULL;
struct assign_data *assign_data = user_data;
const pe_node_t *prefer = assign_data->prefer;
bool stop_if_fail = assign_data->stop_if_fail;
const pe_resource_t *bundle = pe__const_top_resource(replica->container,
true);
if (replica->ip != NULL) {
pe_rsc_trace(bundle, "Assigning bundle %s IP %s",
bundle->id, replica->ip->id);
replica->ip->cmds->assign(replica->ip, prefer, stop_if_fail);
}
container_host = replica->container->allocated_to;
if (replica->remote != NULL) {
if (pe__is_guest_or_remote_node(container_host)) {
/* REMOTE_CONTAINER_HACK: "Nested" connection resources must be on
* the same host because Pacemaker Remote only supports a single
* active connection.
*/
pcmk__new_colocation("#replica-remote-with-host-remote", NULL,
INFINITY, replica->remote,
container_host->details->remote_rsc, NULL,
NULL, pcmk__coloc_influence);
}
pe_rsc_trace(bundle, "Assigning bundle %s connection %s",
bundle->id, replica->remote->id);
replica->remote->cmds->assign(replica->remote, prefer, stop_if_fail);
}
if (replica->child != NULL) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, replica->child->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (!pe__same_node(node, replica->node)) {
node->weight = -INFINITY;
} else if (!pcmk__threshold_reached(replica->child, node, NULL)) {
node->weight = INFINITY;
}
}
pe__set_resource_flags(replica->child->parent, pcmk_rsc_assigning);
pe_rsc_trace(bundle, "Assigning bundle %s replica child %s",
bundle->id, replica->child->id);
replica->child->cmds->assign(replica->child, replica->node,
stop_if_fail);
pe__clear_resource_flags(replica->child->parent, pcmk_rsc_assigning);
}
return true;
}
/*!
* \internal
* \brief Assign a bundle resource to a node
*
* \param[in,out] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
* \param[in] stop_if_fail If \c true and a primitive descendant of \p rsc
* can't be assigned to a node, set the
* descendant's next role to stopped and update
* existing actions
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
pe_node_t *
pcmk__bundle_assign(pe_resource_t *rsc, const pe_node_t *prefer,
bool stop_if_fail)
{
GList *containers = NULL;
pe_resource_t *bundled_resource = NULL;
struct assign_data assign_data = { prefer, stop_if_fail };
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
pe_rsc_trace(rsc, "Assigning bundle %s", rsc->id);
pe__set_resource_flags(rsc, pcmk_rsc_assigning);
pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags,
pcmk_sched_output_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
// Assign all containers first, so we know what nodes the bundle will be on
containers = g_list_sort(pe__bundle_containers(rsc), pcmk__cmp_instance);
pcmk__assign_instances(rsc, containers, pe__bundle_max(rsc),
rsc->fns->max_per_node(rsc));
g_list_free(containers);
// Then assign remaining replica resources
pe__foreach_bundle_replica(rsc, assign_replica, (void *) &assign_data);
// Finally, assign the bundled resources to each bundle node
bundled_resource = pe__bundled_resource(rsc);
if (bundled_resource != NULL) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, bundled_resource->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
if (pe__node_is_bundle_instance(rsc, node)) {
node->weight = 0;
} else {
node->weight = -INFINITY;
}
}
bundled_resource->cmds->assign(bundled_resource, prefer, stop_if_fail);
}
pe__clear_resource_flags(rsc, pcmk_rsc_assigning|pcmk_rsc_unassigned);
return NULL;
}
/*!
* \internal
* \brief Create actions for a bundle replica's resources (other than child)
*
* \param[in,out] replica Replica to create actions for
* \param[in] user_data Unused
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
create_replica_actions(pe__bundle_replica_t *replica, void *user_data)
{
if (replica->ip != NULL) {
replica->ip->cmds->create_actions(replica->ip);
}
if (replica->container != NULL) {
replica->container->cmds->create_actions(replica->container);
}
if (replica->remote != NULL) {
replica->remote->cmds->create_actions(replica->remote);
}
return true;
}
/*!
* \internal
* \brief Create all actions needed for a given bundle resource
*
* \param[in,out] rsc Bundle resource to create actions for
*/
void
pcmk__bundle_create_actions(pe_resource_t *rsc)
{
pe_action_t *action = NULL;
GList *containers = NULL;
pe_resource_t *bundled_resource = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
pe__foreach_bundle_replica(rsc, create_replica_actions, NULL);
containers = pe__bundle_containers(rsc);
pcmk__create_instance_actions(rsc, containers);
g_list_free(containers);
bundled_resource = pe__bundled_resource(rsc);
if (bundled_resource != NULL) {
bundled_resource->cmds->create_actions(bundled_resource);
if (pcmk_is_set(bundled_resource->flags, pcmk_rsc_promotable)) {
pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_PROMOTE, true, true);
action = pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_PROMOTED,
true, true);
action->priority = INFINITY;
pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_DEMOTE, true, true);
action = pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_DEMOTED,
true, true);
action->priority = INFINITY;
}
}
}
/*!
* \internal
* \brief Create internal constraints for a bundle replica's resources
*
* \param[in,out] replica Replica to create internal constraints for
* \param[in,out] user_data Replica's parent bundle
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
replica_internal_constraints(pe__bundle_replica_t *replica, void *user_data)
{
pe_resource_t *bundle = user_data;
replica->container->cmds->internal_constraints(replica->container);
// Start bundle -> start replica container
pcmk__order_starts(bundle, replica->container,
pcmk__ar_unrunnable_first_blocks
|pe_order_implies_first_printed);
// Stop bundle -> stop replica child and container
if (replica->child != NULL) {
pcmk__order_stops(bundle, replica->child,
pe_order_implies_first_printed);
}
pcmk__order_stops(bundle, replica->container,
pe_order_implies_first_printed);
// Start replica container -> bundle is started
pcmk__order_resource_actions(replica->container, PCMK_ACTION_START, bundle,
PCMK_ACTION_RUNNING,
pe_order_implies_then_printed);
// Stop replica container -> bundle is stopped
pcmk__order_resource_actions(replica->container, PCMK_ACTION_STOP, bundle,
PCMK_ACTION_STOPPED,
pe_order_implies_then_printed);
if (replica->ip != NULL) {
replica->ip->cmds->internal_constraints(replica->ip);
// Replica IP address -> replica container (symmetric)
pcmk__order_starts(replica->ip, replica->container,
pcmk__ar_unrunnable_first_blocks|pe_order_preserve);
pcmk__order_stops(replica->container, replica->ip,
pcmk__ar_then_implies_first|pe_order_preserve);
pcmk__new_colocation("#ip-with-container", NULL, INFINITY, replica->ip,
replica->container, NULL, NULL,
pcmk__coloc_influence);
}
if (replica->remote != NULL) {
/* This handles ordering and colocating remote relative to container
* (via "#resource-with-container"). Since IP is also ordered and
* colocated relative to the container, we don't need to do anything
* explicit here with IP.
*/
replica->remote->cmds->internal_constraints(replica->remote);
}
if (replica->child != NULL) {
CRM_ASSERT(replica->remote != NULL);
// "Start remote then child" is implicit in scheduler's remote logic
}
return true;
}
/*!
* \internal
* \brief Create implicit constraints needed for a bundle resource
*
* \param[in,out] rsc Bundle resource to create implicit constraints for
*/
void
pcmk__bundle_internal_constraints(pe_resource_t *rsc)
{
pe_resource_t *bundled_resource = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
pe__foreach_bundle_replica(rsc, replica_internal_constraints, rsc);
bundled_resource = pe__bundled_resource(rsc);
if (bundled_resource == NULL) {
return;
}
// Start bundle -> start bundled clone
pcmk__order_resource_actions(rsc, PCMK_ACTION_START, bundled_resource,
PCMK_ACTION_START,
pe_order_implies_first_printed);
// Bundled clone is started -> bundle is started
pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_RUNNING,
rsc, PCMK_ACTION_RUNNING,
pe_order_implies_then_printed);
// Stop bundle -> stop bundled clone
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP, bundled_resource,
PCMK_ACTION_STOP,
pe_order_implies_first_printed);
// Bundled clone is stopped -> bundle is stopped
pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_STOPPED,
rsc, PCMK_ACTION_STOPPED,
pe_order_implies_then_printed);
bundled_resource->cmds->internal_constraints(bundled_resource);
if (!pcmk_is_set(bundled_resource->flags, pcmk_rsc_promotable)) {
return;
}
pcmk__promotable_restart_ordering(rsc);
// Demote bundle -> demote bundled clone
pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTE, bundled_resource,
PCMK_ACTION_DEMOTE,
pe_order_implies_first_printed);
// Bundled clone is demoted -> bundle is demoted
pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_DEMOTED,
rsc, PCMK_ACTION_DEMOTED,
pe_order_implies_then_printed);
// Promote bundle -> promote bundled clone
pcmk__order_resource_actions(rsc, PCMK_ACTION_PROMOTE,
bundled_resource, PCMK_ACTION_PROMOTE,
pe_order_implies_first_printed);
// Bundled clone is promoted -> bundle is promoted
pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_PROMOTED,
rsc, PCMK_ACTION_PROMOTED,
pe_order_implies_then_printed);
}
struct match_data {
const pe_node_t *node; // Node to compare against replica
pe_resource_t *container; // Replica container corresponding to node
};
/*!
* \internal
* \brief Check whether a replica container is assigned to a given node
*
* \param[in] replica Replica to check
* \param[in,out] user_data struct match_data with node to compare against
*
* \return true if the replica does not match (to indicate further replicas
* should be processed), otherwise false
*/
static bool
match_replica_container(const pe__bundle_replica_t *replica, void *user_data)
{
struct match_data *match_data = user_data;
if (pcmk__instance_matches(replica->container, match_data->node,
pcmk_role_unknown, false)) {
match_data->container = replica->container;
return false; // Match found, don't bother searching further replicas
}
return true; // No match, keep searching
}
/*!
* \internal
* \brief Get the host to which a bundle node is assigned
*
* \param[in] node Possible bundle node to check
*
* \return Node to which the container for \p node is assigned if \p node is a
* bundle node, otherwise \p node itself
*/
static const pe_node_t *
get_bundle_node_host(const pe_node_t *node)
{
if (pe__is_bundle_node(node)) {
const pe_resource_t *container = node->details->remote_rsc->container;
return container->fns->location(container, NULL, 0);
}
return node;
}
/*!
* \internal
* \brief Find a bundle container compatible with a dependent resource
*
* \param[in] dependent Dependent resource in colocation with bundle
* \param[in] bundle Bundle that \p dependent is colocated with
*
* \return A container from \p bundle assigned to the same node as \p dependent
* if assigned, otherwise assigned to any of dependent's allowed nodes,
* otherwise NULL.
*/
static pe_resource_t *
compatible_container(const pe_resource_t *dependent,
const pe_resource_t *bundle)
{
GList *scratch = NULL;
struct match_data match_data = { NULL, NULL };
// If dependent is assigned, only check there
match_data.node = dependent->fns->location(dependent, NULL, 0);
match_data.node = get_bundle_node_host(match_data.node);
if (match_data.node != NULL) {
pe__foreach_const_bundle_replica(bundle, match_replica_container,
&match_data);
return match_data.container;
}
// Otherwise, check for any of the dependent's allowed nodes
scratch = g_hash_table_get_values(dependent->allowed_nodes);
scratch = pcmk__sort_nodes(scratch, NULL);
for (const GList *iter = scratch; iter != NULL; iter = iter->next) {
match_data.node = iter->data;
match_data.node = get_bundle_node_host(match_data.node);
if (match_data.node == NULL) {
continue;
}
pe__foreach_const_bundle_replica(bundle, match_replica_container,
&match_data);
if (match_data.container != NULL) {
break;
}
}
g_list_free(scratch);
return match_data.container;
}
struct coloc_data {
const pcmk__colocation_t *colocation;
pe_resource_t *dependent;
GList *container_hosts;
};
/*!
* \internal
* \brief Apply a colocation score to replica node scores or resource priority
*
* \param[in] replica Replica of primary bundle resource in colocation
* \param[in,out] user_data struct coloc_data for colocation being applied
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
replica_apply_coloc_score(const pe__bundle_replica_t *replica, void *user_data)
{
struct coloc_data *coloc_data = user_data;
pe_node_t *chosen = NULL;
if (coloc_data->colocation->score < INFINITY) {
replica->container->cmds->apply_coloc_score(coloc_data->dependent,
replica->container,
coloc_data->colocation,
false);
return true;
}
chosen = replica->container->fns->location(replica->container, NULL, 0);
if ((chosen == NULL)
|| is_set_recursive(replica->container, pcmk_rsc_blocked, true)) {
return true;
}
if ((coloc_data->colocation->primary_role >= pcmk_role_promoted)
&& ((replica->child == NULL)
|| (replica->child->next_role < pcmk_role_promoted))) {
return true;
}
pe_rsc_trace(pe__const_top_resource(replica->container, true),
"Allowing mandatory colocation %s using %s @%d",
coloc_data->colocation->id, pe__node_name(chosen),
chosen->weight);
coloc_data->container_hosts = g_list_prepend(coloc_data->container_hosts,
chosen);
return true;
}
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node scores (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__bundle_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
struct coloc_data coloc_data = { colocation, dependent, NULL };
/* This should never be called for the bundle itself as a dependent.
* Instead, we add its colocation constraints to its containers and bundled
* primitive and call the apply_coloc_score() method for them as dependents.
*/
CRM_ASSERT((primary != NULL)
&& (primary->variant == pcmk_rsc_variant_bundle)
&& (dependent != NULL)
&& (dependent->variant == pcmk_rsc_variant_primitive)
&& (colocation != NULL) && !for_dependent);
if (pcmk_is_set(primary->flags, pcmk_rsc_unassigned)) {
pe_rsc_trace(primary,
"Skipping applying colocation %s "
"because %s is still provisional",
colocation->id, primary->id);
return;
}
pe_rsc_trace(primary, "Applying colocation %s (%s with %s at %s)",
colocation->id, dependent->id, primary->id,
pcmk_readable_score(colocation->score));
/* If the constraint dependent is a clone or bundle, "dependent" here is one
* of its instances. Look for a compatible instance of this bundle.
*/
if (colocation->dependent->variant > pcmk_rsc_variant_group) {
const pe_resource_t *primary_container = compatible_container(dependent,
primary);
if (primary_container != NULL) { // Success, we found one
pe_rsc_debug(primary, "Pairing %s with %s",
dependent->id, primary_container->id);
dependent->cmds->apply_coloc_score(dependent, primary_container,
colocation, true);
} else if (colocation->score >= INFINITY) { // Failure, and it's fatal
crm_notice("%s cannot run because there is no compatible "
"instance of %s to colocate with",
dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true, true);
} else { // Failure, but we can ignore it
pe_rsc_debug(primary,
"%s cannot be colocated with any instance of %s",
dependent->id, primary->id);
}
return;
}
pe__foreach_const_bundle_replica(primary, replica_apply_coloc_score,
&coloc_data);
if (colocation->score >= INFINITY) {
pcmk__colocation_intersect_nodes(dependent, primary, colocation,
coloc_data.container_hosts, false);
}
g_list_free(coloc_data.container_hosts);
}
// Bundle implementation of resource_alloc_functions_t:with_this_colocations()
void
pcmk__with_bundle_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
const pe_resource_t *bundled_rsc = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle)
&& (orig_rsc != NULL) && (list != NULL));
// The bundle itself and its containers always get its colocations
if ((orig_rsc == rsc)
|| pcmk_is_set(orig_rsc->flags, pcmk_rsc_replica_container)) {
pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
return;
}
/* The bundled resource gets the colocations if it's promotable and we've
* begun choosing roles
*/
bundled_rsc = pe__bundled_resource(rsc);
if ((bundled_rsc == NULL)
|| !pcmk_is_set(bundled_rsc->flags, pcmk_rsc_promotable)
|| (pe__const_top_resource(orig_rsc, false) != bundled_rsc)) {
return;
}
if (orig_rsc == bundled_rsc) {
if (pe__clone_flag_is_set(orig_rsc, pe__clone_promotion_constrained)) {
/* orig_rsc is the clone and we're setting roles (or have already
* done so)
*/
pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
}
} else if (!pcmk_is_set(orig_rsc->flags, pcmk_rsc_unassigned)) {
/* orig_rsc is an instance and is already assigned. If something
* requests colocations for orig_rsc now, it's for setting roles.
*/
pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
}
}
// Bundle implementation of resource_alloc_functions_t:this_with_colocations()
void
pcmk__bundle_with_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
const pe_resource_t *bundled_rsc = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle)
&& (orig_rsc != NULL) && (list != NULL));
// The bundle itself and its containers always get its colocations
if ((orig_rsc == rsc)
|| pcmk_is_set(orig_rsc->flags, pcmk_rsc_replica_container)) {
pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
return;
}
/* The bundled resource gets the colocations if it's promotable and we've
* begun choosing roles
*/
bundled_rsc = pe__bundled_resource(rsc);
if ((bundled_rsc == NULL)
|| !pcmk_is_set(bundled_rsc->flags, pcmk_rsc_promotable)
|| (pe__const_top_resource(orig_rsc, false) != bundled_rsc)) {
return;
}
if (orig_rsc == bundled_rsc) {
if (pe__clone_flag_is_set(orig_rsc, pe__clone_promotion_constrained)) {
/* orig_rsc is the clone and we're setting roles (or have already
* done so)
*/
pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
}
} else if (!pcmk_is_set(orig_rsc->flags, pcmk_rsc_unassigned)) {
/* orig_rsc is an instance and is already assigned. If something
* requests colocations for orig_rsc now, it's for setting roles.
*/
pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
}
}
/*!
* \internal
* \brief Return action flags for a given bundle resource action
*
* \param[in,out] action Bundle resource action to get flags for
* \param[in] node If not NULL, limit effects to this node
*
* \return Flags appropriate to \p action on \p node
*/
uint32_t
pcmk__bundle_action_flags(pe_action_t *action, const pe_node_t *node)
{
GList *containers = NULL;
uint32_t flags = 0;
pe_resource_t *bundled_resource = NULL;
CRM_ASSERT((action != NULL) && (action->rsc != NULL)
&& (action->rsc->variant == pcmk_rsc_variant_bundle));
bundled_resource = pe__bundled_resource(action->rsc);
if (bundled_resource != NULL) {
// Clone actions are done on the bundled clone resource, not container
switch (get_complex_task(bundled_resource, action->task)) {
case pcmk_action_unspecified:
case pcmk_action_notify:
case pcmk_action_notified:
case pcmk_action_promote:
case pcmk_action_promoted:
case pcmk_action_demote:
case pcmk_action_demoted:
return pcmk__collective_action_flags(action,
bundled_resource->children,
node);
default:
break;
}
}
containers = pe__bundle_containers(action->rsc);
flags = pcmk__collective_action_flags(action, containers, node);
g_list_free(containers);
return flags;
}
/*!
* \internal
* \brief Apply a location constraint to a bundle replica
*
* \param[in,out] replica Replica to apply constraint to
* \param[in,out] user_data Location constraint to apply
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
apply_location_to_replica(pe__bundle_replica_t *replica, void *user_data)
{
pe__location_t *location = user_data;
if (replica->container != NULL) {
replica->container->cmds->apply_location(replica->container, location);
}
if (replica->ip != NULL) {
replica->ip->cmds->apply_location(replica->ip, location);
}
return true;
}
/*!
* \internal
* \brief Apply a location constraint to a bundle resource's allowed node scores
*
* \param[in,out] rsc Bundle resource to apply constraint to
* \param[in,out] location Location constraint to apply
*/
void
pcmk__bundle_apply_location(pe_resource_t *rsc, pe__location_t *location)
{
pe_resource_t *bundled_resource = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle)
&& (location != NULL));
pcmk__apply_location(rsc, location);
pe__foreach_bundle_replica(rsc, apply_location_to_replica, location);
bundled_resource = pe__bundled_resource(rsc);
if ((bundled_resource != NULL)
&& ((location->role_filter == pcmk_role_unpromoted)
|| (location->role_filter == pcmk_role_promoted))) {
bundled_resource->cmds->apply_location(bundled_resource, location);
bundled_resource->rsc_location = g_list_prepend(
bundled_resource->rsc_location, location);
}
}
#define XPATH_REMOTE "//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']"
/*!
* \internal
* \brief Add a bundle replica's actions to transition graph
*
* \param[in,out] replica Replica to add to graph
* \param[in] user_data Bundle that replica belongs to (for logging only)
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
add_replica_actions_to_graph(pe__bundle_replica_t *replica, void *user_data)
{
if ((replica->remote != NULL) && (replica->container != NULL)
&& pe__bundle_needs_remote_name(replica->remote)) {
/* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that
* run pacemaker-remoted inside, without needing a separate IP for
* the container. This is done by configuring the inner remote's
* connection host as the magic string "#uname", then
* replacing it with the underlying host when needed.
*/
xmlNode *nvpair = get_xpath_object(XPATH_REMOTE, replica->remote->xml,
LOG_ERR);
const char *calculated_addr = NULL;
// Replace the value in replica->remote->xml (if appropriate)
calculated_addr = pe__add_bundle_remote_name(replica->remote,
replica->remote->cluster,
nvpair, "value");
if (calculated_addr != NULL) {
/* Since this is for the bundle as a resource, and not any
* particular action, replace the value in the default
* parameters (not evaluated for node). create_graph_action()
* will grab it from there to replace it in node-evaluated
* parameters.
*/
GHashTable *params = pe_rsc_params(replica->remote,
NULL, replica->remote->cluster);
g_hash_table_replace(params,
strdup(XML_RSC_ATTR_REMOTE_RA_ADDR),
strdup(calculated_addr));
} else {
pe_resource_t *bundle = user_data;
/* The only way to get here is if the remote connection is
* neither currently running nor scheduled to run. That means we
* won't be doing any operations that require addr (only start
* requires it; we additionally use it to compare digests when
* unpacking status, promote, and migrate_from history, but
* that's already happened by this point).
*/
pe_rsc_info(bundle,
"Unable to determine address for bundle %s "
"remote connection", bundle->id);
}
}
if (replica->ip != NULL) {
replica->ip->cmds->add_actions_to_graph(replica->ip);
}
if (replica->container != NULL) {
replica->container->cmds->add_actions_to_graph(replica->container);
}
if (replica->remote != NULL) {
replica->remote->cmds->add_actions_to_graph(replica->remote);
}
return true;
}
/*!
* \internal
* \brief Add a bundle resource's actions to the transition graph
*
* \param[in,out] rsc Bundle resource whose actions should be added
*/
void
pcmk__bundle_add_actions_to_graph(pe_resource_t *rsc)
{
pe_resource_t *bundled_resource = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
bundled_resource = pe__bundled_resource(rsc);
if (bundled_resource != NULL) {
bundled_resource->cmds->add_actions_to_graph(bundled_resource);
}
pe__foreach_bundle_replica(rsc, add_replica_actions_to_graph, rsc);
}
struct probe_data {
pe_resource_t *bundle; // Bundle being probed
pe_node_t *node; // Node to create probes on
bool any_created; // Whether any probes have been created
};
/*!
* \internal
* \brief Order a bundle replica's start after another replica's probe
*
* \param[in,out] replica Replica to order start for
* \param[in,out] user_data Replica with probe to order after
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
order_replica_start_after(pe__bundle_replica_t *replica, void *user_data)
{
pe__bundle_replica_t *probed_replica = user_data;
if ((replica == probed_replica) || (replica->container == NULL)) {
return true;
}
pcmk__new_ordering(probed_replica->container,
pcmk__op_key(probed_replica->container->id,
PCMK_ACTION_MONITOR, 0),
NULL, replica->container,
pcmk__op_key(replica->container->id, PCMK_ACTION_START,
0),
- NULL, pcmk__ar_ordered|pe_order_same_node,
+ NULL, pcmk__ar_ordered|pcmk__ar_if_on_same_node,
replica->container->cluster);
return true;
}
/*!
* \internal
* \brief Create probes for a bundle replica's resources
*
* \param[in,out] replica Replica to create probes for
* \param[in,out] user_data struct probe_data
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
create_replica_probes(pe__bundle_replica_t *replica, void *user_data)
{
struct probe_data *probe_data = user_data;
if ((replica->ip != NULL)
&& replica->ip->cmds->create_probe(replica->ip, probe_data->node)) {
probe_data->any_created = true;
}
if ((replica->child != NULL)
&& pe__same_node(probe_data->node, replica->node)
&& replica->child->cmds->create_probe(replica->child,
probe_data->node)) {
probe_data->any_created = true;
}
if ((replica->container != NULL)
&& replica->container->cmds->create_probe(replica->container,
probe_data->node)) {
probe_data->any_created = true;
/* If we're limited to one replica per host (due to
* the lack of an IP range probably), then we don't
* want any of our peer containers starting until
* we've established that no other copies are already
* running.
*
* Partly this is to ensure that the maximum replicas per host is
* observed, but also to ensure that the containers
* don't fail to start because the necessary port
* mappings (which won't include an IP for uniqueness)
* are already taken
*/
if (probe_data->bundle->fns->max_per_node(probe_data->bundle) == 1) {
pe__foreach_bundle_replica(probe_data->bundle,
order_replica_start_after, replica);
}
}
if ((replica->container != NULL) && (replica->remote != NULL)
&& replica->remote->cmds->create_probe(replica->remote,
probe_data->node)) {
/* Do not probe the remote resource until we know where the container is
* running. This is required for REMOTE_CONTAINER_HACK to correctly
* probe remote resources.
*/
char *probe_uuid = pcmk__op_key(replica->remote->id,
PCMK_ACTION_MONITOR, 0);
pe_action_t *probe = find_first_action(replica->remote->actions,
probe_uuid, NULL,
probe_data->node);
free(probe_uuid);
if (probe != NULL) {
probe_data->any_created = true;
pe_rsc_trace(probe_data->bundle, "Ordering %s probe on %s",
replica->remote->id, pe__node_name(probe_data->node));
pcmk__new_ordering(replica->container,
pcmk__op_key(replica->container->id,
PCMK_ACTION_START, 0),
NULL, replica->remote, NULL, probe,
pcmk__ar_nested_remote_probe,
probe_data->bundle->cluster);
}
}
return true;
}
/*!
* \internal
*
* \brief Schedule any probes needed for a bundle resource on a node
*
* \param[in,out] rsc Bundle resource to create probes for
* \param[in,out] node Node to create probe on
*
* \return true if any probe was created, otherwise false
*/
bool
pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node)
{
struct probe_data probe_data = { rsc, node, false };
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
pe__foreach_bundle_replica(rsc, create_replica_probes, &probe_data);
return probe_data.any_created;
}
/*!
* \internal
* \brief Output actions for one bundle replica
*
* \param[in,out] replica Replica to output actions for
* \param[in] user_data Unused
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
output_replica_actions(pe__bundle_replica_t *replica, void *user_data)
{
if (replica->ip != NULL) {
replica->ip->cmds->output_actions(replica->ip);
}
if (replica->container != NULL) {
replica->container->cmds->output_actions(replica->container);
}
if (replica->remote != NULL) {
replica->remote->cmds->output_actions(replica->remote);
}
if (replica->child != NULL) {
replica->child->cmds->output_actions(replica->child);
}
return true;
}
/*!
* \internal
* \brief Output a summary of scheduled actions for a bundle resource
*
* \param[in,out] rsc Bundle resource to output actions for
*/
void
pcmk__output_bundle_actions(pe_resource_t *rsc)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
pe__foreach_bundle_replica(rsc, output_replica_actions, NULL);
}
// Bundle implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__bundle_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization)
{
pe_resource_t *container = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
return;
}
/* All bundle replicas are identical, so using the utilization of the first
* is sufficient for any. Only the implicit container resource can have
* utilization values.
*/
container = pe__first_container(rsc);
if (container != NULL) {
container->cmds->add_utilization(container, orig_rsc, all_rscs,
utilization);
}
}
// Bundle implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__bundle_shutdown_lock(pe_resource_t *rsc)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
// Bundles currently don't support shutdown locks
}
diff --git a/lib/pacemaker/pcmk_sched_fencing.c b/lib/pacemaker/pcmk_sched_fencing.c
index 9901ac7eef..50bf9b75af 100644
--- a/lib/pacemaker/pcmk_sched_fencing.c
+++ b/lib/pacemaker/pcmk_sched_fencing.c
@@ -1,499 +1,500 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Check whether a resource is known on a particular node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return TRUE if resource (or parent if an anonymous clone) is known
*/
static bool
rsc_is_known_on(const pe_resource_t *rsc, const pe_node_t *node)
{
if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
return TRUE;
} else if ((rsc->variant == pcmk_rsc_variant_primitive)
&& pe_rsc_is_anon_clone(rsc->parent)
&& (g_hash_table_lookup(rsc->parent->known_on,
node->details->id) != NULL)) {
/* We check only the parent, not the uber-parent, because we cannot
* assume that the resource is known if it is in an anonymously cloned
* group (which may be only partially known).
*/
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Order a resource's start and promote actions relative to fencing
*
* \param[in,out] rsc Resource to be ordered
* \param[in,out] stonith_op Fence action
*/
static void
order_start_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op)
{
pe_node_t *target;
CRM_CHECK(stonith_op && stonith_op->node, return);
target = stonith_op->node;
for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
pe_action_t *action = iter->data;
switch (action->needs) {
case pcmk_requires_nothing:
// Anything other than start or promote requires nothing
break;
case pcmk_requires_fencing:
order_actions(stonith_op, action, pcmk__ar_ordered);
break;
case pcmk_requires_quorum:
if (pcmk__str_eq(action->task, PCMK_ACTION_START,
pcmk__str_none)
&& (g_hash_table_lookup(rsc->allowed_nodes,
target->details->id) != NULL)
&& !rsc_is_known_on(rsc, target)) {
/* If we don't know the status of the resource on the node
* we're about to shoot, we have to assume it may be active
* there. Order the resource start after the fencing. This
* is analogous to waiting for all the probes for a resource
* to complete before starting it.
*
* The most likely explanation is that the DC died and took
* its status with it.
*/
pe_rsc_debug(rsc, "Ordering %s after %s recovery",
action->uuid, pe__node_name(target));
order_actions(stonith_op, action,
pcmk__ar_ordered
|pcmk__ar_unrunnable_first_blocks);
}
break;
}
}
}
/*!
* \internal
* \brief Order a resource's stop and demote actions relative to fencing
*
* \param[in,out] rsc Resource to be ordered
* \param[in,out] stonith_op Fence action
*/
static void
order_stop_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op)
{
GList *iter = NULL;
GList *action_list = NULL;
bool order_implicit = false;
pe_resource_t *top = uber_parent(rsc);
pe_action_t *parent_stop = NULL;
pe_node_t *target;
CRM_CHECK(stonith_op && stonith_op->node, return);
target = stonith_op->node;
/* Get a list of stop actions potentially implied by the fencing */
action_list = pe__resource_actions(rsc, target, PCMK_ACTION_STOP, FALSE);
/* If resource requires fencing, implicit actions must occur after fencing.
*
* Implied stops and demotes of resources running on guest nodes are always
* ordered after fencing, even if the resource does not require fencing,
* because guest node "fencing" is actually just a resource stop.
*/
if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
|| pe__is_guest_node(target)) {
order_implicit = true;
}
if (action_list && order_implicit) {
parent_stop = find_first_action(top->actions, NULL, PCMK_ACTION_STOP,
NULL);
}
for (iter = action_list; iter != NULL; iter = iter->next) {
pe_action_t *action = iter->data;
// The stop would never complete, so convert it into a pseudo-action.
pe__set_action_flags(action, pcmk_action_pseudo|pcmk_action_runnable);
if (order_implicit) {
/* Order the stonith before the parent stop (if any).
*
* Also order the stonith before the resource stop, unless the
* resource is inside a bundle -- that would cause a graph loop.
* We can rely on the parent stop's ordering instead.
*
* User constraints must not order a resource in a guest node
* relative to the guest node container resource. The
* pe_order_preserve flag marks constraints as generated by the
* cluster and thus immune to that check (and is irrelevant if
* target is not a guest).
*/
if (!pe_rsc_is_bundled(rsc)) {
order_actions(stonith_op, action, pe_order_preserve);
}
order_actions(stonith_op, parent_stop, pe_order_preserve);
}
if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
rsc->id, (order_implicit? "after" : "because"),
pe__node_name(target));
} else {
crm_info("%s is implicit %s %s is fenced",
action->uuid, (order_implicit? "after" : "because"),
pe__node_name(target));
}
if (pcmk_is_set(rsc->flags, pcmk_rsc_notify)) {
pe__order_notifs_after_fencing(action, rsc, stonith_op);
}
#if 0
/* It might be a good idea to stop healthy resources on a node about to
* be fenced, when possible.
*
* However, fencing must be done before a failed resource's
* (pseudo-)stop action, so that could create a loop. For example, given
* a group of A and B running on node N with a failed stop of B:
*
* fence N -> stop B (pseudo-op) -> stop A -> fence N
*
* The block below creates the stop A -> fence N ordering and therefore
* must (at least for now) be disabled. Instead, run the block above and
* treat all resources on N as B would be (i.e., as a pseudo-op after
* the fencing).
*
* @TODO Maybe break the "A requires B" dependency in
* pcmk__update_action_for_orderings() and use this block for healthy
* resources instead of the above.
*/
crm_info("Moving healthy resource %s off %s before fencing",
rsc->id, pe__node_name(node));
pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
strdup(PCMK_ACTION_STONITH), stonith_op,
pcmk__ar_ordered, rsc->cluster);
#endif
}
g_list_free(action_list);
/* Get a list of demote actions potentially implied by the fencing */
action_list = pe__resource_actions(rsc, target, PCMK_ACTION_DEMOTE, FALSE);
for (iter = action_list; iter != NULL; iter = iter->next) {
pe_action_t *action = iter->data;
if (!(action->node->details->online) || action->node->details->unclean
|| pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
pe_rsc_info(rsc,
"Demote of failed resource %s is implicit "
"after %s is fenced",
rsc->id, pe__node_name(target));
} else {
pe_rsc_info(rsc, "%s is implicit after %s is fenced",
action->uuid, pe__node_name(target));
}
/* The demote would never complete and is now implied by the
* fencing, so convert it into a pseudo-action.
*/
pe__set_action_flags(action,
pcmk_action_pseudo|pcmk_action_runnable);
if (pe_rsc_is_bundled(rsc)) {
// Recovery will be ordered as usual after parent's implied stop
} else if (order_implicit) {
order_actions(stonith_op, action,
pe_order_preserve|pcmk__ar_ordered);
}
}
}
g_list_free(action_list);
}
/*!
* \internal
* \brief Order resource actions properly relative to fencing
*
* \param[in,out] rsc Resource whose actions should be ordered
* \param[in,out] stonith_op Fencing operation to be ordered against
*/
static void
rsc_stonith_ordering(pe_resource_t *rsc, pe_action_t *stonith_op)
{
if (rsc->children) {
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = iter->data;
rsc_stonith_ordering(child_rsc, stonith_op);
}
} else if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
pe_rsc_trace(rsc,
"Skipping fencing constraints for unmanaged resource: %s",
rsc->id);
} else {
order_start_vs_fencing(rsc, stonith_op);
order_stop_vs_fencing(rsc, stonith_op);
}
}
/*!
* \internal
* \brief Order all actions appropriately relative to a fencing operation
*
* Ensure start operations of affected resources are ordered after fencing,
* imply stop and demote operations of affected resources by marking them as
* pseudo-actions, etc.
*
* \param[in,out] stonith_op Fencing operation
* \param[in,out] data_set Working set of cluster
*/
void
pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
{
CRM_CHECK(stonith_op && data_set, return);
for (GList *r = data_set->resources; r != NULL; r = r->next) {
rsc_stonith_ordering((pe_resource_t *) r->data, stonith_op);
}
}
/*!
* \internal
* \brief Order an action after unfencing
*
* \param[in] rsc Resource that action is for
* \param[in,out] node Node that action is on
* \param[in,out] action Action to be ordered after unfencing
* \param[in] order Ordering flags
*/
void
pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node,
pe_action_t *action,
enum pcmk__action_relation_flags order)
{
/* When unfencing is in use, we order unfence actions before any probe or
* start of resources that require unfencing, and also of fence devices.
*
* This might seem to violate the principle that fence devices require
* only quorum. However, fence agents that unfence often don't have enough
* information to even probe or start unless the node is first unfenced.
*/
if ((pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)
&& pcmk_is_set(rsc->cluster->flags, pcmk_sched_enable_unfencing))
|| pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing)) {
/* Start with an optional ordering. Requiring unfencing would result in
* the node being unfenced, and all its resources being stopped,
* whenever a new resource is added -- which would be highly suboptimal.
*/
pe_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, TRUE, NULL,
FALSE, node->details->data_set);
order_actions(unfence, action, order);
if (!pcmk__node_unfenced(node)) {
// But unfencing is required if it has never been done
char *reason = crm_strdup_printf("required by %s %s",
rsc->id, action->task);
trigger_unfencing(NULL, node, reason, NULL,
node->details->data_set);
free(reason);
}
}
}
/*!
* \internal
* \brief Create pseudo-op for guest node fence, and order relative to it
*
* \param[in,out] node Guest node to fence
*/
void
pcmk__fence_guest(pe_node_t *node)
{
pe_resource_t *container = NULL;
pe_action_t *stop = NULL;
pe_action_t *stonith_op = NULL;
/* The fence action is just a label; we don't do anything differently for
* off vs. reboot. We specify it explicitly, rather than let it default to
* cluster's default action, because we are not _initiating_ fencing -- we
* are creating a pseudo-event to describe fencing that is already occurring
* by other means (container recovery).
*/
const char *fence_action = PCMK_ACTION_OFF;
CRM_ASSERT(node != NULL);
/* Check whether guest's container resource has any explicit stop or
* start (the stop may be implied by fencing of the guest's host).
*/
container = node->details->remote_rsc->container;
if (container) {
stop = find_first_action(container->actions, NULL, PCMK_ACTION_STOP,
NULL);
if (find_first_action(container->actions, NULL, PCMK_ACTION_START,
NULL)) {
fence_action = PCMK_ACTION_REBOOT;
}
}
/* Create a fence pseudo-event, so we have an event to order actions
* against, and the controller can always detect it.
*/
stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
FALSE, node->details->data_set);
pe__set_action_flags(stonith_op, pcmk_action_pseudo|pcmk_action_runnable);
/* We want to imply stops/demotes after the guest is stopped, not wait until
* it is restarted, so we always order pseudo-fencing after stop, not start
* (even though start might be closer to what is done for a real reboot).
*/
if ((stop != NULL) && pcmk_is_set(stop->flags, pcmk_action_pseudo)) {
pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
NULL, FALSE,
node->details->data_set);
crm_info("Implying guest %s is down (action %d) after %s fencing",
pe__node_name(node), stonith_op->id,
pe__node_name(stop->node));
order_actions(parent_stonith_op, stonith_op,
pcmk__ar_unrunnable_first_blocks
|pcmk__ar_first_implies_then);
} else if (stop) {
order_actions(stop, stonith_op,
pcmk__ar_unrunnable_first_blocks
|pcmk__ar_first_implies_then);
crm_info("Implying guest %s is down (action %d) "
"after container %s is stopped (action %d)",
pe__node_name(node), stonith_op->id,
container->id, stop->id);
} else {
/* If we're fencing the guest node but there's no stop for the guest
* resource, we must think the guest is already stopped. However, we may
* think so because its resource history was just cleaned. To avoid
* unnecessarily considering the guest node down if it's really up,
* order the pseudo-fencing after any stop of the connection resource,
* which will be ordered after any container (re-)probe.
*/
stop = find_first_action(node->details->remote_rsc->actions, NULL,
PCMK_ACTION_STOP, NULL);
if (stop) {
order_actions(stop, stonith_op, pcmk__ar_ordered);
crm_info("Implying guest %s is down (action %d) "
"after connection is stopped (action %d)",
pe__node_name(node), stonith_op->id, stop->id);
} else {
/* Not sure why we're fencing, but everything must already be
* cleanly stopped.
*/
crm_info("Implying guest %s is down (action %d) ",
pe__node_name(node), stonith_op->id);
}
}
// Order/imply other actions relative to pseudo-fence as with real fence
pcmk__order_vs_fence(stonith_op, node->details->data_set);
}
/*!
* \internal
* \brief Check whether node has already been unfenced
*
* \param[in] node Node to check
*
* \return true if node has a nonzero #node-unfenced attribute (or none),
* otherwise false
*/
bool
pcmk__node_unfenced(const pe_node_t *node)
{
const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED);
return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
}
/*!
* \internal
* \brief Order a resource's start and stop relative to unfencing of a node
*
* \param[in,out] data Node that could be unfenced
* \param[in,out] user_data Resource to order
*/
void
pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
{
pe_node_t *node = (pe_node_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
pe_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, true, NULL, false,
rsc->cluster);
crm_debug("Ordering any stops of %s before %s, and any starts after",
rsc->id, unfence->uuid);
/*
* It would be more efficient to order clone resources once,
* rather than order each instance, but ordering the instance
* allows us to avoid unnecessary dependencies that might conflict
* with user constraints.
*
* @TODO: This constraint can still produce a transition loop if the
* resource has a stop scheduled on the node being unfenced, and
* there is a user ordering constraint to start some other resource
* (which will be ordered after the unfence) before stopping this
* resource. An example is "start some slow-starting cloned service
* before stopping an associated virtual IP that may be moving to
* it":
* stop this -> unfencing -> start that -> stop this
*/
pcmk__new_ordering(rsc, stop_key(rsc), NULL,
NULL, strdup(unfence->uuid), unfence,
- pcmk__ar_ordered|pe_order_same_node,
+ pcmk__ar_ordered|pcmk__ar_if_on_same_node,
rsc->cluster);
pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
rsc, start_key(rsc), NULL,
- pcmk__ar_first_implies_same_node_then|pe_order_same_node,
+ pcmk__ar_first_implies_same_node_then
+ |pcmk__ar_if_on_same_node,
rsc->cluster);
}
diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c
index 97023ec2f3..16660d1dc7 100644
--- a/lib/pacemaker/pcmk_sched_primitive.c
+++ b/lib/pacemaker/pcmk_sched_primitive.c
@@ -1,1655 +1,1656 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <stdint.h> // uint8_t, uint32_t
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void promote_resource(pe_resource_t *rsc, pe_node_t *node,
bool optional);
static void assert_role_error(pe_resource_t *rsc, pe_node_t *node,
bool optional);
#define RSC_ROLE_MAX (pcmk_role_promoted + 1)
static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* This array lists the immediate next role when transitioning from one role
* to a target role. For example, when going from Stopped to Promoted, the
* next role is Unpromoted, because the resource must be started before it
* can be promoted. The current state then becomes Started, which is fed
* into this array again, giving a next role of Promoted.
*
* Current role Immediate next role Final target role
* ------------ ------------------- -----------------
*/
/* Unknown */ { pcmk_role_unknown, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_stopped, /* Started */
pcmk_role_stopped, /* Unpromoted */
pcmk_role_stopped, /* Promoted */
},
/* Stopped */ { pcmk_role_stopped, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_started, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_unpromoted, /* Promoted */
},
/* Started */ { pcmk_role_stopped, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_started, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_promoted, /* Promoted */
},
/* Unpromoted */ { pcmk_role_stopped, /* Unknown */
pcmk_role_stopped, /* Stopped */
pcmk_role_stopped, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_promoted, /* Promoted */
},
/* Promoted */ { pcmk_role_stopped, /* Unknown */
pcmk_role_unpromoted, /* Stopped */
pcmk_role_unpromoted, /* Started */
pcmk_role_unpromoted, /* Unpromoted */
pcmk_role_promoted, /* Promoted */
},
};
/*!
* \internal
* \brief Function to schedule actions needed for a role change
*
* \param[in,out] rsc Resource whose role is changing
* \param[in,out] node Node where resource will be in its next role
* \param[in] optional Whether scheduled actions should be optional
*/
typedef void (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *node,
bool optional);
static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* This array lists the function needed to transition directly from one role
* to another. NULL indicates that nothing is needed.
*
* Current role Transition function Next role
* ------------ ------------------- ----------
*/
/* Unknown */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
assert_role_error, /* Started */
assert_role_error, /* Unpromoted */
assert_role_error, /* Promoted */
},
/* Stopped */ { assert_role_error, /* Unknown */
NULL, /* Stopped */
start_resource, /* Started */
start_resource, /* Unpromoted */
assert_role_error, /* Promoted */
},
/* Started */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
NULL, /* Started */
NULL, /* Unpromoted */
promote_resource, /* Promoted */
},
/* Unpromoted */ { assert_role_error, /* Unknown */
stop_resource, /* Stopped */
stop_resource, /* Started */
NULL, /* Unpromoted */
promote_resource, /* Promoted */
},
/* Promoted */ { assert_role_error, /* Unknown */
demote_resource, /* Stopped */
demote_resource, /* Started */
demote_resource, /* Unpromoted */
NULL, /* Promoted */
},
};
/*!
* \internal
* \brief Get a list of a resource's allowed nodes sorted by node score
*
* \param[in] rsc Resource to check
*
* \return List of allowed nodes sorted by node score
*/
static GList *
sorted_allowed_nodes(const pe_resource_t *rsc)
{
if (rsc->allowed_nodes != NULL) {
GList *nodes = g_hash_table_get_values(rsc->allowed_nodes);
if (nodes != NULL) {
return pcmk__sort_nodes(nodes, pe__current_node(rsc));
}
}
return NULL;
}
/*!
* \internal
* \brief Assign a resource to its best allowed node, if possible
*
* \param[in,out] rsc Resource to choose a node for
* \param[in] prefer If not \c NULL, prefer this node when all else
* equal
* \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
* node, set next role to stopped and update
* existing actions
*
* \return true if \p rsc could be assigned to a node, otherwise false
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
static bool
assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer, bool stop_if_fail)
{
GList *nodes = NULL;
pe_node_t *chosen = NULL;
pe_node_t *best = NULL;
const pe_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);
if (prefer == NULL) {
prefer = most_free_node;
}
if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
// We've already finished assignment of resources to nodes
return rsc->allocated_to != NULL;
}
// Sort allowed nodes by score
nodes = sorted_allowed_nodes(rsc);
if (nodes != NULL) {
best = (pe_node_t *) nodes->data; // First node has best score
}
if ((prefer != NULL) && (nodes != NULL)) {
// Get the allowed node version of prefer
chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (chosen == NULL) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
pe__node_name(prefer), rsc->id);
/* Favor the preferred node as long as its score is at least as good as
* the best allowed node's.
*
* An alternative would be to favor the preferred node even if the best
* node is better, when the best node's score is less than INFINITY.
*/
} else if (chosen->weight < best->weight) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
pe__node_name(chosen), rsc->id);
chosen = NULL;
} else if (!pcmk__node_available(chosen, true, false)) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
pe__node_name(chosen), rsc->id);
chosen = NULL;
} else {
pe_rsc_trace(rsc,
"Chose preferred node %s for %s "
"(ignoring %d candidates)",
pe__node_name(chosen), rsc->id, g_list_length(nodes));
}
}
if ((chosen == NULL) && (best != NULL)) {
/* Either there is no preferred node, or the preferred node is not
* suitable, but another node is allowed to run the resource.
*/
chosen = best;
if (!pe_rsc_is_unique_clone(rsc->parent)
&& (chosen->weight > 0) // Zero not acceptable
&& pcmk__node_available(chosen, false, false)) {
/* If the resource is already running on a node, prefer that node if
* it is just as good as the chosen node.
*
* We don't do this for unique clone instances, because
* pcmk__assign_instances() has already assigned instances to their
* running nodes when appropriate, and if we get here, we don't want
* remaining unassigned instances to prefer a node that's already
* running another instance.
*/
pe_node_t *running = pe__current_node(rsc);
if (running == NULL) {
// Nothing to do
} else if (!pcmk__node_available(running, true, false)) {
pe_rsc_trace(rsc,
"Current node for %s (%s) can't run resources",
rsc->id, pe__node_name(running));
} else {
int nodes_with_best_score = 1;
for (GList *iter = nodes->next; iter; iter = iter->next) {
pe_node_t *allowed = (pe_node_t *) iter->data;
if (allowed->weight != chosen->weight) {
// The nodes are sorted by score, so no more are equal
break;
}
if (pe__same_node(allowed, running)) {
// Scores are equal, so prefer the current node
chosen = allowed;
}
nodes_with_best_score++;
}
if (nodes_with_best_score > 1) {
uint8_t log_level = LOG_INFO;
if (chosen->weight >= INFINITY) {
log_level = LOG_WARNING;
}
do_crm_log(log_level,
"Chose %s for %s from %d nodes with score %s",
pe__node_name(chosen), rsc->id,
nodes_with_best_score,
pcmk_readable_score(chosen->weight));
}
}
}
pe_rsc_trace(rsc, "Chose %s for %s from %d candidates",
pe__node_name(chosen), rsc->id, g_list_length(nodes));
}
pcmk__assign_resource(rsc, chosen, false, stop_if_fail);
g_list_free(nodes);
return rsc->allocated_to != NULL;
}
/*!
* \internal
* \brief Apply a "this with" colocation to a node's allowed node scores
*
* \param[in,out] colocation Colocation to apply
* \param[in,out] rsc Resource being assigned
*/
static void
apply_this_with(pcmk__colocation_t *colocation, pe_resource_t *rsc)
{
GHashTable *archive = NULL;
pe_resource_t *other = colocation->primary;
// In certain cases, we will need to revert the node scores
if ((colocation->dependent_role >= pcmk_role_promoted)
|| ((colocation->score < 0) && (colocation->score > -INFINITY))) {
archive = pcmk__copy_node_table(rsc->allowed_nodes);
}
if (pcmk_is_set(other->flags, pcmk_rsc_unassigned)) {
pe_rsc_trace(rsc,
"%s: Assigning colocation %s primary %s first"
"(score=%d role=%s)",
rsc->id, colocation->id, other->id,
colocation->score, role2text(colocation->dependent_role));
other->cmds->assign(other, NULL, true);
}
// Apply the colocation score to this resource's allowed node scores
rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
if ((archive != NULL)
&& !pcmk__any_node_available(rsc->allowed_nodes)) {
pe_rsc_info(rsc,
"%s: Reverting scores from colocation with %s "
"because no nodes allowed",
rsc->id, other->id);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = archive;
archive = NULL;
}
if (archive != NULL) {
g_hash_table_destroy(archive);
}
}
/*!
* \internal
* \brief Update a Pacemaker Remote node once its connection has been assigned
*
* \param[in] connection Connection resource that has been assigned
*/
static void
remote_connection_assigned(const pe_resource_t *connection)
{
pe_node_t *remote_node = pe_find_node(connection->cluster->nodes,
connection->id);
CRM_CHECK(remote_node != NULL, return);
if ((connection->allocated_to != NULL)
&& (connection->next_role != pcmk_role_stopped)) {
crm_trace("Pacemaker Remote node %s will be online",
remote_node->details->id);
remote_node->details->online = TRUE;
if (remote_node->details->unseen) {
// Avoid unnecessary fence, since we will attempt connection
remote_node->details->unclean = FALSE;
}
} else {
crm_trace("Pacemaker Remote node %s will be shut down "
"(%sassigned connection's next role is %s)",
remote_node->details->id,
((connection->allocated_to == NULL)? "un" : ""),
role2text(connection->next_role));
remote_node->details->shutdown = TRUE;
}
}
/*!
* \internal
* \brief Assign a primitive resource to a node
*
* \param[in,out] rsc Resource to assign to a node
* \param[in] prefer Node to prefer, if all else is equal
* \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
* node, set next role to stopped and update
* existing actions
*
* \return Node that \p rsc is assigned to, if assigned entirely to one node
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
pe_node_t *
pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer,
bool stop_if_fail)
{
GList *this_with_colocations = NULL;
GList *with_this_colocations = NULL;
GList *iter = NULL;
pcmk__colocation_t *colocation = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
// Never assign a child without parent being assigned first
if ((rsc->parent != NULL)
&& !pcmk_is_set(rsc->parent->flags, pcmk_rsc_assigning)) {
pe_rsc_debug(rsc, "%s: Assigning parent %s first",
rsc->id, rsc->parent->id);
rsc->parent->cmds->assign(rsc->parent, prefer, stop_if_fail);
}
if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
// Assignment has already been done
const char *node_name = "no node";
if (rsc->allocated_to != NULL) {
node_name = pe__node_name(rsc->allocated_to);
}
pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, node_name);
return rsc->allocated_to;
}
// Ensure we detect assignment loops
if (pcmk_is_set(rsc->flags, pcmk_rsc_assigning)) {
pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
return NULL;
}
pe__set_resource_flags(rsc, pcmk_rsc_assigning);
pe__show_node_scores(true, rsc, "Pre-assignment", rsc->allowed_nodes,
rsc->cluster);
this_with_colocations = pcmk__this_with_colocations(rsc);
with_this_colocations = pcmk__with_this_colocations(rsc);
// Apply mandatory colocations first, to satisfy as many as possible
for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score <= -CRM_SCORE_INFINITY)
|| (colocation->score >= CRM_SCORE_INFINITY)) {
apply_this_with(colocation, rsc);
}
}
for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score <= -CRM_SCORE_INFINITY)
|| (colocation->score >= CRM_SCORE_INFINITY)) {
pcmk__add_dependent_scores(colocation, rsc);
}
}
pe__show_node_scores(true, rsc, "Mandatory-colocations",
rsc->allowed_nodes, rsc->cluster);
// Then apply optional colocations
for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score > -CRM_SCORE_INFINITY)
&& (colocation->score < CRM_SCORE_INFINITY)) {
apply_this_with(colocation, rsc);
}
}
for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
colocation = iter->data;
if ((colocation->score > -CRM_SCORE_INFINITY)
&& (colocation->score < CRM_SCORE_INFINITY)) {
pcmk__add_dependent_scores(colocation, rsc);
}
}
g_list_free(this_with_colocations);
g_list_free(with_this_colocations);
if (rsc->next_role == pcmk_role_stopped) {
pe_rsc_trace(rsc,
"Banning %s from all nodes because it will be stopped",
rsc->id);
resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE,
rsc->cluster);
} else if ((rsc->next_role > rsc->role)
&& !pcmk_is_set(rsc->cluster->flags, pcmk_sched_quorate)
&& (rsc->cluster->no_quorum_policy == pcmk_no_quorum_freeze)) {
crm_notice("Resource %s cannot be elevated from %s to %s due to "
"no-quorum-policy=freeze",
rsc->id, role2text(rsc->role), role2text(rsc->next_role));
pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze");
}
pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags,
pcmk_sched_output_scores),
rsc, __func__, rsc->allowed_nodes, rsc->cluster);
// Unmanage resource if fencing is enabled but no device is configured
if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
&& !pcmk_is_set(rsc->cluster->flags, pcmk_sched_have_fencing)) {
pe__clear_resource_flags(rsc, pcmk_rsc_managed);
}
if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
// Unmanaged resources stay on their current node
const char *reason = NULL;
pe_node_t *assign_to = NULL;
pe__set_next_role(rsc, rsc->role, "unmanaged");
assign_to = pe__current_node(rsc);
if (assign_to == NULL) {
reason = "inactive";
} else if (rsc->role == pcmk_role_promoted) {
reason = "promoted";
} else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
reason = "failed";
} else {
reason = "active";
}
pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
(assign_to? assign_to->details->uname : "no node"), reason);
pcmk__assign_resource(rsc, assign_to, true, stop_if_fail);
} else if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_stop_all)) {
// Must stop at some point, but be consistent with stop_if_fail
if (stop_if_fail) {
pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources",
rsc->id);
}
pcmk__assign_resource(rsc, NULL, true, stop_if_fail);
} else if (!assign_best_node(rsc, prefer, stop_if_fail)) {
// Assignment failed
if (!pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
} else if ((rsc->running_on != NULL) && stop_if_fail) {
pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
}
}
pe__clear_resource_flags(rsc, pcmk_rsc_assigning);
if (rsc->is_remote_node) {
remote_connection_assigned(rsc);
}
return rsc->allocated_to;
}
/*!
* \internal
* \brief Schedule actions to bring resource down and back to current role
*
* \param[in,out] rsc Resource to restart
* \param[in,out] current Node that resource should be brought down on
* \param[in] need_stop Whether the resource must be stopped
* \param[in] need_promote Whether the resource must be promoted
*
* \return Role that resource would have after scheduled actions are taken
*/
static void
schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
bool need_stop, bool need_promote)
{
enum rsc_role_e role = rsc->role;
enum rsc_role_e next_role;
rsc_transition_fn fn = NULL;
pe__set_resource_flags(rsc, pcmk_rsc_restarting);
// Bring resource down to a stop on its current node
while (role != pcmk_role_stopped) {
next_role = rsc_state_matrix[role][pcmk_role_stopped];
pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
(need_stop? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, current, !need_stop);
role = next_role;
}
// Bring resource up to its next role on its next node
while ((rsc->role <= rsc->next_role) && (role != rsc->role)
&& !pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
bool required = need_stop;
next_role = rsc_state_matrix[role][rsc->role];
if ((next_role == pcmk_role_promoted) && need_promote) {
required = true;
}
pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
(required? "required" : "optional"), rsc->id,
role2text(role), role2text(next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, rsc->allocated_to, !required);
role = next_role;
}
pe__clear_resource_flags(rsc, pcmk_rsc_restarting);
}
/*!
* \internal
* \brief If a resource's next role is not explicitly specified, set a default
*
* \param[in,out] rsc Resource to set next role for
*
* \return "explicit" if next role was explicitly set, otherwise "implicit"
*/
static const char *
set_default_next_role(pe_resource_t *rsc)
{
if (rsc->next_role != pcmk_role_unknown) {
return "explicit";
}
if (rsc->allocated_to == NULL) {
pe__set_next_role(rsc, pcmk_role_stopped, "assignment");
} else {
pe__set_next_role(rsc, pcmk_role_started, "assignment");
}
return "implicit";
}
/*!
* \internal
* \brief Create an action to represent an already pending start
*
* \param[in,out] rsc Resource to create start action for
*/
static void
create_pending_start(pe_resource_t *rsc)
{
pe_action_t *start = NULL;
pe_rsc_trace(rsc,
"Creating action for %s to represent already pending start",
rsc->id);
start = start_action(rsc, rsc->allocated_to, TRUE);
pe__set_action_flags(start, pcmk_action_always_in_graph);
}
/*!
* \internal
* \brief Schedule actions needed to take a resource to its next role
*
* \param[in,out] rsc Resource to schedule actions for
*/
static void
schedule_role_transition_actions(pe_resource_t *rsc)
{
enum rsc_role_e role = rsc->role;
while (role != rsc->next_role) {
enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role];
rsc_transition_fn fn = NULL;
pe_rsc_trace(rsc,
"Creating action to take %s from %s to %s (ending at %s)",
rsc->id, role2text(role), role2text(next_role),
role2text(rsc->next_role));
fn = rsc_action_matrix[role][next_role];
if (fn == NULL) {
break;
}
fn(rsc, rsc->allocated_to, false);
role = next_role;
}
}
/*!
* \internal
* \brief Create all actions needed for a given primitive resource
*
* \param[in,out] rsc Primitive resource to create actions for
*/
void
pcmk__primitive_create_actions(pe_resource_t *rsc)
{
bool need_stop = false;
bool need_promote = false;
bool is_moving = false;
bool allow_migrate = false;
bool multiply_active = false;
pe_node_t *current = NULL;
unsigned int num_all_active = 0;
unsigned int num_clean_active = 0;
const char *next_role_source = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
next_role_source = set_default_next_role(rsc);
pe_rsc_trace(rsc,
"Creating all actions for %s transition from %s to %s "
"(%s) on %s",
rsc->id, role2text(rsc->role), role2text(rsc->next_role),
next_role_source, pe__node_name(rsc->allocated_to));
current = rsc->fns->active_node(rsc, &num_all_active, &num_clean_active);
g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration,
rsc);
if ((current != NULL) && (rsc->allocated_to != NULL)
&& !pe__same_node(current, rsc->allocated_to)
&& (rsc->next_role >= pcmk_role_started)) {
pe_rsc_trace(rsc, "Moving %s from %s to %s",
rsc->id, pe__node_name(current),
pe__node_name(rsc->allocated_to));
is_moving = true;
allow_migrate = pcmk__rsc_can_migrate(rsc, current);
// This is needed even if migrating (though I'm not sure why ...)
need_stop = true;
}
// Check whether resource is partially migrated and/or multiply active
if ((rsc->partial_migration_source != NULL)
&& (rsc->partial_migration_target != NULL)
&& allow_migrate && (num_all_active == 2)
&& pe__same_node(current, rsc->partial_migration_source)
&& pe__same_node(rsc->allocated_to, rsc->partial_migration_target)) {
/* A partial migration is in progress, and the migration target remains
* the same as when the migration began.
*/
pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
} else if ((rsc->partial_migration_source != NULL)
|| (rsc->partial_migration_target != NULL)) {
// A partial migration is in progress but can't be continued
if (num_all_active > 2) {
// The resource is migrating *and* multiply active!
crm_notice("Forcing recovery of %s because it is migrating "
"from %s to %s and possibly active elsewhere",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
} else {
// The migration source or target isn't available
crm_notice("Forcing recovery of %s because it can no longer "
"migrate from %s to %s",
rsc->id, pe__node_name(rsc->partial_migration_source),
pe__node_name(rsc->partial_migration_target));
}
need_stop = true;
rsc->partial_migration_source = rsc->partial_migration_target = NULL;
allow_migrate = false;
} else if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)) {
multiply_active = (num_all_active > 1);
} else {
/* If a resource has "requires" set to nothing or quorum, don't consider
* it active on unclean nodes (similar to how all resources behave when
* stonith-enabled is false). We can start such resources elsewhere
* before fencing completes, and if we considered the resource active on
* the failed node, we would attempt recovery for being active on
* multiple nodes.
*/
multiply_active = (num_clean_active > 1);
}
if (multiply_active) {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Resource was (possibly) incorrectly multiply active
pe_proc_err("%s resource %s might be active on %u nodes (%s)",
pcmk__s(class, "Untyped"), rsc->id, num_all_active,
recovery2text(rsc->recovery_type));
crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ"
"#Resource_is_Too_Active for more information");
switch (rsc->recovery_type) {
case pcmk_multiply_active_restart:
need_stop = true;
break;
case pcmk_multiply_active_unexpected:
need_stop = true; // stop_resource() will skip expected node
pe__set_resource_flags(rsc, pcmk_rsc_stop_unexpected);
break;
default:
break;
}
} else {
pe__clear_resource_flags(rsc, pcmk_rsc_stop_unexpected);
}
if (pcmk_is_set(rsc->flags, pcmk_rsc_start_pending)) {
create_pending_start(rsc);
}
if (is_moving) {
// Remaining tests are only for resources staying where they are
} else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
if (pcmk_is_set(rsc->flags, pcmk_rsc_stop_if_failed)) {
need_stop = true;
pe_rsc_trace(rsc, "Recovering %s", rsc->id);
} else {
pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
if (rsc->next_role == pcmk_role_promoted) {
need_promote = true;
}
}
} else if (pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
need_stop = true;
} else if ((rsc->role > pcmk_role_started) && (current != NULL)
&& (rsc->allocated_to != NULL)) {
pe_action_t *start = NULL;
pe_rsc_trace(rsc, "Creating start action for promoted resource %s",
rsc->id);
start = start_action(rsc, rsc->allocated_to, TRUE);
if (!pcmk_is_set(start->flags, pcmk_action_optional)) {
// Recovery of a promoted resource
pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
need_stop = true;
}
}
// Create any actions needed to bring resource down and back up to same role
schedule_restart_actions(rsc, current, need_stop, need_promote);
// Create any actions needed to take resource from this role to the next
schedule_role_transition_actions(rsc);
pcmk__create_recurring_actions(rsc);
if (allow_migrate) {
pcmk__create_migration_actions(rsc, current);
}
}
/*!
* \internal
* \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
*
* \param[in] rsc Resource to check
*/
static void
rsc_avoids_remote_nodes(const pe_resource_t *rsc)
{
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (node->details->remote_rsc != NULL) {
node->weight = -INFINITY;
}
}
}
/*!
* \internal
* \brief Return allowed nodes as (possibly sorted) list
*
* Convert a resource's hash table of allowed nodes to a list. If printing to
* stdout, sort the list, to keep action ID numbers consistent for regression
* test output (while avoiding the performance hit on a live cluster).
*
* \param[in] rsc Resource to check for allowed nodes
*
* \return List of resource's allowed nodes
* \note Callers should take care not to rely on the list being sorted.
*/
static GList *
allowed_nodes_as_list(const pe_resource_t *rsc)
{
GList *allowed_nodes = NULL;
if (rsc->allowed_nodes) {
allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
}
if (!pcmk__is_daemon) {
allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
}
return allowed_nodes;
}
/*!
* \internal
* \brief Create implicit constraints needed for a primitive resource
*
* \param[in,out] rsc Primitive resource to create implicit constraints for
*/
void
pcmk__primitive_internal_constraints(pe_resource_t *rsc)
{
GList *allowed_nodes = NULL;
bool check_unfencing = false;
bool check_utilization = false;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
pe_rsc_trace(rsc,
"Skipping implicit constraints for unmanaged resource %s",
rsc->id);
return;
}
// Whether resource requires unfencing
check_unfencing = !pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)
&& pcmk_is_set(rsc->cluster->flags,
pcmk_sched_enable_unfencing)
&& pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing);
// Whether a non-default placement strategy is used
check_utilization = (g_hash_table_size(rsc->utilization) > 0)
&& !pcmk__str_eq(rsc->cluster->placement_strategy,
"default", pcmk__str_casei);
// Order stops before starts (i.e. restart)
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL,
rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL,
pcmk__ar_ordered
|pcmk__ar_first_implies_then
|pcmk__ar_intermediate_stop,
rsc->cluster);
// Promotable ordering: demote before stop, start before promote
if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pcmk_rsc_promotable)
|| (rsc->role > pcmk_role_unpromoted)) {
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_DEMOTE, 0),
NULL,
rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
NULL,
pcmk__ar_promoted_then_implies_first, rsc->cluster);
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
NULL,
rsc, pcmk__op_key(rsc->id, PCMK_ACTION_PROMOTE, 0),
NULL,
pcmk__ar_unrunnable_first_blocks, rsc->cluster);
}
// Don't clear resource history if probing on same node
pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0),
NULL, rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0),
- NULL, pe_order_same_node|pe_order_then_cancels_first,
+ NULL,
+ pcmk__ar_if_on_same_node|pe_order_then_cancels_first,
rsc->cluster);
// Certain checks need allowed nodes
if (check_unfencing || check_utilization || (rsc->container != NULL)) {
allowed_nodes = allowed_nodes_as_list(rsc);
}
if (check_unfencing) {
g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
}
if (check_utilization) {
pcmk__create_utilization_constraints(rsc, allowed_nodes);
}
if (rsc->container != NULL) {
pe_resource_t *remote_rsc = NULL;
if (rsc->is_remote_node) {
// rsc is the implicit remote connection for a guest or bundle node
/* Guest resources are not allowed to run on Pacemaker Remote nodes,
* to avoid nesting remotes. However, bundles are allowed.
*/
if (!pcmk_is_set(rsc->flags, pcmk_rsc_remote_nesting_allowed)) {
rsc_avoids_remote_nodes(rsc->container);
}
/* If someone cleans up a guest or bundle node's container, we will
* likely schedule a (re-)probe of the container and recovery of the
* connection. Order the connection stop after the container probe,
* so that if we detect the container running, we will trigger a new
* transition and avoid the unnecessary recovery.
*/
pcmk__order_resource_actions(rsc->container, PCMK_ACTION_MONITOR,
rsc, PCMK_ACTION_STOP,
pcmk__ar_ordered);
/* A user can specify that a resource must start on a Pacemaker Remote
* node by explicitly configuring it with the container=NODENAME
* meta-attribute. This is of questionable merit, since location
* constraints can accomplish the same thing. But we support it, so here
* we check whether a resource (that is not itself a remote connection)
* has container set to a remote node or guest node resource.
*/
} else if (rsc->container->is_remote_node) {
remote_rsc = rsc->container;
} else {
remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
rsc->container);
}
if (remote_rsc != NULL) {
/* Force the resource on the Pacemaker Remote node instead of
* colocating the resource with the container resource.
*/
for (GList *item = allowed_nodes; item; item = item->next) {
pe_node_t *node = item->data;
if (node->details->remote_rsc != remote_rsc) {
node->weight = -INFINITY;
}
}
} else {
/* This resource is either a filler for a container that does NOT
* represent a Pacemaker Remote node, or a Pacemaker Remote
* connection resource for a guest node or bundle.
*/
int score;
crm_trace("Order and colocate %s relative to its container %s",
rsc->id, rsc->container->id);
pcmk__new_ordering(rsc->container,
pcmk__op_key(rsc->container->id,
PCMK_ACTION_START, 0),
NULL, rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
NULL,
pcmk__ar_first_implies_then
|pcmk__ar_unrunnable_first_blocks,
rsc->cluster);
pcmk__new_ordering(rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
NULL,
rsc->container,
pcmk__op_key(rsc->container->id,
PCMK_ACTION_STOP, 0),
NULL, pcmk__ar_then_implies_first, rsc->cluster);
if (pcmk_is_set(rsc->flags, pcmk_rsc_remote_nesting_allowed)) {
score = 10000; /* Highly preferred but not essential */
} else {
score = INFINITY; /* Force them to run on the same host */
}
pcmk__new_colocation("#resource-with-container", NULL, score, rsc,
rsc->container, NULL, NULL,
pcmk__coloc_influence);
}
}
if (rsc->is_remote_node
|| pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)) {
/* Remote connections and fencing devices are not allowed to run on
* Pacemaker Remote nodes
*/
rsc_avoids_remote_nodes(rsc);
}
g_list_free(allowed_nodes);
}
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node scores (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
const pe_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
enum pcmk__coloc_affects filter_results;
CRM_ASSERT((dependent != NULL) && (primary != NULL)
&& (colocation != NULL));
if (for_dependent) {
// Always process on behalf of primary resource
primary->cmds->apply_coloc_score(dependent, primary, colocation, false);
return;
}
filter_results = pcmk__colocation_affects(dependent, primary, colocation,
false);
pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
((colocation->score > 0)? "Colocating" : "Anti-colocating"),
dependent->id, primary->id, colocation->id, colocation->score,
filter_results);
switch (filter_results) {
case pcmk__coloc_affects_role:
pcmk__apply_coloc_to_priority(dependent, primary, colocation);
break;
case pcmk__coloc_affects_location:
pcmk__apply_coloc_to_scores(dependent, primary, colocation);
break;
default: // pcmk__coloc_affects_nothing
return;
}
}
/* Primitive implementation of
* resource_alloc_functions_t:with_this_colocations()
*/
void
pcmk__with_primitive_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
&& (list != NULL));
if (rsc == orig_rsc) {
/* For the resource itself, add all of its own colocations and relevant
* colocations from its parent (if any).
*/
pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
if (rsc->parent != NULL) {
rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc, list);
}
} else {
// For an ancestor, add only explicitly configured constraints
for (GList *iter = rsc->rsc_cons_lhs; iter != NULL; iter = iter->next) {
pcmk__colocation_t *colocation = iter->data;
if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
pcmk__add_with_this(list, colocation, orig_rsc);
}
}
}
}
/* Primitive implementation of
* resource_alloc_functions_t:this_with_colocations()
*/
void
pcmk__primitive_with_colocations(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList **list)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
&& (list != NULL));
if (rsc == orig_rsc) {
/* For the resource itself, add all of its own colocations and relevant
* colocations from its parent (if any).
*/
pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
if (rsc->parent != NULL) {
rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc, list);
}
} else {
// For an ancestor, add only explicitly configured constraints
for (GList *iter = rsc->rsc_cons; iter != NULL; iter = iter->next) {
pcmk__colocation_t *colocation = iter->data;
if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
pcmk__add_this_with(list, colocation, orig_rsc);
}
}
}
}
/*!
* \internal
* \brief Return action flags for a given primitive resource action
*
* \param[in,out] action Action to get flags for
* \param[in] node If not NULL, limit effects to this node (ignored)
*
* \return Flags appropriate to \p action on \p node
*/
uint32_t
pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node)
{
CRM_ASSERT(action != NULL);
return (uint32_t) action->flags;
}
/*!
* \internal
* \brief Check whether a node is a multiply active resource's expected node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p rsc is multiply active with multiple-active set to
* stop_unexpected, and \p node is the node where it will remain active
* \note This assumes that the resource's next role cannot be changed to stopped
* after this is called, which should be reasonable if status has already
* been unpacked and resources have been assigned to nodes.
*/
static bool
is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
{
return pcmk_all_flags_set(rsc->flags,
pcmk_rsc_stop_unexpected|pcmk_rsc_restarting)
&& (rsc->next_role > pcmk_role_stopped)
&& pe__same_node(rsc->allocated_to, node);
}
/*!
* \internal
* \brief Schedule actions needed to stop a resource wherever it is active
*
* \param[in,out] rsc Resource being stopped
* \param[in] node Node where resource is being stopped (ignored)
* \param[in] optional Whether actions should be optional
*/
static void
stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *current = (pe_node_t *) iter->data;
pe_action_t *stop = NULL;
if (is_expected_node(rsc, current)) {
/* We are scheduling restart actions for a multiply active resource
* with multiple-active=stop_unexpected, and this is where it should
* not be stopped.
*/
pe_rsc_trace(rsc,
"Skipping stop of multiply active resource %s "
"on expected node %s",
rsc->id, pe__node_name(current));
continue;
}
if (rsc->partial_migration_target != NULL) {
// Continue migration if node originally was and remains target
if (pe__same_node(current, rsc->partial_migration_target)
&& pe__same_node(current, rsc->allocated_to)) {
pe_rsc_trace(rsc,
"Skipping stop of %s on %s "
"because partial migration there will continue",
rsc->id, pe__node_name(current));
continue;
} else {
pe_rsc_trace(rsc,
"Forcing stop of %s on %s "
"because migration target changed",
rsc->id, pe__node_name(current));
optional = false;
}
}
pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
rsc->id, pe__node_name(current));
stop = stop_action(rsc, current, optional);
if (rsc->allocated_to == NULL) {
pe_action_set_reason(stop, "node availability", true);
} else if (pcmk_all_flags_set(rsc->flags, pcmk_rsc_restarting
|pcmk_rsc_stop_unexpected)) {
/* We are stopping a multiply active resource on a node that is
* not its expected node, and we are still scheduling restart
* actions, so the stop is for being multiply active.
*/
pe_action_set_reason(stop, "being multiply active", true);
}
if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
pe__clear_action_flags(stop, pcmk_action_runnable);
}
if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_remove_after_stop)) {
pcmk__schedule_cleanup(rsc, current, optional);
}
if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing)) {
pe_action_t *unfence = pe_fence_op(current, PCMK_ACTION_ON, true,
NULL, false, rsc->cluster);
order_actions(stop, unfence, pcmk__ar_then_implies_first);
if (!pcmk__node_unfenced(current)) {
pe_proc_err("Stopping %s until %s can be unfenced",
rsc->id, pe__node_name(current));
}
}
}
}
/*!
* \internal
* \brief Schedule actions needed to start a resource on a node
*
* \param[in,out] rsc Resource being started
* \param[in,out] node Node where resource should be started
* \param[in] optional Whether actions should be optional
*/
static void
start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
pe_action_t *start = NULL;
CRM_ASSERT(node != NULL);
pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
(optional? "optional" : "required"), rsc->id,
pe__node_name(node), node->weight);
start = start_action(rsc, node, TRUE);
pcmk__order_vs_unfence(rsc, node, start, pcmk__ar_first_implies_then);
if (pcmk_is_set(start->flags, pcmk_action_runnable) && !optional) {
pe__clear_action_flags(start, pcmk_action_optional);
}
if (is_expected_node(rsc, node)) {
/* This could be a problem if the start becomes necessary for other
* reasons later.
*/
pe_rsc_trace(rsc,
"Start of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pe__node_name(node));
pe__set_action_flags(start, pcmk_action_pseudo);
}
}
/*!
* \internal
* \brief Schedule actions needed to promote a resource on a node
*
* \param[in,out] rsc Resource being promoted
* \param[in] node Node where resource should be promoted
* \param[in] optional Whether actions should be optional
*/
static void
promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
GList *iter = NULL;
GList *action_list = NULL;
bool runnable = true;
CRM_ASSERT(node != NULL);
// Any start must be runnable for promotion to be runnable
action_list = pe__resource_actions(rsc, node, PCMK_ACTION_START, true);
for (iter = action_list; iter != NULL; iter = iter->next) {
pe_action_t *start = (pe_action_t *) iter->data;
if (!pcmk_is_set(start->flags, pcmk_action_runnable)) {
runnable = false;
}
}
g_list_free(action_list);
if (runnable) {
pe_action_t *promote = promote_action(rsc, node, optional);
pe_rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
(optional? "optional" : "required"), rsc->id,
pe__node_name(node));
if (is_expected_node(rsc, node)) {
/* This could be a problem if the promote becomes necessary for
* other reasons later.
*/
pe_rsc_trace(rsc,
"Promotion of multiply active resouce %s "
"on expected node %s will be a pseudo-action",
rsc->id, pe__node_name(node));
pe__set_action_flags(promote, pcmk_action_pseudo);
}
} else {
pe_rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
rsc->id, pe__node_name(node));
action_list = pe__resource_actions(rsc, node, PCMK_ACTION_PROMOTE,
true);
for (iter = action_list; iter != NULL; iter = iter->next) {
pe_action_t *promote = (pe_action_t *) iter->data;
pe__clear_action_flags(promote, pcmk_action_runnable);
}
g_list_free(action_list);
}
}
/*!
* \internal
* \brief Schedule actions needed to demote a resource wherever it is active
*
* \param[in,out] rsc Resource being demoted
* \param[in] node Node where resource should be demoted (ignored)
* \param[in] optional Whether actions should be optional
*/
static void
demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
/* Since this will only be called for a primitive (possibly as an instance
* of a collective resource), the resource is multiply active if it is
* running on more than one node, so we want to demote on all of them as
* part of recovery, regardless of which one is the desired node.
*/
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *current = (pe_node_t *) iter->data;
if (is_expected_node(rsc, current)) {
pe_rsc_trace(rsc,
"Skipping demote of multiply active resource %s "
"on expected node %s",
rsc->id, pe__node_name(current));
} else {
pe_rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
(optional? "optional" : "required"), rsc->id,
pe__node_name(current));
demote_action(rsc, current, optional);
}
}
}
static void
assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
CRM_ASSERT(false);
}
/*!
* \internal
* \brief Schedule cleanup of a resource
*
* \param[in,out] rsc Resource to clean up
* \param[in] node Node to clean up on
* \param[in] optional Whether clean-up should be optional
*/
void
pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional)
{
/* If the cleanup is required, its orderings are optional, because they're
* relevant only if both actions are required. Conversely, if the cleanup is
* optional, the orderings make the then action required if the first action
* becomes required.
*/
uint32_t flag = optional? pcmk__ar_first_implies_then : pcmk__ar_ordered;
CRM_CHECK((rsc != NULL) && (node != NULL), return);
if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
rsc->id, pe__node_name(node));
return;
}
if (node->details->unclean || !node->details->online) {
pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
rsc->id, pe__node_name(node));
return;
}
crm_notice("Scheduling clean-up of %s on %s", rsc->id, pe__node_name(node));
delete_action(rsc, node, optional);
// stop -> clean-up -> start
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP,
rsc, PCMK_ACTION_DELETE, flag);
pcmk__order_resource_actions(rsc, PCMK_ACTION_DELETE,
rsc, PCMK_ACTION_START, flag);
}
/*!
* \internal
* \brief Add primitive meta-attributes relevant to graph actions to XML
*
* \param[in] rsc Primitive resource whose meta-attributes should be added
* \param[in,out] xml Transition graph action attributes XML to add to
*/
void
pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml)
{
char *name = NULL;
char *value = NULL;
const pe_resource_t *parent = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
&& (xml != NULL));
/* Clone instance numbers get set internally as meta-attributes, and are
* needed in the transition graph (for example, to tell unique clone
* instances apart).
*/
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
if (value != NULL) {
name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
crm_xml_add(xml, name, value);
free(name);
}
// Not sure if this one is really needed ...
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
if (value != NULL) {
name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
crm_xml_add(xml, name, value);
free(name);
}
/* The container meta-attribute can be set on the primitive itself or one of
* its parents (for example, a group inside a container resource), so check
* them all, and keep the highest one found.
*/
for (parent = rsc; parent != NULL; parent = parent->parent) {
if (parent->container != NULL) {
crm_xml_add(xml, CRM_META "_" XML_RSC_ATTR_CONTAINER,
parent->container->id);
}
}
/* Bundle replica children will get their external-ip set internally as a
* meta-attribute. The graph action needs it, but under a different naming
* convention than other meta-attributes.
*/
value = g_hash_table_lookup(rsc->meta, "external-ip");
if (value != NULL) {
crm_xml_add(xml, "pcmk_external_ip", value);
}
}
// Primitive implementation of resource_alloc_functions_t:add_utilization()
void
pcmk__primitive_add_utilization(const pe_resource_t *rsc,
const pe_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
&& (orig_rsc != NULL) && (utilization != NULL));
if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
return;
}
pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization",
orig_rsc->id, rsc->id);
pcmk__release_node_capacity(utilization, rsc);
}
/*!
* \internal
* \brief Get epoch time of node's shutdown attribute (or now if none)
*
* \param[in,out] node Node to check
*
* \return Epoch time corresponding to shutdown attribute if set or now if not
*/
static time_t
shutdown_time(pe_node_t *node)
{
const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
time_t result = 0;
if (shutdown != NULL) {
long long result_ll;
if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
result = (time_t) result_ll;
}
}
return (result == 0)? get_effective_time(node->details->data_set) : result;
}
/*!
* \internal
* \brief Ban a resource from a node if it's not locked to the node
*
* \param[in] data Node to check
* \param[in,out] user_data Resource to check
*/
static void
ban_if_not_locked(gpointer data, gpointer user_data)
{
const pe_node_t *node = (const pe_node_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
if (strcmp(node->details->uname, rsc->lock_node->details->uname) != 0) {
resource_location(rsc, node, -CRM_SCORE_INFINITY,
XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster);
}
}
// Primitive implementation of resource_alloc_functions_t:shutdown_lock()
void
pcmk__primitive_shutdown_lock(pe_resource_t *rsc)
{
const char *class = NULL;
CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));
class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
// Fence devices and remote connections can't be locked
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
|| pe__resource_is_remote_conn(rsc)) {
return;
}
if (rsc->lock_node != NULL) {
// The lock was obtained from resource history
if (rsc->running_on != NULL) {
/* The resource was started elsewhere even though it is now
* considered locked. This shouldn't be possible, but as a
* failsafe, we don't want to disturb the resource now.
*/
pe_rsc_info(rsc,
"Cancelling shutdown lock because %s is already active",
rsc->id);
pe__clear_resource_history(rsc, rsc->lock_node);
rsc->lock_node = NULL;
rsc->lock_time = 0;
}
// Only a resource active on exactly one node can be locked
} else if (pcmk__list_of_1(rsc->running_on)) {
pe_node_t *node = rsc->running_on->data;
if (node->details->shutdown) {
if (node->details->unclean) {
pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
rsc->id, pe__node_name(node));
} else {
rsc->lock_node = node;
rsc->lock_time = shutdown_time(node);
}
}
}
if (rsc->lock_node == NULL) {
// No lock needed
return;
}
if (rsc->cluster->shutdown_lock > 0) {
time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
rsc->id, pe__node_name(rsc->lock_node),
(long long) lock_expiration);
pe__update_recheck_time(++lock_expiration, rsc->cluster);
} else {
pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
rsc->id, pe__node_name(rsc->lock_node));
}
// If resource is locked to one node, ban it from all other nodes
g_list_foreach(rsc->cluster->nodes, ban_if_not_locked, rsc);
}
diff --git a/lib/pacemaker/pcmk_sched_probes.c b/lib/pacemaker/pcmk_sched_probes.c
index 93bf19ca8d..7d66d39dc8 100644
--- a/lib/pacemaker/pcmk_sched_probes.c
+++ b/lib/pacemaker/pcmk_sched_probes.c
@@ -1,902 +1,902 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Add the expected result to a newly created probe
*
* \param[in,out] probe Probe action to add expected result to
* \param[in] rsc Resource that probe is for
* \param[in] node Node that probe will run on
*/
static void
add_expected_result(pe_action_t *probe, const pe_resource_t *rsc,
const pe_node_t *node)
{
// Check whether resource is currently active on node
pe_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
// The expected result is what we think the resource's current state is
if (running == NULL) {
pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING);
} else if (rsc->role == pcmk_role_promoted) {
pe__add_action_expected_result(probe, CRM_EX_PROMOTED);
}
}
/*!
* \internal
* \brief Create any needed robes on a node for a list of resources
*
* \param[in,out] rscs List of resources to create probes for
* \param[in,out] node Node to create probes on
*
* \return true if any probe was created, otherwise false
*/
bool
pcmk__probe_resource_list(GList *rscs, pe_node_t *node)
{
bool any_created = false;
for (GList *iter = rscs; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
if (rsc->cmds->create_probe(rsc, node)) {
any_created = true;
}
}
return any_created;
}
/*!
* \internal
* \brief Order one resource's start after another's start-up probe
*
* \param[in,out] rsc1 Resource that might get start-up probe
* \param[in] rsc2 Resource that might be started
*/
static void
probe_then_start(pe_resource_t *rsc1, pe_resource_t *rsc2)
{
if ((rsc1->allocated_to != NULL)
&& (g_hash_table_lookup(rsc1->known_on,
rsc1->allocated_to->details->id) == NULL)) {
pcmk__new_ordering(rsc1,
pcmk__op_key(rsc1->id, PCMK_ACTION_MONITOR, 0),
NULL,
rsc2, pcmk__op_key(rsc2->id, PCMK_ACTION_START, 0),
NULL,
pcmk__ar_ordered, rsc1->cluster);
}
}
/*!
* \internal
* \brief Check whether a guest resource will stop
*
* \param[in] node Guest node to check
*
* \return true if guest resource will likely stop, otherwise false
*/
static bool
guest_resource_will_stop(const pe_node_t *node)
{
const pe_resource_t *guest_rsc = node->details->remote_rsc->container;
/* Ideally, we'd check whether the guest has a required stop, but that
* information doesn't exist yet, so approximate it ...
*/
return node->details->remote_requires_reset
|| node->details->unclean
|| pcmk_is_set(guest_rsc->flags, pcmk_rsc_failed)
|| (guest_rsc->next_role == pcmk_role_stopped)
// Guest is moving
|| ((guest_rsc->role > pcmk_role_stopped)
&& (guest_rsc->allocated_to != NULL)
&& (pe_find_node(guest_rsc->running_on,
guest_rsc->allocated_to->details->uname) == NULL));
}
/*!
* \internal
* \brief Create a probe action for a resource on a node
*
* \param[in,out] rsc Resource to create probe for
* \param[in,out] node Node to create probe on
*
* \return Newly created probe action
*/
static pe_action_t *
probe_action(pe_resource_t *rsc, pe_node_t *node)
{
pe_action_t *probe = NULL;
char *key = pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0);
crm_debug("Scheduling probe of %s %s on %s",
role2text(rsc->role), rsc->id, pe__node_name(node));
probe = custom_action(rsc, key, PCMK_ACTION_MONITOR, node, FALSE, TRUE,
rsc->cluster);
pe__clear_action_flags(probe, pcmk_action_optional);
pcmk__order_vs_unfence(rsc, node, probe, pcmk__ar_ordered);
add_expected_result(probe, rsc, node);
return probe;
}
/*!
* \internal
* \brief Create probes for a resource on a node, if needed
*
* \brief Schedule any probes needed for a resource on a node
*
* \param[in,out] rsc Resource to create probe for
* \param[in,out] node Node to create probe on
*
* \return true if any probe was created, otherwise false
*/
bool
pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node)
{
uint32_t flags = pcmk__ar_ordered;
pe_action_t *probe = NULL;
pe_node_t *allowed = NULL;
pe_resource_t *top = uber_parent(rsc);
const char *reason = NULL;
CRM_ASSERT((rsc != NULL) && (node != NULL));
if (!pcmk_is_set(rsc->cluster->flags, pcmk_sched_probe_resources)) {
reason = "start-up probes are disabled";
goto no_probe;
}
if (pe__is_guest_or_remote_node(node)) {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
reason = "Pacemaker Remote nodes cannot run stonith agents";
goto no_probe;
} else if (pe__is_guest_node(node)
&& pe__resource_contains_guest_node(rsc->cluster, rsc)) {
reason = "guest nodes cannot run resources containing guest nodes";
goto no_probe;
} else if (rsc->is_remote_node) {
reason = "Pacemaker Remote nodes cannot host remote connections";
goto no_probe;
}
}
// If this is a collective resource, probes are created for its children
if (rsc->children != NULL) {
return pcmk__probe_resource_list(rsc->children, node);
}
if ((rsc->container != NULL) && !rsc->is_remote_node) {
reason = "resource is inside a container";
goto no_probe;
} else if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
reason = "resource is orphaned";
goto no_probe;
} else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
reason = "resource state is already known";
goto no_probe;
}
allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (rsc->exclusive_discover || top->exclusive_discover) {
// Exclusive discovery is enabled ...
if (allowed == NULL) {
// ... but this node is not allowed to run the resource
reason = "resource has exclusive discovery but is not allowed "
"on node";
goto no_probe;
} else if (allowed->rsc_discover_mode != pcmk_probe_exclusive) {
// ... but no constraint marks this node for discovery of resource
reason = "resource has exclusive discovery but is not enabled "
"on node";
goto no_probe;
}
}
if (allowed == NULL) {
allowed = node;
}
if (allowed->rsc_discover_mode == pcmk_probe_never) {
reason = "node has discovery disabled";
goto no_probe;
}
if (pe__is_guest_node(node)) {
pe_resource_t *guest = node->details->remote_rsc->container;
if (guest->role == pcmk_role_stopped) {
// The guest is stopped, so we know no resource is active there
reason = "node's guest is stopped";
probe_then_start(guest, top);
goto no_probe;
} else if (guest_resource_will_stop(node)) {
reason = "node's guest will stop";
// Order resource start after guest stop (in case it's restarting)
pcmk__new_ordering(guest,
pcmk__op_key(guest->id, PCMK_ACTION_STOP, 0),
NULL, top,
pcmk__op_key(top->id, PCMK_ACTION_START, 0),
NULL, pcmk__ar_ordered, rsc->cluster);
goto no_probe;
}
}
// We've eliminated all cases where a probe is not needed, so now it is
probe = probe_action(rsc, node);
/* Below, we will order the probe relative to start or reload. If this is a
* clone instance, the start or reload is for the entire clone rather than
* just the instance. Otherwise, the start or reload is for the resource
* itself.
*/
if (!pe_rsc_is_clone(top)) {
top = rsc;
}
/* Prevent a start if the resource can't be probed, but don't cause the
* resource or entire clone to stop if already active.
*/
if (!pcmk_is_set(probe->flags, pcmk_action_runnable)
&& (top->running_on == NULL)) {
pe__set_order_flags(flags, pcmk__ar_unrunnable_first_blocks);
}
// Start or reload after probing the resource
pcmk__new_ordering(rsc, NULL, probe,
top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL,
flags, rsc->cluster);
pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
pcmk__ar_ordered, rsc->cluster);
return true;
no_probe:
pe_rsc_trace(rsc,
"Skipping probe for %s on %s because %s",
rsc->id, node->details->id, reason);
return false;
}
/*!
* \internal
* \brief Check whether a probe should be ordered before another action
*
* \param[in] probe Probe action to check
* \param[in] then Other action to check
*
* \return true if \p probe should be ordered before \p then, otherwise false
*/
static bool
probe_needed_before_action(const pe_action_t *probe, const pe_action_t *then)
{
// Probes on a node are performed after unfencing it, not before
if (pcmk__str_eq(then->task, PCMK_ACTION_STONITH, pcmk__str_none)
&& pe__same_node(probe->node, then->node)) {
const char *op = g_hash_table_lookup(then->meta, "stonith_action");
if (pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) {
return false;
}
}
// Probes should be done on a node before shutting it down
if (pcmk__str_eq(then->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)
&& (probe->node != NULL) && (then->node != NULL)
&& !pe__same_node(probe->node, then->node)) {
return false;
}
// Otherwise probes should always be done before any other action
return true;
}
/*!
* \internal
* \brief Add implicit "probe then X" orderings for "stop then X" orderings
*
* If the state of a resource is not known yet, a probe will be scheduled,
* expecting a "not running" result. If the probe fails, a stop will not be
* scheduled until the next transition. Thus, if there are ordering constraints
* like "stop this resource then do something else that's not for the same
* resource", add implicit "probe this resource then do something" equivalents
* so the relation is upheld until we know whether a stop is needed.
*
* \param[in,out] data_set Cluster working set
*/
static void
add_probe_orderings_for_stops(pe_working_set_t *data_set)
{
for (GList *iter = data_set->ordering_constraints; iter != NULL;
iter = iter->next) {
pe__ordering_t *order = iter->data;
uint32_t order_flags = pcmk__ar_ordered;
GList *probes = NULL;
GList *then_actions = NULL;
pe_action_t *first = NULL;
pe_action_t *then = NULL;
// Skip disabled orderings
if (order->flags == pcmk__ar_none) {
continue;
}
// Skip non-resource orderings, and orderings for the same resource
if ((order->lh_rsc == NULL) || (order->lh_rsc == order->rh_rsc)) {
continue;
}
// Skip invalid orderings (shouldn't be possible)
first = order->lh_action;
then = order->rh_action;
if (((first == NULL) && (order->lh_action_task == NULL))
|| ((then == NULL) && (order->rh_action_task == NULL))) {
continue;
}
// Skip orderings for first actions other than stop
if ((first != NULL) && !pcmk__str_eq(first->task, PCMK_ACTION_STOP,
pcmk__str_none)) {
continue;
} else if ((first == NULL)
&& !pcmk__ends_with(order->lh_action_task,
"_" PCMK_ACTION_STOP "_0")) {
continue;
}
/* Do not imply a probe ordering for a resource inside of a stopping
* container. Otherwise, it might introduce a transition loop, since a
* probe could be scheduled after the container starts again.
*/
if ((order->rh_rsc != NULL)
&& (order->lh_rsc->container == order->rh_rsc)) {
if ((then != NULL) && pcmk__str_eq(then->task, PCMK_ACTION_STOP,
pcmk__str_none)) {
continue;
} else if ((then == NULL)
&& pcmk__ends_with(order->rh_action_task,
"_" PCMK_ACTION_STOP "_0")) {
continue;
}
}
// Preserve certain order options for future filtering
if (pcmk_is_set(order->flags, pcmk__ar_if_first_unmigratable)) {
pe__set_order_flags(order_flags, pcmk__ar_if_first_unmigratable);
}
- if (pcmk_is_set(order->flags, pe_order_same_node)) {
- pe__set_order_flags(order_flags, pe_order_same_node);
+ if (pcmk_is_set(order->flags, pcmk__ar_if_on_same_node)) {
+ pe__set_order_flags(order_flags, pcmk__ar_if_on_same_node);
}
// Preserve certain order types for future filtering
if ((order->flags == pe_order_anti_colocation)
|| (order->flags == pe_order_load)) {
order_flags = order->flags;
}
// List all scheduled probes for the first resource
probes = pe__resource_actions(order->lh_rsc, NULL, PCMK_ACTION_MONITOR,
FALSE);
if (probes == NULL) { // There aren't any
continue;
}
// List all relevant "then" actions
if (then != NULL) {
then_actions = g_list_prepend(NULL, then);
} else if (order->rh_rsc != NULL) {
then_actions = find_actions(order->rh_rsc->actions,
order->rh_action_task, NULL);
if (then_actions == NULL) { // There aren't any
g_list_free(probes);
continue;
}
}
crm_trace("Implying 'probe then' orderings for '%s then %s' "
"(id=%d, type=%.6x)",
((first == NULL)? order->lh_action_task : first->uuid),
((then == NULL)? order->rh_action_task : then->uuid),
order->id, order->flags);
for (GList *probe_iter = probes; probe_iter != NULL;
probe_iter = probe_iter->next) {
pe_action_t *probe = (pe_action_t *) probe_iter->data;
for (GList *then_iter = then_actions; then_iter != NULL;
then_iter = then_iter->next) {
pe_action_t *then = (pe_action_t *) then_iter->data;
if (probe_needed_before_action(probe, then)) {
order_actions(probe, then, order_flags);
}
}
}
g_list_free(then_actions);
g_list_free(probes);
}
}
/*!
* \internal
* \brief Add necessary orderings between probe and starts of clone instances
*
* , in additon to the ordering with the parent resource added upon creating
* the probe.
*
* \param[in,out] probe Probe as 'first' action in an ordering
* \param[in,out] after 'then' action wrapper in the ordering
*/
static void
add_start_orderings_for_probe(pe_action_t *probe, pe_action_wrapper_t *after)
{
uint32_t flags = pcmk__ar_ordered|pcmk__ar_unrunnable_first_blocks;
/* Although the ordering between the probe of the clone instance and the
* start of its parent has been added in pcmk__probe_rsc_on_node(), we
* avoided enforcing `pcmk__ar_unrunnable_first_blocks` order type for that
* as long as any of the clone instances are running to prevent them from
* being unexpectedly stopped.
*
* On the other hand, we still need to prevent any inactive instances from
* starting unless the probe is runnable so that we don't risk starting too
* many instances before we know the state on all nodes.
*/
if ((after->action->rsc->variant <= pcmk_rsc_variant_group)
|| pcmk_is_set(probe->flags, pcmk_action_runnable)
// The order type is already enforced for its parent.
|| pcmk_is_set(after->type, pcmk__ar_unrunnable_first_blocks)
|| (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
|| !pcmk__str_eq(after->action->task, PCMK_ACTION_START,
pcmk__str_none)) {
return;
}
crm_trace("Adding probe start orderings for 'unrunnable %s@%s "
"then instances of %s@%s'",
probe->uuid, pe__node_name(probe->node),
after->action->uuid, pe__node_name(after->action->node));
for (GList *then_iter = after->action->actions_after; then_iter != NULL;
then_iter = then_iter->next) {
pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
if (then->action->rsc->running_on
|| (pe__const_top_resource(then->action->rsc, false)
!= after->action->rsc)
|| !pcmk__str_eq(then->action->task, PCMK_ACTION_START,
pcmk__str_none)) {
continue;
}
crm_trace("Adding probe start ordering for 'unrunnable %s@%s "
"then %s@%s' (type=%#.6x)",
probe->uuid, pe__node_name(probe->node),
then->action->uuid, pe__node_name(then->action->node), flags);
/* Prevent the instance from starting if the instance can't, but don't
* cause any other intances to stop if already active.
*/
order_actions(probe, then->action, flags);
}
return;
}
/*!
* \internal
* \brief Order probes before restarts and re-promotes
*
* If a given ordering is a "probe then start" or "probe then promote" ordering,
* add an implicit "probe then stop/demote" ordering in case the action is part
* of a restart/re-promote, and do the same recursively for all actions ordered
* after the "then" action.
*
* \param[in,out] probe Probe as 'first' action in an ordering
* \param[in,out] after 'then' action in the ordering
*/
static void
add_restart_orderings_for_probe(pe_action_t *probe, pe_action_t *after)
{
GList *iter = NULL;
bool interleave = false;
pe_resource_t *compatible_rsc = NULL;
// Validate that this is a resource probe followed by some action
if ((after == NULL) || (probe == NULL) || (probe->rsc == NULL)
|| (probe->rsc->variant != pcmk_rsc_variant_primitive)
|| !pcmk__str_eq(probe->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
return;
}
// Avoid running into any possible loop
if (pcmk_is_set(after->flags, pcmk_action_detect_loop)) {
return;
}
pe__set_action_flags(after, pcmk_action_detect_loop);
crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
probe->uuid, pe__node_name(probe->node),
after->uuid, pe__node_name(after->node));
/* Add restart orderings if "then" is for a different primitive.
* Orderings for collective resources will be added later.
*/
if ((after->rsc != NULL)
&& (after->rsc->variant == pcmk_rsc_variant_primitive)
&& (probe->rsc != after->rsc)) {
GList *then_actions = NULL;
if (pcmk__str_eq(after->task, PCMK_ACTION_START, pcmk__str_none)) {
then_actions = pe__resource_actions(after->rsc, NULL,
PCMK_ACTION_STOP, FALSE);
} else if (pcmk__str_eq(after->task, PCMK_ACTION_PROMOTE,
pcmk__str_none)) {
then_actions = pe__resource_actions(after->rsc, NULL,
PCMK_ACTION_DEMOTE, FALSE);
}
for (iter = then_actions; iter != NULL; iter = iter->next) {
pe_action_t *then = (pe_action_t *) iter->data;
// Skip pseudo-actions (for example, those implied by fencing)
if (!pcmk_is_set(then->flags, pcmk_action_pseudo)) {
order_actions(probe, then, pcmk__ar_ordered);
}
}
g_list_free(then_actions);
}
/* Detect whether "then" is an interleaved clone action. For these, we want
* to add orderings only for the relevant instance.
*/
if ((after->rsc != NULL)
&& (after->rsc->variant > pcmk_rsc_variant_group)) {
const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
XML_RSC_ATTR_INTERLEAVE);
interleave = crm_is_true(interleave_s);
if (interleave) {
compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
after->rsc,
pcmk_role_unknown,
false);
}
}
/* Now recursively do the same for all actions ordered after "then". This
* also handles collective resources since the collective action will be
* ordered before its individual instances' actions.
*/
for (iter = after->actions_after; iter != NULL; iter = iter->next) {
pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) iter->data;
/* pcmk__ar_first_implies_then is the reason why a required A.start
* implies/enforces B.start to be required too, which is the cause of
* B.restart/re-promote.
*
* Not sure about pcmk__ar_first_implies_same_node_then though. It's now
* only used for unfencing case, which tends to introduce transition
* loops...
*/
if (!pcmk_is_set(after_wrapper->type, pcmk__ar_first_implies_then)) {
/* The order type between a group/clone and its child such as
* B.start-> B_child.start is:
* pe_order_implies_first_printed|pcmk__ar_unrunnable_first_blocks
*
* Proceed through the ordering chain and build dependencies with
* its children.
*/
if ((after->rsc == NULL)
|| (after->rsc->variant < pcmk_rsc_variant_group)
|| (probe->rsc->parent == after->rsc)
|| (after_wrapper->action->rsc == NULL)
|| (after_wrapper->action->rsc->variant > pcmk_rsc_variant_group)
|| (after->rsc != after_wrapper->action->rsc->parent)) {
continue;
}
/* Proceed to the children of a group or a non-interleaved clone.
* For an interleaved clone, proceed only to the relevant child.
*/
if ((after->rsc->variant > pcmk_rsc_variant_group) && interleave
&& ((compatible_rsc == NULL)
|| (compatible_rsc != after_wrapper->action->rsc))) {
continue;
}
}
crm_trace("Recursively adding probe restart orderings for "
"'%s@%s then %s@%s' (type=%#.6x)",
after->uuid, pe__node_name(after->node),
after_wrapper->action->uuid,
pe__node_name(after_wrapper->action->node),
after_wrapper->type);
add_restart_orderings_for_probe(probe, after_wrapper->action);
}
}
/*!
* \internal
* \brief Clear the tracking flag on all scheduled actions
*
* \param[in,out] data_set Cluster working set
*/
static void
clear_actions_tracking_flag(pe_working_set_t *data_set)
{
for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) {
pe_action_t *action = iter->data;
pe__clear_action_flags(action, pcmk_action_detect_loop);
}
}
/*!
* \internal
* \brief Add start and restart orderings for probes scheduled for a resource
*
* \param[in,out] data Resource whose probes should be ordered
* \param[in] user_data Unused
*/
static void
add_start_restart_orderings_for_rsc(gpointer data, gpointer user_data)
{
pe_resource_t *rsc = data;
GList *probes = NULL;
// For collective resources, order each instance recursively
if (rsc->variant != pcmk_rsc_variant_primitive) {
g_list_foreach(rsc->children, add_start_restart_orderings_for_rsc,
NULL);
return;
}
// Find all probes for given resource
probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
// Add probe restart orderings for each probe found
for (GList *iter = probes; iter != NULL; iter = iter->next) {
pe_action_t *probe = (pe_action_t *) iter->data;
for (GList *then_iter = probe->actions_after; then_iter != NULL;
then_iter = then_iter->next) {
pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
add_start_orderings_for_probe(probe, then);
add_restart_orderings_for_probe(probe, then->action);
clear_actions_tracking_flag(rsc->cluster);
}
}
g_list_free(probes);
}
/*!
* \internal
* \brief Add "A then probe B" orderings for "A then B" orderings
*
* \param[in,out] data_set Cluster working set
*
* \note This function is currently disabled (see next comment).
*/
static void
order_then_probes(pe_working_set_t *data_set)
{
#if 0
/* Given an ordering "A then B", we would prefer to wait for A to be started
* before probing B.
*
* For example, if A is a filesystem which B can't even run without, it
* would be helpful if the author of B's agent could assume that A is
* running before B.monitor will be called.
*
* However, we can't _only_ probe after A is running, otherwise we wouldn't
* detect the state of B if A could not be started. We can't even do an
* opportunistic version of this, because B may be moving:
*
* A.stop -> A.start -> B.probe -> B.stop -> B.start
*
* and if we add B.stop -> A.stop here, we get a loop:
*
* A.stop -> A.start -> B.probe -> B.stop -> A.stop
*
* We could kill the "B.probe -> B.stop" dependency, but that could mean
* stopping B "too" soon, because B.start must wait for the probe, and
* we don't want to stop B if we can't start it.
*
* We could add the ordering only if A is an anonymous clone with
* clone-max == node-max (since we'll never be moving it). However, we could
* still be stopping one instance at the same time as starting another.
*
* The complexity of checking for allowed conditions combined with the ever
* narrowing use case suggests that this code should remain disabled until
* someone gets smarter.
*/
for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
pe_action_t *start = NULL;
GList *actions = NULL;
GList *probes = NULL;
actions = pe__resource_actions(rsc, NULL, PCMK_ACTION_START, FALSE);
if (actions) {
start = actions->data;
g_list_free(actions);
}
if (start == NULL) {
crm_err("No start action for %s", rsc->id);
continue;
}
probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
for (actions = start->actions_before; actions != NULL;
actions = actions->next) {
pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data;
pe_action_t *first = before->action;
pe_resource_t *first_rsc = first->rsc;
if (first->required_runnable_before) {
for (GList *clone_actions = first->actions_before;
clone_actions != NULL;
clone_actions = clone_actions->next) {
before = (pe_action_wrapper_t *) clone_actions->data;
crm_trace("Testing '%s then %s' for %s",
first->uuid, before->action->uuid, start->uuid);
CRM_ASSERT(before->action->rsc != NULL);
first_rsc = before->action->rsc;
break;
}
} else if (!pcmk__str_eq(first->task, PCMK_ACTION_START,
pcmk__str_none)) {
crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
}
if (first_rsc == NULL) {
continue;
} else if (pe__const_top_resource(first_rsc, false)
== pe__const_top_resource(start->rsc, false)) {
crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
continue;
} else if (!pe_rsc_is_clone(pe__const_top_resource(first_rsc,
false))) {
crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
continue;
}
crm_err("Applying %s before %s %d", first->uuid, start->uuid,
pe__const_top_resource(first_rsc, false)->variant);
for (GList *probe_iter = probes; probe_iter != NULL;
probe_iter = probe_iter->next) {
pe_action_t *probe = (pe_action_t *) probe_iter->data;
crm_err("Ordering %s before %s", first->uuid, probe->uuid);
order_actions(first, probe, pcmk__ar_ordered);
}
}
}
#endif
}
void
pcmk__order_probes(pe_working_set_t *data_set)
{
// Add orderings for "probe then X"
g_list_foreach(data_set->resources, add_start_restart_orderings_for_rsc,
NULL);
add_probe_orderings_for_stops(data_set);
order_then_probes(data_set);
}
/*!
* \internal
* \brief Schedule any probes needed
*
* \param[in,out] data_set Cluster working set
*
* \note This may also schedule fencing of failed remote nodes.
*/
void
pcmk__schedule_probes(pe_working_set_t *data_set)
{
// Schedule probes on each node in the cluster as needed
for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
const char *probed = NULL;
if (!node->details->online) { // Don't probe offline nodes
if (pcmk__is_failed_remote_node(node)) {
pe_fence_node(data_set, node,
"the connection is unrecoverable", FALSE);
}
continue;
} else if (node->details->unclean) { // ... or nodes that need fencing
continue;
} else if (!node->details->rsc_discovery_enabled) {
// The user requested that probes not be done on this node
continue;
}
/* This is no longer needed for live clusters, since the probe_complete
* node attribute will never be in the CIB. However this is still useful
* for processing old saved CIBs (< 1.1.14), including the
* reprobe-target_rc regression test.
*/
probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
if (probed != NULL && crm_is_true(probed) == FALSE) {
pe_action_t *probe_op = NULL;
probe_op = custom_action(NULL,
crm_strdup_printf("%s-%s", CRM_OP_REPROBE,
node->details->uname),
CRM_OP_REPROBE, node, FALSE, TRUE,
data_set);
add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT,
XML_BOOLEAN_TRUE);
continue;
}
// Probe each resource in the cluster on this node, as needed
pcmk__probe_resource_list(data_set->resources, node);
}
}

File Metadata

Mime Type
text/x-diff
Expires
Thu, Oct 16, 12:22 AM (16 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2530819
Default Alt Text
(235 KB)

Event Timeline