Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F1841416
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
179 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h
index f19fa56b79..e8d619588a 100644
--- a/include/pcmki/pcmki_sched_utils.h
+++ b/include/pcmki/pcmki_sched_utils.h
@@ -1,70 +1,69 @@
/*
* Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PENGINE_AUTILS__H
# define PENGINE_AUTILS__H
#include <stdbool.h> // bool
#include <glib.h> // GList, GHashTable, gboolean, guint
#include <crm/lrmd.h> // lrmd_event_data_t
#include <crm/cib.h> // cib_t
#include <crm/pengine/pe_types.h>
#include <crm/pengine/internal.h>
#include <pcmki/pcmki_scheduler.h>
#include <pcmki/pcmki_transition.h>
#include <pacemaker.h>
/* Constraint helper functions */
pcmk__colocation_t *invert_constraint(pcmk__colocation_t *constraint);
pe__location_t *copy_constraint(pe__location_t *constraint);
GHashTable *pcmk__copy_node_table(GHashTable *nodes);
GList *pcmk__copy_node_list(const GList *list, bool reset);
GList *sort_nodes_by_weight(GList *nodes, pe_node_t *active_node,
pe_working_set_t *data_set);
extern gboolean can_run_resources(const pe_node_t * node);
gboolean can_run_any(GHashTable * nodes);
pe_resource_t *find_compatible_child(pe_resource_t *local_child,
pe_resource_t *rsc, enum rsc_role_e filter,
gboolean current,
pe_working_set_t *data_set);
pe_resource_t *find_compatible_child_by_node(pe_resource_t * local_child, pe_node_t * local_node, pe_resource_t * rsc,
enum rsc_role_e filter, gboolean current);
gboolean is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current);
enum pe_action_flags summary_action_flags(pe_action_t * action, GList *children, pe_node_t * node);
enum action_tasks clone_child_action(pe_action_t * action);
int copies_per_node(pe_resource_t * rsc);
extern int compare_capacity(const pe_node_t * node1, const pe_node_t * node2);
extern void calculate_utilization(GHashTable * current_utilization,
GHashTable * utilization, gboolean plus);
extern void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_working_set_t * data_set);
-pe_action_t *create_pseudo_resource_op(pe_resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set);
pe_action_t *pe_cancel_op(pe_resource_t *rsc, const char *name,
guint interval_ms, pe_node_t *node,
pe_working_set_t *data_set);
pe_action_t *sched_shutdown_op(pe_node_t *node, pe_working_set_t *data_set);
xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event,
const char *caller_version, int target_rc,
const char *node, const char *origin,
int level);
# define LOAD_STOPPED "load_stopped"
void modify_configuration(pe_working_set_t *data_set, cib_t *cib,
pcmk_injections_t *injections);
enum transition_status run_simulation(pe_working_set_t * data_set, cib_t *cib, GList *op_fail_list);
#endif
diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h
index db8ae11c35..f58ea53ff0 100644
--- a/lib/pacemaker/libpacemaker_private.h
+++ b/lib/pacemaker/libpacemaker_private.h
@@ -1,225 +1,229 @@
/*
* Copyright 2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__LIBPACEMAKER_PRIVATE__H
# define PCMK__LIBPACEMAKER_PRIVATE__H
/* This header is for the sole use of libpacemaker, so that functions can be
* declared with G_GNUC_INTERNAL for efficiency.
*/
#include <crm/pengine/pe_types.h> // pe_action_t, pe_node_t, pe_working_set_t
// Actions
G_GNUC_INTERNAL
void pcmk__update_action_for_orderings(pe_action_t *action,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__log_action(const char *pre_text, pe_action_t *action, bool details);
+G_GNUC_INTERNAL
+pe_action_t *pcmk__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task,
+ bool optional, bool runnable);
+
G_GNUC_INTERNAL
bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action,
pe_action_wrapper_t *input);
G_GNUC_INTERNAL
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *action, enum pe_ordering order,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__fence_guest(pe_node_t *node, pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__node_unfenced(pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__is_unfence_device(const pe_resource_t *rsc,
const pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id);
G_GNUC_INTERNAL
xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id,
pe_resource_t **rsc, pe_tag_t **tag);
G_GNUC_INTERNAL
bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr,
bool convert_rsc, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__create_internal_constraints(pe_working_set_t *data_set);
// Location constraints
G_GNUC_INTERNAL
void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc,
int node_weight, const char *discover_mode,
pe_node_t *foo_node,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_locations(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_location(pe__location_t *constraint, pe_resource_t *rsc);
// Colocation constraints
enum pcmk__coloc_affects {
pcmk__coloc_affects_nothing = 0,
pcmk__coloc_affects_location,
pcmk__coloc_affects_role,
};
G_GNUC_INTERNAL
enum pcmk__coloc_affects pcmk__colocation_affects(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint,
bool preview);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_weights(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint);
G_GNUC_INTERNAL
void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__new_colocation(const char *id, const char *node_attr, int score,
pe_resource_t *dependent, pe_resource_t *primary,
const char *dependent_role, const char *primary_role,
bool influence, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__block_colocated_starts(pe_action_t *action,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__new_ordering(pe_resource_t *lh_rsc, char *lh_task,
pe_action_t *lh_action, pe_resource_t *rh_rsc,
char *rh_task, pe_action_t *rh_action,
enum pe_ordering type, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__disable_invalid_orderings(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_stops_before_shutdown(pe_node_t *node,
pe_action_t *shutdown_op,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_orderings(pe_working_set_t *data_set);
/*!
* \internal
* \brief Create a new ordering between two resource actions
*
* \param[in] lh_rsc Resource for 'first' action
* \param[in] rh_rsc Resource for 'then' action
* \param[in] lh_task Action key for 'first' action
* \param[in] rh_task Action key for 'then' action
* \param[in] flags Bitmask of enum pe_ordering flags
* \param[in] data_set Cluster working set to add ordering to
*/
#define pcmk__order_resource_actions(lh_rsc, lh_task, rh_rsc, rh_task, \
flags, data_set) \
pcmk__new_ordering((lh_rsc), pcmk__op_key((lh_rsc)->id, (lh_task), 0), \
NULL, \
(rh_rsc), pcmk__op_key((rh_rsc)->id, (rh_task), 0), \
NULL, (flags), (data_set))
#define pcmk__order_starts(rsc1, rsc2, type, data_set) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \
(rsc2), CRMD_ACTION_START, (type), (data_set))
#define pcmk__order_stops(rsc1, rsc2, type, data_set) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \
(rsc2), CRMD_ACTION_STOP, (type), (data_set))
G_GNUC_INTERNAL
void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_probes(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__is_failed_remote_node(pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_remote_connection_actions(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node);
G_GNUC_INTERNAL
pe_node_t *pcmk__connection_host_for_action(pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action);
// Groups (pcmk_sched_group.c)
G_GNUC_INTERNAL
GList *pcmk__group_colocated_resources(pe_resource_t *rsc,
pe_resource_t *orig_rsc,
GList *colocated_rscs);
// Functions applying to more than one variant (pcmk_sched_resource.c)
G_GNUC_INTERNAL
GList *pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs);
G_GNUC_INTERNAL
bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force);
G_GNUC_INTERNAL
bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force);
G_GNUC_INTERNAL
void pcmk__unassign_resource(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set,
pe_resource_t **failed);
#endif // PCMK__LIBPACEMAKER_PRIVATE__H
diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c
index 2064398aa9..a5d9abf144 100644
--- a/lib/pacemaker/pcmk_sched_actions.c
+++ b/lib/pacemaker/pcmk_sched_actions.c
@@ -1,767 +1,781 @@
/*
* Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <sys/param.h>
#include <glib.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Get the action flags relevant to ordering constraints
*
* \param[in] action Action to check
* \param[in] node Node that *other* action in the ordering is on
* (used only for clone resource actions)
*
* \return Action flags that should be used for orderings
*/
static enum pe_action_flags
action_flags_for_ordering(pe_action_t *action, pe_node_t *node)
{
bool runnable = false;
enum pe_action_flags flags;
// For non-resource actions, return the action flags
if (action->rsc == NULL) {
return action->flags;
}
/* For non-clone resources, or a clone action not assigned to a node,
* return the flags as determined by the resource method without a node
* specified.
*/
flags = action->rsc->cmds->action_flags(action, NULL);
if ((node == NULL) || !pe_rsc_is_clone(action->rsc)) {
return flags;
}
/* Otherwise (i.e., for clone resource actions on a specific node), first
* remember whether the non-node-specific action is runnable.
*/
runnable = pcmk_is_set(flags, pe_action_runnable);
// Then recheck the resource method with the node
flags = action->rsc->cmds->action_flags(action, node);
/* For clones in ordering constraints, the node-specific "runnable" doesn't
* matter, just the non-node-specific setting (i.e., is the action runnable
* anywhere).
*
* This applies only to runnable, and only for ordering constraints. This
* function shouldn't be used for other types of constraints without
* changes. Not very satisfying, but it's logical and appears to work well.
*/
if (runnable && !pcmk_is_set(flags, pe_action_runnable)) {
pe__set_raw_action_flags(flags, action->rsc->id,
pe_action_runnable);
}
return flags;
}
/*!
* \internal
* \brief Get action UUID that should be used with a resource ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the UUID and resource of the first action in an
* ordering, this returns the UUID of the action that should actually be used
* for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0").
*
* \param[in] first_uuid UUID of first action in ordering
* \param[in] first_rsc Resource of first action in ordering
*
* \return Newly allocated copy of UUID to use with ordering
* \note It is the caller's responsibility to free the return value.
*/
static char *
action_uuid_for_ordering(const char *first_uuid, pe_resource_t *first_rsc)
{
guint interval_ms = 0;
char *uuid = NULL;
char *rid = NULL;
char *first_task_str = NULL;
enum action_tasks first_task = no_action;
enum action_tasks remapped_task = no_action;
// Only non-notify actions for collective resources need remapping
if ((strstr(first_uuid, "notify") != NULL)
|| (first_rsc->variant < pe_group)) {
goto done;
}
// Only non-recurring actions need remapping
CRM_ASSERT(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms));
if (interval_ms > 0) {
goto done;
}
first_task = text2task(first_task_str);
switch (first_task) {
case stop_rsc:
case start_rsc:
case action_notify:
case action_promote:
case action_demote:
remapped_task = first_task + 1;
break;
case stopped_rsc:
case started_rsc:
case action_notified:
case action_promoted:
case action_demoted:
remapped_task = first_task;
break;
case monitor_rsc:
case shutdown_crm:
case stonith_node:
break;
default:
crm_err("Unknown action '%s' in ordering", first_task_str);
break;
}
if (remapped_task != no_action) {
/* If a (clone) resource has notifications enabled, we want to order
* relative to when all notifications have been sent for the remapped
* task. Only outermost resources or those in bundles have
* notifications.
*/
if (pcmk_is_set(first_rsc->flags, pe_rsc_notify)
&& ((first_rsc->parent == NULL)
|| (pe_rsc_is_clone(first_rsc)
&& (first_rsc->parent->variant == pe_container)))) {
uuid = pcmk__notify_key(rid, "confirmed-post",
task2text(remapped_task));
} else {
uuid = pcmk__op_key(rid, task2text(remapped_task), 0);
}
pe_rsc_trace(first_rsc,
"Remapped action UUID %s to %s for ordering purposes",
first_uuid, uuid);
}
done:
if (uuid == NULL) {
uuid = strdup(first_uuid);
CRM_ASSERT(uuid != NULL);
}
free(first_task_str);
free(rid);
return uuid;
}
/*!
* \internal
* \brief Get actual action that should be used with an ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the first action in an ordering, this returns the
* the action that should actually be used for ordering (for example, the
* started action instead of the start action).
*
* \param[in] action First action in an ordering
*
* \return Actual action that should be used for the ordering
*/
static pe_action_t *
action_for_ordering(pe_action_t *action)
{
pe_action_t *result = action;
pe_resource_t *rsc = action->rsc;
if ((rsc != NULL) && (rsc->variant >= pe_group) && (action->uuid != NULL)) {
char *uuid = action_uuid_for_ordering(action->uuid, rsc);
result = find_first_action(rsc->actions, uuid, NULL, NULL);
if (result == NULL) {
crm_warn("Not remapping %s to %s because %s does not have "
"remapped action", action->uuid, uuid, rsc->id);
result = action;
}
free(uuid);
}
return result;
}
/*!
* \internal
* \brief Update flags for ordering's actions appropriately for ordering's flags
*
* \param[in] first First action in an ordering
* \param[in] then Then action in an ordering
* \param[in] first_flags Action flags for \p first for ordering purposes
* \param[in] then_flags Action flags for \p then for ordering purposes
* \param[in] order Action wrapper for \p first in ordering
* \param[in] data_set Cluster working set
*
* \return Mask of pe_graph_updated_first and/or pe_graph_updated_then
*/
static enum pe_graph_flags
update_action_for_ordering_flags(pe_action_t *first, pe_action_t *then,
enum pe_action_flags first_flags,
enum pe_action_flags then_flags,
pe_action_wrapper_t *order,
pe_working_set_t *data_set)
{
enum pe_graph_flags changed = pe_graph_none;
/* The node will only be used for clones. If interleaved, node will be NULL,
* otherwise the ordering scope will be limited to the node. Normally, the
* whole 'then' clone should restart if 'first' is restarted, so then->node
* is needed.
*/
pe_node_t *node = then->node;
if (pcmk_is_set(order->type, pe_order_implies_then_on_node)) {
/* For unfencing, only instances of 'then' on the same node as 'first'
* (the unfencing operation) should restart, so reset node to
* first->node, at which point this case is handled like a normal
* pe_order_implies_then.
*/
pe__clear_order_flags(order->type, pe_order_implies_then_on_node);
pe__set_order_flags(order->type, pe_order_implies_then);
node = first->node;
pe_rsc_trace(then->rsc,
"%s then %s: mapped pe_order_implies_then_on_node to "
"pe_order_implies_then on %s",
first->uuid, then->uuid, node->details->uname);
}
if (pcmk_is_set(order->type, pe_order_implies_then)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags & pe_action_optional,
pe_action_optional,
pe_order_implies_then,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_optional)
&& pcmk_is_set(then->flags, pe_action_optional)) {
pe__clear_action_flags(then, pe_action_optional);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_then",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_restart) && (then->rsc != NULL)) {
enum pe_action_flags restart = pe_action_optional|pe_action_runnable;
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, restart,
pe_order_restart, data_set);
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_restart",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_implies_first)) {
if (first->rsc != NULL) {
changed |= first->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_optional,
pe_order_implies_first,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_optional)
&& pcmk_is_set(first->flags, pe_action_runnable)) {
pe__clear_action_flags(first, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_first);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_promoted_implies_first)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags & pe_action_optional,
pe_action_optional,
pe_order_promoted_implies_first,
data_set);
}
pe_rsc_trace(then->rsc,
"%s then %s: %s after pe_order_promoted_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_one_or_more)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_one_or_more,
data_set);
} else if (pcmk_is_set(first_flags, pe_action_runnable)) {
// We have another runnable instance of "first"
then->runnable_before++;
/* Mark "then" as runnable if it requires a certain number of
* "before" instances to be runnable, and they now are.
*/
if ((then->runnable_before >= then->required_runnable_before)
&& !pcmk_is_set(then->flags, pe_action_runnable)) {
pe__set_action_flags(then, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_one_or_more",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_probe) && (then->rsc != NULL)) {
if (!pcmk_is_set(first_flags, pe_action_runnable)
&& (first->rsc->running_on != NULL)) {
pe_rsc_trace(then->rsc,
"%s then %s: ignoring because first is stopping",
first->uuid, then->uuid);
order->type = pe_order_none;
} else {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_runnable_left,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_probe",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_runnable_left)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_runnable_left,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_runnable)
&& pcmk_is_set(then->flags, pe_action_runnable)) {
pe__clear_action_flags(then, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_runnable_left",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_implies_first_migratable)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_optional,
pe_order_implies_first_migratable, data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after "
"pe_order_implies_first_migratable",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_pseudo_left)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_optional,
pe_order_pseudo_left,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_pseudo_left",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_optional)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_optional,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_optional",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_asymmetrical)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_asymmetrical,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_asymmetrical",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(first->flags, pe_action_runnable)
&& pcmk_is_set(order->type, pe_order_implies_then_printed)
&& !pcmk_is_set(first_flags, pe_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
then->uuid, first->uuid);
pe__set_action_flags(then, pe_action_print_always);
// Don't bother marking 'then' as changed just for this
}
if (pcmk_is_set(order->type, pe_order_implies_first_printed)
&& !pcmk_is_set(then_flags, pe_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
first->uuid, then->uuid);
pe__set_action_flags(first, pe_action_print_always);
// Don't bother marking 'first' as changed just for this
}
if (pcmk_any_flags_set(order->type, pe_order_implies_then
|pe_order_implies_first
|pe_order_restart)
&& (first->rsc != NULL)
&& !pcmk_is_set(first->rsc->flags, pe_rsc_managed)
&& pcmk_is_set(first->rsc->flags, pe_rsc_block)
&& !pcmk_is_set(first->flags, pe_action_runnable)
&& pcmk__str_eq(first->task, RSC_STOP, pcmk__str_casei)) {
if (pcmk_is_set(then->flags, pe_action_runnable)) {
pe__clear_action_flags(then, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after checking whether first "
"is blocked, unmanaged, unrunnable stop",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
return changed;
}
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pe_action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->details->uname)
/*!
* \internal
* \brief Update an action's flags for all orderings where it is "then"
*
* \param[in] then Action to update
* \param[in] data_set Cluster working set
*/
void
pcmk__update_action_for_orderings(pe_action_t *then, pe_working_set_t *data_set)
{
GList *lpc = NULL;
enum pe_graph_flags changed = pe_graph_none;
int last_flags = then->flags;
pe_rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s",
action_type_str(then->flags), then->uuid,
action_optional_str(then->flags),
action_runnable_str(then->flags), action_node_str(then));
if (pcmk_is_set(then->flags, pe_action_requires_any)) {
/* Initialize current known "runnable before" actions. As
* update_action_for_ordering_flags() is called for each of then's
* before actions, this number will increment as runnable 'first'
* actions are encountered.
*/
then->runnable_before = 0;
if (then->required_runnable_before == 0) {
/* @COMPAT This ordering constraint uses the deprecated
* "require-all=false" attribute. Treat it like "clone-min=1".
*/
then->required_runnable_before = 1;
}
/* The pe_order_one_or_more clause of update_action_for_ordering_flags()
* (called below) will reset runnable if appropriate.
*/
pe__clear_action_flags(then, pe_action_runnable);
}
for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pe_action_t *first = other->action;
pe_node_t *then_node = then->node;
pe_node_t *first_node = first->node;
if ((first->rsc != NULL)
&& (first->rsc->variant == pe_group)
&& pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) {
first_node = first->rsc->fns->location(first->rsc, NULL, FALSE);
if (first_node != NULL) {
pe_rsc_trace(first->rsc, "Found node %s for 'first' %s",
first_node->details->uname, first->uuid);
}
}
if ((then->rsc != NULL)
&& (then->rsc->variant == pe_group)
&& pcmk__str_eq(then->task, RSC_START, pcmk__str_casei)) {
then_node = then->rsc->fns->location(then->rsc, NULL, FALSE);
if (then_node != NULL) {
pe_rsc_trace(then->rsc, "Found node %s for 'then' %s",
then_node->details->uname, then->uuid);
}
}
// Disable constraint if it only applies when on same node, but isn't
if (pcmk_is_set(other->type, pe_order_same_node)
&& (first_node != NULL) && (then_node != NULL)
&& (first_node->details != then_node->details)) {
pe_rsc_trace(then->rsc,
"Disabled ordering %s on %s then %s on %s: not same node",
other->action->uuid, first_node->details->uname,
then->uuid, then_node->details->uname);
other->type = pe_order_none;
continue;
}
pe__clear_graph_flags(changed, then, pe_graph_updated_first);
if ((first->rsc != NULL)
&& pcmk_is_set(other->type, pe_order_then_cancels_first)
&& !pcmk_is_set(then->flags, pe_action_optional)) {
/* 'then' is required, so we must abandon 'first'
* (e.g. a required stop cancels any agent reload).
*/
pe__set_action_flags(other->action, pe_action_optional);
if (!strcmp(first->task, CRMD_ACTION_RELOAD_AGENT)) {
pe__clear_resource_flags(first->rsc, pe_rsc_reload);
}
}
if ((first->rsc != NULL) && (then->rsc != NULL)
&& (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) {
first = action_for_ordering(first);
}
if (first != other->action) {
pe_rsc_trace(then->rsc, "Ordering %s after %s instead of %s",
then->uuid, first->uuid, other->action->uuid);
}
pe_rsc_trace(then->rsc,
"%s (0x%.6x) then %s (0x%.6x): type=0x%.6x node=%s",
first->uuid, first->flags, then->uuid, then->flags,
other->type, action_node_str(first));
if (first == other->action) {
/* 'first' was not remapped (e.g. from 'start' to 'running'), which
* could mean it is a non-resource action, a primitive resource
* action, or already expanded.
*/
enum pe_action_flags first_flags, then_flags;
first_flags = action_flags_for_ordering(first, then_node);
then_flags = action_flags_for_ordering(then, first_node);
changed |= update_action_for_ordering_flags(first, then,
first_flags, then_flags,
other, data_set);
/* 'first' was for a complex resource (clone, group, etc),
* create a new dependency if necessary
*/
} else if (order_actions(first, then, other->type)) {
/* This was the first time 'first' and 'then' were associated,
* start again to get the new actions_before list
*/
pe__set_graph_flags(changed, then,
pe_graph_updated_then|pe_graph_disable);
}
if (pcmk_is_set(changed, pe_graph_disable)) {
pe_rsc_trace(then->rsc,
"Disabled ordering %s then %s in favor of %s then %s",
other->action->uuid, then->uuid, first->uuid,
then->uuid);
pe__clear_graph_flags(changed, then, pe_graph_disable);
other->type = pe_order_none;
}
if (pcmk_is_set(changed, pe_graph_updated_first)) {
crm_trace("Re-processing %s and its 'after' actions "
"because it changed", first->uuid);
for (GList *lpc2 = first->actions_after; lpc2 != NULL;
lpc2 = lpc2->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc2->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
pcmk__update_action_for_orderings(first, data_set);
}
}
if (pcmk_is_set(then->flags, pe_action_requires_any)) {
if (last_flags == then->flags) {
pe__clear_graph_flags(changed, then, pe_graph_updated_then);
} else {
pe__set_graph_flags(changed, then, pe_graph_updated_then);
}
}
if (pcmk_is_set(changed, pe_graph_updated_then)) {
crm_trace("Re-processing %s and its 'after' actions because it changed",
then->uuid);
if (pcmk_is_set(last_flags, pe_action_runnable)
&& !pcmk_is_set(then->flags, pe_action_runnable)) {
pcmk__block_colocated_starts(then, data_set);
}
pcmk__update_action_for_orderings(then, data_set);
for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
}
}
/*!
* \internal
* \brief Trace-log an action (optionally with its dependent actions)
*
* \param[in] pre_text If not NULL, prefix the log with this plus ": "
* \param[in] action Action to log
* \param[in] details If true, recursively log dependent actions
*/
void
pcmk__log_action(const char *pre_text, pe_action_t *action, bool details)
{
const char *node_uname = NULL;
const char *node_uuid = NULL;
const char *desc = NULL;
CRM_CHECK(action != NULL, return);
if (!pcmk_is_set(action->flags, pe_action_pseudo)) {
if (action->node != NULL) {
node_uname = action->node->details->uname;
node_uuid = action->node->details->id;
} else {
node_uname = "<none>";
}
}
switch (text2task(action->task)) {
case stonith_node:
case shutdown_crm:
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
desc = "Pseudo ";
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
desc = "Optional ";
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
desc = "!!Non-Startable!! ";
} else if (pcmk_is_set(action->flags, pe_action_processed)) {
desc = "";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
default:
if (pcmk_is_set(action->flags, pe_action_optional)) {
desc = "Optional ";
} else if (pcmk_is_set(action->flags, pe_action_pseudo)) {
desc = "Pseudo ";
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
desc = "!!Non-Startable!! ";
} else if (pcmk_is_set(action->flags, pe_action_processed)) {
desc = "";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(action->rsc? action->rsc->id : "<none>"),
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
}
if (details) {
GList *iter = NULL;
crm_trace("\t\t====== Preceding Actions");
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== Subsequent Actions");
for (iter = action->actions_after; iter != NULL; iter = iter->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== End");
} else {
crm_trace("\t\t(before=%d, after=%d)",
g_list_length(action->actions_before),
g_list_length(action->actions_after));
}
}
+/*!
+ * \internal
+ * \brief Create a new pseudo-action for a resource
+ *
+ * \param[in] rsc Resource to create action for
+ * \param[in] task Action name
+ * \param[in] optional Whether action should be considered optional
+ * \param[in] runnable Whethe action should be considered runnable
+ *
+ * \return New action object corresponding to arguments
+ */
pe_action_t *
-create_pseudo_resource_op(pe_resource_t *rsc, const char *task, bool optional,
- bool runnable, pe_working_set_t *data_set)
+pcmk__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task,
+ bool optional, bool runnable)
{
- pe_action_t *action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0),
- task, NULL, optional, TRUE, data_set);
+ pe_action_t *action = NULL;
+
+ CRM_ASSERT((rsc != NULL) && (task != NULL));
+ action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0), task, NULL,
+ optional, TRUE, rsc->cluster);
pe__set_action_flags(action, pe_action_pseudo);
- if(runnable) {
+ if (runnable) {
pe__set_action_flags(action, pe_action_runnable);
}
return action;
}
diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c
index cc72900f40..99498fba9b 100644
--- a/lib/pacemaker/pcmk_sched_bundle.c
+++ b/lib/pacemaker/pcmk_sched_bundle.c
@@ -1,1093 +1,1093 @@
/*
* Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
#define PE__VARIANT_BUNDLE 1
#include <lib/pengine/variant.h>
static bool
is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node)
{
for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (node->details == replica->node->details) {
return TRUE;
}
}
return FALSE;
}
gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set);
void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set);
static GList *
get_container_list(pe_resource_t *rsc)
{
GList *containers = NULL;
if (rsc->variant == pe_container) {
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, rsc);
for (GList *gIter = data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
containers = g_list_append(containers, replica->container);
}
}
return containers;
}
static inline GList *
get_containers_or_children(pe_resource_t *rsc)
{
return (rsc->variant == pe_container)?
get_container_list(rsc) : rsc->children;
}
pe_node_t *
pcmk__bundle_allocate(pe_resource_t *rsc, pe_node_t *prefer,
pe_working_set_t *data_set)
{
GList *containers = NULL;
GList *nodes = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return NULL);
get_bundle_variant_data(bundle_data, rsc);
pe__set_resource_flags(rsc, pe_rsc_allocating);
containers = get_container_list(rsc);
pe__show_node_weights(!pcmk_is_set(data_set->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, data_set);
nodes = g_hash_table_get_values(rsc->allowed_nodes);
nodes = sort_nodes_by_weight(nodes, NULL, data_set);
containers = g_list_sort_with_data(containers, sort_clone_instance, data_set);
distribute_children(rsc, containers, nodes, bundle_data->nreplicas,
bundle_data->nreplicas_per_host, data_set);
g_list_free(nodes);
g_list_free(containers);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
pe_node_t *container_host = NULL;
CRM_ASSERT(replica);
if (replica->ip) {
pe_rsc_trace(rsc, "Allocating bundle %s IP %s",
rsc->id, replica->ip->id);
replica->ip->cmds->allocate(replica->ip, prefer, data_set);
}
container_host = replica->container->allocated_to;
if (replica->remote && pe__is_guest_or_remote_node(container_host)) {
/* We need 'nested' connection resources to be on the same
* host because pacemaker-remoted only supports a single
* active connection
*/
pcmk__new_colocation("child-remote-with-docker-remote", NULL,
INFINITY, replica->remote,
container_host->details->remote_rsc, NULL,
NULL, true, data_set);
}
if (replica->remote) {
pe_rsc_trace(rsc, "Allocating bundle %s connection %s",
rsc->id, replica->remote->id);
replica->remote->cmds->allocate(replica->remote, prefer,
data_set);
}
// Explicitly allocate replicas' children before bundle child
if (replica->child) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, replica->child->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
if (node->details != replica->node->details) {
node->weight = -INFINITY;
} else if (!pcmk__threshold_reached(replica->child, node,
data_set, NULL)) {
node->weight = INFINITY;
}
}
pe__set_resource_flags(replica->child->parent, pe_rsc_allocating);
pe_rsc_trace(rsc, "Allocating bundle %s replica child %s",
rsc->id, replica->child->id);
replica->child->cmds->allocate(replica->child, replica->node,
data_set);
pe__clear_resource_flags(replica->child->parent,
pe_rsc_allocating);
}
}
if (bundle_data->child) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, bundle_data->child->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
if (is_bundle_node(bundle_data, node)) {
node->weight = 0;
} else {
node->weight = -INFINITY;
}
}
pe_rsc_trace(rsc, "Allocating bundle %s child %s",
rsc->id, bundle_data->child->id);
bundle_data->child->cmds->allocate(bundle_data->child, prefer, data_set);
}
pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional);
return NULL;
}
void
pcmk__bundle_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
{
pe_action_t *action = NULL;
GList *containers = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
containers = get_container_list(rsc);
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->ip) {
replica->ip->cmds->create_actions(replica->ip, data_set);
}
if (replica->container) {
replica->container->cmds->create_actions(replica->container,
data_set);
}
if (replica->remote) {
replica->remote->cmds->create_actions(replica->remote, data_set);
}
}
clone_create_pseudo_actions(rsc, containers, NULL, NULL, data_set);
if (bundle_data->child) {
bundle_data->child->cmds->create_actions(bundle_data->child, data_set);
if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) {
/* promote */
- create_pseudo_resource_op(rsc, RSC_PROMOTE, TRUE, TRUE, data_set);
- action = create_pseudo_resource_op(rsc, RSC_PROMOTED, TRUE, TRUE, data_set);
+ pcmk__new_rsc_pseudo_action(rsc, RSC_PROMOTE, true, true);
+ action = pcmk__new_rsc_pseudo_action(rsc, RSC_PROMOTED, true, true);
action->priority = INFINITY;
/* demote */
- create_pseudo_resource_op(rsc, RSC_DEMOTE, TRUE, TRUE, data_set);
- action = create_pseudo_resource_op(rsc, RSC_DEMOTED, TRUE, TRUE, data_set);
+ pcmk__new_rsc_pseudo_action(rsc, RSC_DEMOTE, true, true);
+ action = pcmk__new_rsc_pseudo_action(rsc, RSC_DEMOTED, true, true);
action->priority = INFINITY;
}
}
g_list_free(containers);
}
void
pcmk__bundle_internal_constraints(pe_resource_t *rsc,
pe_working_set_t *data_set)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
if (bundle_data->child) {
pcmk__order_resource_actions(rsc, RSC_START, bundle_data->child,
RSC_START, pe_order_implies_first_printed,
data_set);
pcmk__order_resource_actions(rsc, RSC_STOP, bundle_data->child,
RSC_STOP, pe_order_implies_first_printed,
data_set);
if (bundle_data->child->children) {
pcmk__order_resource_actions(bundle_data->child, RSC_STARTED, rsc,
RSC_STARTED,
pe_order_implies_then_printed,
data_set);
pcmk__order_resource_actions(bundle_data->child, RSC_STOPPED, rsc,
RSC_STOPPED,
pe_order_implies_then_printed,
data_set);
} else {
pcmk__order_resource_actions(bundle_data->child, RSC_START, rsc,
RSC_STARTED,
pe_order_implies_then_printed,
data_set);
pcmk__order_resource_actions(bundle_data->child, RSC_STOP, rsc,
RSC_STOPPED,
pe_order_implies_then_printed,
data_set);
}
}
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
CRM_ASSERT(replica->container);
replica->container->cmds->internal_constraints(replica->container,
data_set);
pcmk__order_starts(rsc, replica->container,
pe_order_runnable_left|pe_order_implies_first_printed,
data_set);
if (replica->child) {
pcmk__order_stops(rsc, replica->child,
pe_order_implies_first_printed, data_set);
}
pcmk__order_stops(rsc, replica->container,
pe_order_implies_first_printed, data_set);
pcmk__order_resource_actions(replica->container, RSC_START, rsc,
RSC_STARTED, pe_order_implies_then_printed,
data_set);
pcmk__order_resource_actions(replica->container, RSC_STOP, rsc,
RSC_STOPPED, pe_order_implies_then_printed,
data_set);
if (replica->ip) {
replica->ip->cmds->internal_constraints(replica->ip, data_set);
// Start IP then container
pcmk__order_starts(replica->ip, replica->container,
pe_order_runnable_left|pe_order_preserve,
data_set);
pcmk__order_stops(replica->container, replica->ip,
pe_order_implies_first|pe_order_preserve,
data_set);
pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip,
replica->container, NULL, NULL, true,
data_set);
}
if (replica->remote) {
/* This handles ordering and colocating remote relative to container
* (via "resource-with-container"). Since IP is also ordered and
* colocated relative to the container, we don't need to do anything
* explicit here with IP.
*/
replica->remote->cmds->internal_constraints(replica->remote,
data_set);
}
if (replica->child) {
CRM_ASSERT(replica->remote);
// "Start remote then child" is implicit in scheduler's remote logic
}
}
if (bundle_data->child) {
bundle_data->child->cmds->internal_constraints(bundle_data->child, data_set);
if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) {
promote_demote_constraints(rsc, data_set);
/* child demoted before global demoted */
pcmk__order_resource_actions(bundle_data->child, RSC_DEMOTED, rsc,
RSC_DEMOTED,
pe_order_implies_then_printed,
data_set);
/* global demote before child demote */
pcmk__order_resource_actions(rsc, RSC_DEMOTE, bundle_data->child,
RSC_DEMOTE,
pe_order_implies_first_printed,
data_set);
/* child promoted before global promoted */
pcmk__order_resource_actions(bundle_data->child, RSC_PROMOTED, rsc,
RSC_PROMOTED,
pe_order_implies_then_printed,
data_set);
/* global promote before child promote */
pcmk__order_resource_actions(rsc, RSC_PROMOTE, bundle_data->child,
RSC_PROMOTE,
pe_order_implies_first_printed,
data_set);
}
}
}
static pe_resource_t *
compatible_replica_for_node(pe_resource_t *rsc_lh, pe_node_t *candidate,
pe_resource_t *rsc, enum rsc_role_e filter,
gboolean current)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(candidate != NULL, return NULL);
get_bundle_variant_data(bundle_data, rsc);
crm_trace("Looking for compatible child from %s for %s on %s",
rsc_lh->id, rsc->id, candidate->details->uname);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (is_child_compatible(replica->container, candidate, filter, current)) {
crm_trace("Pairing %s with %s on %s",
rsc_lh->id, replica->container->id,
candidate->details->uname);
return replica->container;
}
}
crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id);
return NULL;
}
static pe_resource_t *
compatible_replica(pe_resource_t *rsc_lh, pe_resource_t *rsc,
enum rsc_role_e filter, gboolean current,
pe_working_set_t *data_set)
{
GList *scratch = NULL;
pe_resource_t *pair = NULL;
pe_node_t *active_node_lh = NULL;
active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current);
if (active_node_lh) {
return compatible_replica_for_node(rsc_lh, active_node_lh, rsc, filter,
current);
}
scratch = g_hash_table_get_values(rsc_lh->allowed_nodes);
scratch = sort_nodes_by_weight(scratch, NULL, data_set);
for (GList *gIter = scratch; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
pair = compatible_replica_for_node(rsc_lh, node, rsc, filter, current);
if (pair) {
goto done;
}
}
pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? rsc->id : "none"));
done:
g_list_free(scratch);
return pair;
}
void
pcmk__bundle_rsc_colocation_lh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
/* -- Never called --
*
* Instead we add the colocation constraints to the child and call from there
*/
CRM_ASSERT(FALSE);
}
int copies_per_node(pe_resource_t * rsc)
{
/* Strictly speaking, there should be a 'copies_per_node' addition
* to the resource function table and each case would be a
* function. However that would be serious overkill to return an
* int. In fact, it seems to me that both function tables
* could/should be replaced by resources.{c,h} full of
* rsc_{some_operation} functions containing a switch as below
* which calls out to functions named {variant}_{some_operation}
* as needed.
*/
switch(rsc->variant) {
case pe_unknown:
return 0;
case pe_native:
case pe_group:
return 1;
case pe_clone:
{
const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX);
if (max_clones_node == NULL) {
return 1;
} else {
int max_i;
pcmk__scan_min_int(max_clones_node, &max_i, 0);
return max_i;
}
}
case pe_container:
{
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, rsc);
return data->nreplicas_per_host;
}
}
return 0;
}
void
pcmk__bundle_rsc_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
GList *allocated_primaries = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(constraint != NULL, return);
CRM_CHECK(dependent != NULL,
pe_err("dependent was NULL for %s", constraint->id); return);
CRM_CHECK(primary != NULL,
pe_err("primary was NULL for %s", constraint->id); return);
CRM_ASSERT(dependent->variant == pe_native);
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if(constraint->dependent->variant > pe_group) {
pe_resource_t *primary_replica = compatible_replica(dependent, primary,
RSC_ROLE_UNKNOWN,
FALSE, data_set);
if (primary_replica) {
pe_rsc_debug(primary, "Pairing %s with %s",
dependent->id, primary_replica->id);
dependent->cmds->rsc_colocation_lh(dependent, primary_replica,
constraint, data_set);
} else if (constraint->score >= INFINITY) {
crm_notice("Cannot pair %s with instance of %s",
dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true);
} else {
pe_rsc_debug(primary, "Cannot pair %s with instance of %s",
dependent->id, primary->id);
}
return;
}
get_bundle_variant_data(bundle_data, primary);
pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d",
constraint->id, dependent->id, primary->id, constraint->score);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (constraint->score < INFINITY) {
replica->container->cmds->rsc_colocation_rh(dependent,
replica->container,
constraint, data_set);
} else {
pe_node_t *chosen = replica->container->fns->location(replica->container,
NULL, FALSE);
if ((chosen == NULL)
|| is_set_recursive(replica->container, pe_rsc_block, TRUE)) {
continue;
}
if ((constraint->primary_role >= RSC_ROLE_PROMOTED)
&& (replica->child == NULL)) {
continue;
}
if ((constraint->primary_role >= RSC_ROLE_PROMOTED)
&& (replica->child->next_role < RSC_ROLE_PROMOTED)) {
continue;
}
pe_rsc_trace(primary, "Allowing %s: %s %d",
constraint->id, chosen->details->uname,
chosen->weight);
allocated_primaries = g_list_prepend(allocated_primaries, chosen);
}
}
if (constraint->score >= INFINITY) {
node_list_exclude(dependent->allowed_nodes, allocated_primaries, FALSE);
}
g_list_free(allocated_primaries);
}
enum pe_action_flags
pcmk__bundle_action_flags(pe_action_t *action, pe_node_t *node)
{
GList *containers = NULL;
enum pe_action_flags flags = 0;
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, action->rsc);
if(data->child) {
enum action_tasks task = get_complex_task(data->child, action->task, TRUE);
switch(task) {
case no_action:
case action_notify:
case action_notified:
case action_promote:
case action_promoted:
case action_demote:
case action_demoted:
return summary_action_flags(action, data->child->children, node);
default:
break;
}
}
containers = get_container_list(action->rsc);
flags = summary_action_flags(action, containers, node);
g_list_free(containers);
return flags;
}
pe_resource_t *
find_compatible_child_by_node(pe_resource_t * local_child, pe_node_t * local_node, pe_resource_t * rsc,
enum rsc_role_e filter, gboolean current)
{
GList *gIter = NULL;
GList *children = NULL;
if (local_node == NULL) {
crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id);
return NULL;
}
crm_trace("Looking for compatible child from %s for %s on %s",
local_child->id, rsc->id, local_node->details->uname);
children = get_containers_or_children(rsc);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if(is_child_compatible(child_rsc, local_node, filter, current)) {
crm_trace("Pairing %s with %s on %s",
local_child->id, child_rsc->id, local_node->details->uname);
return child_rsc;
}
}
crm_trace("Can't pair %s with %s", local_child->id, rsc->id);
if(children != rsc->children) {
g_list_free(children);
}
return NULL;
}
static pe__bundle_replica_t *
replica_for_container(pe_resource_t *rsc, pe_resource_t *container,
pe_node_t *node)
{
if (rsc->variant == pe_container) {
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, rsc);
for (GList *gIter = data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (replica->child
&& (container == replica->container)
&& (node->details == replica->node->details)) {
return replica;
}
}
}
return NULL;
}
static enum pe_graph_flags
multi_update_interleave_actions(pe_action_t *first, pe_action_t *then,
pe_node_t *node, enum pe_action_flags flags,
enum pe_action_flags filter,
enum pe_ordering type,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
GList *children = NULL;
gboolean current = FALSE;
enum pe_graph_flags changed = pe_graph_none;
/* Fix this - lazy */
if (pcmk__ends_with(first->uuid, "_stopped_0")
|| pcmk__ends_with(first->uuid, "_demoted_0")) {
current = TRUE;
}
children = get_containers_or_children(then->rsc);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *then_child = gIter->data;
pe_resource_t *first_child = find_compatible_child(then_child,
first->rsc,
RSC_ROLE_UNKNOWN,
current, data_set);
if (first_child == NULL && current) {
crm_trace("Ignore");
} else if (first_child == NULL) {
crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid, then->uuid);
/* Me no like this hack - but what else can we do?
*
* If there is no-one active or about to be active
* on the same node as then_child, then they must
* not be allowed to start
*/
if (type & (pe_order_runnable_left | pe_order_implies_then) /* Mandatory */ ) {
pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id);
if (pcmk__assign_resource(then_child, NULL, true)) {
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
}
} else {
pe_action_t *first_action = NULL;
pe_action_t *then_action = NULL;
enum action_tasks task = clone_child_action(first);
const char *first_task = task2text(task);
pe__bundle_replica_t *first_replica = NULL;
pe__bundle_replica_t *then_replica = NULL;
first_replica = replica_for_container(first->rsc, first_child,
node);
if (strstr(first->task, "stop") && first_replica && first_replica->child) {
/* Except for 'stopped' we should be looking at the
* in-container resource, actions for the child will
* happen later and are therefor more likely to align
* with the user's intent.
*/
first_action = find_first_action(first_replica->child->actions,
NULL, task2text(task), node);
} else {
first_action = find_first_action(first_child->actions, NULL, task2text(task), node);
}
then_replica = replica_for_container(then->rsc, then_child, node);
if (strstr(then->task, "mote")
&& then_replica && then_replica->child) {
/* Promote/demote actions will never be found for the
* container resource, look in the child instead
*
* Alternatively treat:
* 'XXXX then promote YYYY' as 'XXXX then start container for YYYY', and
* 'demote XXXX then stop YYYY' as 'stop container for XXXX then stop YYYY'
*/
then_action = find_first_action(then_replica->child->actions,
NULL, then->task, node);
} else {
then_action = find_first_action(then_child->actions, NULL, then->task, node);
}
if (first_action == NULL) {
if (!pcmk_is_set(first_child->flags, pe_rsc_orphan)
&& !pcmk__str_any_of(first_task, RSC_STOP, RSC_DEMOTE, NULL)) {
crm_err("Internal error: No action found for %s in %s (first)",
first_task, first_child->id);
} else {
crm_trace("No action found for %s in %s%s (first)",
first_task, first_child->id,
pcmk_is_set(first_child->flags, pe_rsc_orphan)? " (ORPHAN)" : "");
}
continue;
}
/* We're only interested if 'then' is neither stopping nor being demoted */
if (then_action == NULL) {
if (!pcmk_is_set(then_child->flags, pe_rsc_orphan)
&& !pcmk__str_any_of(then->task, RSC_STOP, RSC_DEMOTE, NULL)) {
crm_err("Internal error: No action found for %s in %s (then)",
then->task, then_child->id);
} else {
crm_trace("No action found for %s in %s%s (then)",
then->task, then_child->id,
pcmk_is_set(then_child->flags, pe_rsc_orphan)? " (ORPHAN)" : "");
}
continue;
}
if (order_actions(first_action, then_action, type)) {
crm_debug("Created constraint for %s (%d) -> %s (%d) %.6x",
first_action->uuid,
pcmk_is_set(first_action->flags, pe_action_optional),
then_action->uuid,
pcmk_is_set(then_action->flags, pe_action_optional),
type);
pe__set_graph_flags(changed, first,
pe_graph_updated_first|pe_graph_updated_then);
}
if(first_action && then_action) {
changed |= then_child->cmds->update_actions(first_action,
then_action, node,
first_child->cmds->action_flags(first_action, node),
filter, type, data_set);
} else {
crm_err("Nothing found either for %s (%p) or %s (%p) %s",
first_child->id, first_action,
then_child->id, then_action, task2text(task));
}
}
}
if(children != then->rsc->children) {
g_list_free(children);
}
return changed;
}
static bool
can_interleave_actions(pe_action_t *first, pe_action_t *then)
{
bool interleave = FALSE;
pe_resource_t *rsc = NULL;
const char *interleave_s = NULL;
if(first->rsc == NULL || then->rsc == NULL) {
crm_trace("Not interleaving %s with %s (both must be resources)", first->uuid, then->uuid);
return FALSE;
} else if(first->rsc == then->rsc) {
crm_trace("Not interleaving %s with %s (must belong to different resources)", first->uuid, then->uuid);
return FALSE;
} else if(first->rsc->variant < pe_clone || then->rsc->variant < pe_clone) {
crm_trace("Not interleaving %s with %s (both sides must be clones or bundles)", first->uuid, then->uuid);
return FALSE;
}
if (pcmk__ends_with(then->uuid, "_stop_0")
|| pcmk__ends_with(then->uuid, "_demote_0")) {
rsc = first->rsc;
} else {
rsc = then->rsc;
}
interleave_s = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE);
interleave = crm_is_true(interleave_s);
crm_trace("Interleave %s -> %s: %s (based on %s)",
first->uuid, then->uuid, interleave ? "yes" : "no", rsc->id);
return interleave;
}
enum pe_graph_flags
pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then,
pe_node_t *node, enum pe_action_flags flags,
enum pe_action_flags filter, enum pe_ordering type,
pe_working_set_t *data_set)
{
enum pe_graph_flags changed = pe_graph_none;
crm_trace("%s -> %s", first->uuid, then->uuid);
if(can_interleave_actions(first, then)) {
changed = multi_update_interleave_actions(first, then, node, flags,
filter, type, data_set);
} else if(then->rsc) {
GList *gIter = NULL;
GList *children = NULL;
// Handle the 'primitive' ordering case
changed |= native_update_actions(first, then, node, flags, filter,
type, data_set);
// Now any children (or containers in the case of a bundle)
children = get_containers_or_children(then->rsc);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *then_child = (pe_resource_t *) gIter->data;
enum pe_graph_flags then_child_changed = pe_graph_none;
pe_action_t *then_child_action = find_first_action(then_child->actions, NULL, then->task, node);
if (then_child_action) {
enum pe_action_flags then_child_flags = then_child->cmds->action_flags(then_child_action, node);
if (pcmk_is_set(then_child_flags, pe_action_runnable)) {
then_child_changed |= then_child->cmds->update_actions(first,
then_child_action, node, flags, filter, type, data_set);
}
changed |= then_child_changed;
if (then_child_changed & pe_graph_updated_then) {
for (GList *lpc = then_child_action->actions_after; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *next = (pe_action_wrapper_t *) lpc->data;
pcmk__update_action_for_orderings(next->action,
data_set);
}
}
}
}
if(children != then->rsc->children) {
g_list_free(children);
}
}
return changed;
}
void
pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, rsc);
pcmk__apply_location(constraint, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (replica->container) {
replica->container->cmds->rsc_location(replica->container,
constraint);
}
if (replica->ip) {
replica->ip->cmds->rsc_location(replica->ip, constraint);
}
}
if (bundle_data->child
&& ((constraint->role_filter == RSC_ROLE_UNPROMOTED)
|| (constraint->role_filter == RSC_ROLE_PROMOTED))) {
bundle_data->child->cmds->rsc_location(bundle_data->child, constraint);
bundle_data->child->rsc_location = g_list_prepend(bundle_data->child->rsc_location,
constraint);
}
}
void
pcmk__bundle_expand(pe_resource_t *rsc, pe_working_set_t * data_set)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
if (bundle_data->child) {
bundle_data->child->cmds->expand(bundle_data->child, data_set);
}
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->remote && replica->container
&& pe__bundle_needs_remote_name(replica->remote, data_set)) {
/* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that
* run pacemaker-remoted inside, without needing a separate IP for
* the container. This is done by configuring the inner remote's
* connection host as the magic string "#uname", then
* replacing it with the underlying host when needed.
*/
xmlNode *nvpair = get_xpath_object("//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']",
replica->remote->xml, LOG_ERR);
const char *calculated_addr = NULL;
// Replace the value in replica->remote->xml (if appropriate)
calculated_addr = pe__add_bundle_remote_name(replica->remote,
data_set,
nvpair, "value");
if (calculated_addr) {
/* Since this is for the bundle as a resource, and not any
* particular action, replace the value in the default
* parameters (not evaluated for node). action2xml() will grab
* it from there to replace it in node-evaluated parameters.
*/
GHashTable *params = pe_rsc_params(replica->remote,
NULL, data_set);
crm_trace("Set address for bundle connection %s to bundle host %s",
replica->remote->id, calculated_addr);
g_hash_table_replace(params,
strdup(XML_RSC_ATTR_REMOTE_RA_ADDR),
strdup(calculated_addr));
} else {
/* The only way to get here is if the remote connection is
* neither currently running nor scheduled to run. That means we
* won't be doing any operations that require addr (only start
* requires it; we additionally use it to compare digests when
* unpacking status, promote, and migrate_from history, but
* that's already happened by this point).
*/
crm_info("Unable to determine address for bundle %s remote connection",
rsc->id);
}
}
if (replica->ip) {
replica->ip->cmds->expand(replica->ip, data_set);
}
if (replica->container) {
replica->container->cmds->expand(replica->container, data_set);
}
if (replica->remote) {
replica->remote->cmds->expand(replica->remote, data_set);
}
}
}
gboolean
pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *complete, gboolean force,
pe_working_set_t * data_set)
{
bool any_created = FALSE;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->ip) {
any_created |= replica->ip->cmds->create_probe(replica->ip, node,
complete, force,
data_set);
}
if (replica->child && (node->details == replica->node->details)) {
any_created |= replica->child->cmds->create_probe(replica->child,
node, complete,
force, data_set);
}
if (replica->container) {
bool created = replica->container->cmds->create_probe(replica->container,
node, complete,
force, data_set);
if(created) {
any_created = TRUE;
/* If we're limited to one replica per host (due to
* the lack of an IP range probably), then we don't
* want any of our peer containers starting until
* we've established that no other copies are already
* running.
*
* Partly this is to ensure that nreplicas_per_host is
* observed, but also to ensure that the containers
* don't fail to start because the necessary port
* mappings (which won't include an IP for uniqueness)
* are already taken
*/
for (GList *tIter = bundle_data->replicas;
tIter && (bundle_data->nreplicas_per_host == 1);
tIter = tIter->next) {
pe__bundle_replica_t *other = tIter->data;
if ((other != replica) && (other != NULL)
&& (other->container != NULL)) {
pcmk__new_ordering(replica->container,
pcmk__op_key(replica->container->id, RSC_STATUS, 0),
NULL, other->container,
pcmk__op_key(other->container->id, RSC_START, 0),
NULL,
pe_order_optional|pe_order_same_node,
data_set);
}
}
}
}
if (replica->container && replica->remote
&& replica->remote->cmds->create_probe(replica->remote, node,
complete, force,
data_set)) {
/* Do not probe the remote resource until we know where the
* container is running. This is required for REMOTE_CONTAINER_HACK
* to correctly probe remote resources.
*/
char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS,
0);
pe_action_t *probe = find_first_action(replica->remote->actions,
probe_uuid, NULL, node);
free(probe_uuid);
if (probe) {
any_created = TRUE;
crm_trace("Ordering %s probe on %s",
replica->remote->id, node->details->uname);
pcmk__new_ordering(replica->container,
pcmk__op_key(replica->container->id, RSC_START, 0),
NULL, replica->remote, NULL, probe,
pe_order_probe, data_set);
}
}
}
return any_created;
}
void
pcmk__bundle_append_meta(pe_resource_t *rsc, xmlNode *xml)
{
}
void
pcmk__bundle_log_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->ip) {
LogActions(replica->ip, data_set);
}
if (replica->container) {
LogActions(replica->container, data_set);
}
if (replica->remote) {
LogActions(replica->remote, data_set);
}
if (replica->child) {
LogActions(replica->child, data_set);
}
}
}
diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c
index 7916117a47..f1da1f2777 100644
--- a/lib/pacemaker/pcmk_sched_clone.c
+++ b/lib/pacemaker/pcmk_sched_clone.c
@@ -1,1534 +1,1536 @@
/*
* Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
#define VARIANT_CLONE 1
#include <lib/pengine/variant.h>
gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set);
static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all);
static gint
sort_rsc_id(gconstpointer a, gconstpointer b)
{
const pe_resource_t *resource1 = (const pe_resource_t *)a;
const pe_resource_t *resource2 = (const pe_resource_t *)b;
long num1, num2;
CRM_ASSERT(resource1 != NULL);
CRM_ASSERT(resource2 != NULL);
/*
* Sort clone instances numerically by instance number, so instance :10
* comes after :9.
*/
num1 = strtol(strrchr(resource1->id, ':') + 1, NULL, 10);
num2 = strtol(strrchr(resource2->id, ':') + 1, NULL, 10);
if (num1 < num2) {
return -1;
} else if (num1 > num2) {
return 1;
}
return 0;
}
static pe_node_t *
parent_node_instance(const pe_resource_t * rsc, pe_node_t * node)
{
pe_node_t *ret = NULL;
if (node != NULL && rsc->parent) {
ret = pe_hash_table_lookup(rsc->parent->allowed_nodes, node->details->id);
} else if(node != NULL) {
ret = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
}
return ret;
}
static gboolean
did_fail(const pe_resource_t * rsc)
{
GList *gIter = rsc->children;
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
return TRUE;
}
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (did_fail(child_rsc)) {
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Compare instances based on colocation scores.
*
* Determines the relative order in which \c rsc1 and \c rsc2 should be
* allocated. If one resource compares less than the other, then it
* should be allocated first.
*
* \param[in] rsc1 The first instance to compare.
* \param[in] rsc2 The second instance to compare.
* \param[in] data_set Cluster working set.
*
* \return -1 if `rsc1 < rsc2`,
* 0 if `rsc1 == rsc2`, or
* 1 if `rsc1 > rsc2`
*/
static int
order_instance_by_colocation(const pe_resource_t *rsc1,
const pe_resource_t *rsc2,
pe_working_set_t *data_set)
{
int rc = 0;
pe_node_t *n = NULL;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
pe_node_t *current_node1 = pe__current_node(rsc1);
pe_node_t *current_node2 = pe__current_node(rsc2);
GList *list1 = NULL;
GList *list2 = NULL;
GHashTable *hash1 = pcmk__strkey_table(NULL, free);
GHashTable *hash2 = pcmk__strkey_table(NULL, free);
/* Clone instances must have parents */
CRM_ASSERT(rsc1->parent != NULL);
CRM_ASSERT(rsc2->parent != NULL);
n = pe__copy_node(current_node1);
g_hash_table_insert(hash1, (gpointer) n->details->id, n);
n = pe__copy_node(current_node2);
g_hash_table_insert(hash2, (gpointer) n->details->id, n);
/* Apply rsc1's parental colocations */
for (GList *gIter = rsc1->parent->rsc_cons; gIter != NULL;
gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
crm_trace("Applying %s to %s", constraint->id, rsc1->id);
hash1 = pcmk__native_merge_weights(constraint->primary, rsc1->id, hash1,
constraint->node_attribute,
constraint->score / (float) INFINITY,
0);
}
for (GList *gIter = rsc1->parent->rsc_cons_lhs; gIter != NULL;
gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, rsc1)) {
continue;
}
crm_trace("Applying %s to %s", constraint->id, rsc1->id);
hash1 = pcmk__native_merge_weights(constraint->dependent, rsc1->id,
hash1, constraint->node_attribute,
constraint->score / (float) INFINITY,
pe_weights_positive);
}
/* Apply rsc2's parental colocations */
for (GList *gIter = rsc2->parent->rsc_cons; gIter != NULL;
gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
crm_trace("Applying %s to %s", constraint->id, rsc2->id);
hash2 = pcmk__native_merge_weights(constraint->primary, rsc2->id, hash2,
constraint->node_attribute,
constraint->score / (float) INFINITY,
0);
}
for (GList *gIter = rsc2->parent->rsc_cons_lhs; gIter;
gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, rsc2)) {
continue;
}
crm_trace("Applying %s to %s", constraint->id, rsc2->id);
hash2 = pcmk__native_merge_weights(constraint->dependent, rsc2->id,
hash2, constraint->node_attribute,
constraint->score / (float) INFINITY,
pe_weights_positive);
}
/* Current location score */
node1 = g_hash_table_lookup(hash1, current_node1->details->id);
node2 = g_hash_table_lookup(hash2, current_node2->details->id);
if (node1->weight < node2->weight) {
if (node1->weight < 0) {
crm_trace("%s > %s: current score: %d %d",
rsc1->id, rsc2->id, node1->weight, node2->weight);
rc = -1;
goto out;
} else {
crm_trace("%s < %s: current score: %d %d",
rsc1->id, rsc2->id, node1->weight, node2->weight);
rc = 1;
goto out;
}
} else if (node1->weight > node2->weight) {
crm_trace("%s > %s: current score: %d %d",
rsc1->id, rsc2->id, node1->weight, node2->weight);
rc = -1;
goto out;
}
/* All location scores */
list1 = g_hash_table_get_values(hash1);
list2 = g_hash_table_get_values(hash2);
list1 = sort_nodes_by_weight(list1, current_node1, data_set);
list2 = sort_nodes_by_weight(list2, current_node2, data_set);
for (GList *gIter1 = list1, *gIter2 = list2;
(gIter1 != NULL) && (gIter2 != NULL);
gIter1 = gIter1->next, gIter2 = gIter2->next) {
node1 = (pe_node_t *) gIter1->data;
node2 = (pe_node_t *) gIter2->data;
if (node1 == NULL) {
crm_trace("%s < %s: colocated score NULL", rsc1->id, rsc2->id);
rc = 1;
break;
} else if (node2 == NULL) {
crm_trace("%s > %s: colocated score NULL", rsc1->id, rsc2->id);
rc = -1;
break;
}
if (node1->weight < node2->weight) {
crm_trace("%s < %s: colocated score", rsc1->id, rsc2->id);
rc = 1;
break;
} else if (node1->weight > node2->weight) {
crm_trace("%s > %s: colocated score", rsc1->id, rsc2->id);
rc = -1;
break;
}
}
out:
g_hash_table_destroy(hash1);
g_hash_table_destroy(hash2);
g_list_free(list1);
g_list_free(list2);
return rc;
}
gint
sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
pe_node_t *current_node1 = NULL;
pe_node_t *current_node2 = NULL;
unsigned int nnodes1 = 0;
unsigned int nnodes2 = 0;
gboolean can1 = TRUE;
gboolean can2 = TRUE;
const pe_resource_t *resource1 = (const pe_resource_t *)a;
const pe_resource_t *resource2 = (const pe_resource_t *)b;
CRM_ASSERT(resource1 != NULL);
CRM_ASSERT(resource2 != NULL);
/* allocation order:
* - active instances
* - instances running on nodes with the least copies
* - active instances on nodes that can't support them or are to be fenced
* - failed instances
* - inactive instances
*/
current_node1 = pe__find_active_on(resource1, &nnodes1, NULL);
current_node2 = pe__find_active_on(resource2, &nnodes2, NULL);
/* If both instances are running and at least one is multiply
* active, give precedence to the one that's running on fewer nodes.
*/
if ((nnodes1 > 0) && (nnodes2 > 0)) {
if (nnodes1 < nnodes2) {
crm_trace("%s < %s: running_on", resource1->id, resource2->id);
return -1;
} else if (nnodes1 > nnodes2) {
crm_trace("%s > %s: running_on", resource1->id, resource2->id);
return 1;
}
}
/* Instance whose current location is available sorts first */
node1 = current_node1;
node2 = current_node2;
if (node1 != NULL) {
pe_node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id);
if (match == NULL || match->weight < 0) {
crm_trace("%s: current location is unavailable", resource1->id);
node1 = NULL;
can1 = FALSE;
}
}
if (node2 != NULL) {
pe_node_t *match = pe_hash_table_lookup(resource2->allowed_nodes, node2->details->id);
if (match == NULL || match->weight < 0) {
crm_trace("%s: current location is unavailable", resource2->id);
node2 = NULL;
can2 = FALSE;
}
}
if (can1 && !can2) {
crm_trace("%s < %s: availability of current location", resource1->id,
resource2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("%s > %s: availability of current location", resource1->id,
resource2->id);
return 1;
}
/* Higher-priority instance sorts first */
if (resource1->priority > resource2->priority) {
crm_trace("%s < %s: priority", resource1->id, resource2->id);
return -1;
} else if (resource1->priority < resource2->priority) {
crm_trace("%s > %s: priority", resource1->id, resource2->id);
return 1;
}
/* Active instance sorts first */
if (node1 == NULL && node2 == NULL) {
crm_trace("%s == %s: not active", resource1->id, resource2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("%s > %s: active", resource1->id, resource2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("%s < %s: active", resource1->id, resource2->id);
return -1;
}
/* Instance whose current node can run resources sorts first */
can1 = can_run_resources(node1);
can2 = can_run_resources(node2);
if (can1 && !can2) {
crm_trace("%s < %s: can", resource1->id, resource2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("%s > %s: can", resource1->id, resource2->id);
return 1;
}
/* Is the parent allowed to run on the instance's current node?
* Instance with parent allowed sorts first.
*/
node1 = parent_node_instance(resource1, node1);
node2 = parent_node_instance(resource2, node2);
if (node1 == NULL && node2 == NULL) {
crm_trace("%s == %s: not allowed", resource1->id, resource2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("%s > %s: not allowed", resource1->id, resource2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("%s < %s: not allowed", resource1->id, resource2->id);
return -1;
}
/* Does one node have more instances allocated?
* Instance whose current node has fewer instances sorts first.
*/
if (node1->count < node2->count) {
crm_trace("%s < %s: count", resource1->id, resource2->id);
return -1;
} else if (node1->count > node2->count) {
crm_trace("%s > %s: count", resource1->id, resource2->id);
return 1;
}
/* Failed instance sorts first */
can1 = did_fail(resource1);
can2 = did_fail(resource2);
if (can1 && !can2) {
crm_trace("%s > %s: failed", resource1->id, resource2->id);
return 1;
} else if (!can1 && can2) {
crm_trace("%s < %s: failed", resource1->id, resource2->id);
return -1;
}
rc = order_instance_by_colocation(resource1, resource2, data_set);
if (rc != 0) {
return rc;
}
/* Default to lexicographic order by ID */
rc = strcmp(resource1->id, resource2->id);
crm_trace("%s %c %s: default", resource1->id, rc < 0 ? '<' : '>', resource2->id);
return rc;
}
static pe_node_t *
can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit)
{
pe_node_t *local_node = NULL;
if (node == NULL && rsc->allowed_nodes) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) {
can_run_instance(rsc, local_node, limit);
}
return NULL;
}
if (!node) {
/* make clang analyzer happy */
goto bail;
} else if (can_run_resources(node) == FALSE) {
goto bail;
} else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
goto bail;
}
local_node = parent_node_instance(rsc, node);
if (local_node == NULL) {
crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname);
goto bail;
} else if (local_node->weight < 0) {
common_update_score(rsc, node->details->id, local_node->weight);
pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.",
rsc->id, node->details->uname);
} else if (local_node->count < limit) {
pe_rsc_trace(rsc, "%s can run on %s (already running %d)",
rsc->id, node->details->uname, local_node->count);
return local_node;
} else {
pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)",
rsc->id, node->details->uname, local_node->count, limit);
}
bail:
if (node) {
common_update_score(rsc, node->details->id, -INFINITY);
}
return NULL;
}
static pe_node_t *
allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc,
int limit, pe_working_set_t *data_set)
{
pe_node_t *chosen = NULL;
GHashTable *backup = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)",
rsc->id, (prefer? prefer->details->uname: "none"),
(all_coloc? "all" : "some"));
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->fns->location(rsc, NULL, FALSE);
} else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
/* Only include positive colocation preferences of dependent resources
* if not every node will get a copy of the clone
*/
append_parent_colocation(rsc->parent, rsc, all_coloc);
if (prefer) {
pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (local_prefer == NULL || local_prefer->weight < 0) {
pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id,
prefer->details->uname);
return NULL;
}
}
can_run_instance(rsc, NULL, limit);
backup = pcmk__copy_node_table(rsc->allowed_nodes);
pe_rsc_trace(rsc, "Allocating instance %s", rsc->id);
chosen = rsc->cmds->allocate(rsc, prefer, data_set);
if (chosen && prefer && (chosen->details != prefer->details)) {
crm_info("Not pre-allocating %s to %s because %s is better",
rsc->id, prefer->details->uname, chosen->details->uname);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = backup;
pcmk__unassign_resource(rsc);
chosen = NULL;
backup = NULL;
}
if (chosen) {
pe_node_t *local_node = parent_node_instance(rsc, chosen);
if (local_node) {
local_node->count++;
} else if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
/* what to do? we can't enforce per-node limits in this case */
pcmk__config_err("%s not found in %s (list of %d)",
chosen->details->id, rsc->parent->id,
g_hash_table_size(rsc->parent->allowed_nodes));
}
}
if(backup) {
g_hash_table_destroy(backup);
}
return chosen;
}
static void
append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all)
{
GList *gIter = NULL;
gIter = rsc->rsc_cons;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data;
if (all || cons->score < 0 || cons->score == INFINITY) {
child->rsc_cons = g_list_prepend(child->rsc_cons, cons);
}
}
gIter = rsc->rsc_cons_lhs;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(cons, child)) {
continue;
}
if (all || cons->score < 0) {
child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons);
}
}
}
void
distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set);
void
distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set)
{
int loop_max = 0;
int allocated = 0;
int available_nodes = 0;
bool all_coloc = false;
/* count now tracks the number of clones currently allocated */
for(GList *nIter = nodes; nIter != NULL; nIter = nIter->next) {
pe_node_t *node = nIter->data;
node->count = 0;
if (can_run_resources(node)) {
available_nodes++;
}
}
all_coloc = (max < available_nodes) ? true : false;
if(available_nodes) {
loop_max = max / available_nodes;
}
if (loop_max < 1) {
loop_max = 1;
}
pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)",
max, rsc->id, available_nodes, per_host_max, loop_max);
/* Pre-allocate as many instances as we can to their current location */
for (GList *gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe_node_t *child_node = NULL;
pe_node_t *local_node = NULL;
if ((child->running_on == NULL)
|| !pcmk_is_set(child->flags, pe_rsc_provisional)
|| pcmk_is_set(child->flags, pe_rsc_failed)) {
continue;
}
child_node = pe__current_node(child);
local_node = parent_node_instance(child, child_node);
pe_rsc_trace(rsc,
"Checking pre-allocation of %s to %s (%d remaining of %d)",
child->id, child_node->details->uname, max - allocated,
max);
if (!can_run_resources(child_node) || (child_node->weight < 0)) {
pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s",
child_node->details->uname, child->id);
continue;
}
if ((local_node != NULL) && (local_node->count >= loop_max)) {
pe_rsc_trace(rsc,
"Not pre-allocating because %s already allocated "
"optimal instances", child_node->details->uname);
continue;
}
if (allocate_instance(child, child_node, all_coloc, per_host_max,
data_set)) {
pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id,
child_node->details->uname);
allocated++;
}
}
pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max);
for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
if (child->running_on != NULL) {
pe_node_t *child_node = pe__current_node(child);
pe_node_t *local_node = parent_node_instance(child, child_node);
if (local_node == NULL) {
crm_err("%s is running on %s which isn't allowed",
child->id, child_node->details->uname);
}
}
if (!pcmk_is_set(child->flags, pe_rsc_provisional)) {
} else if (allocated >= max) {
pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max);
resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set);
} else {
if (allocate_instance(child, NULL, all_coloc, per_host_max,
data_set)) {
allocated++;
}
}
}
pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d",
allocated, rsc->id, max);
}
pe_node_t *
pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *prefer,
pe_working_set_t *data_set)
{
GList *nodes = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return NULL;
} else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__add_promotion_scores(rsc);
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
/* this information is used by sort_clone_instance() when deciding in which
* order to allocate clone instances
*/
for (GList *gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_rsc_trace(rsc, "%s: Allocating %s first",
rsc->id, constraint->primary->id);
constraint->primary->cmds->allocate(constraint->primary, prefer,
data_set);
}
for (GList *gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, NULL)) {
continue;
}
rsc->allowed_nodes = constraint->dependent->cmds->merge_weights(
constraint->dependent, rsc->id, rsc->allowed_nodes,
constraint->node_attribute, (float)constraint->score / INFINITY,
(pe_weights_rollback | pe_weights_positive));
}
pe__show_node_weights(!pcmk_is_set(data_set->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, data_set);
nodes = g_hash_table_get_values(rsc->allowed_nodes);
nodes = sort_nodes_by_weight(nodes, NULL, data_set);
rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set);
distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, data_set);
g_list_free(nodes);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__set_instance_roles(rsc, data_set);
}
pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating);
pe_rsc_trace(rsc, "Done allocating %s", rsc->id);
return NULL;
}
static void
clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting,
gboolean * active)
{
GList *gIter = NULL;
if (rsc->children) {
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
clone_update_pseudo_status(child, stopping, starting, active);
}
return;
}
CRM_ASSERT(active != NULL);
CRM_ASSERT(starting != NULL);
CRM_ASSERT(stopping != NULL);
if (rsc->running_on) {
*active = TRUE;
}
gIter = rsc->actions;
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (*starting && *stopping) {
return;
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid);
continue;
} else if (!pcmk_any_flags_set(action->flags,
pe_action_pseudo|pe_action_runnable)) {
pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid);
continue;
} else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) {
pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid);
*stopping = TRUE;
} else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) {
if (!pcmk_is_set(action->flags, pe_action_runnable)) {
pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d",
action->uuid,
pcmk_is_set(action->flags, pe_action_runnable),
pcmk_is_set(action->flags, pe_action_pseudo));
} else {
pe_rsc_trace(rsc, "Starting due to: %s", action->uuid);
pe_rsc_trace(rsc, "%s run=%d, pseudo=%d",
action->uuid,
pcmk_is_set(action->flags, pe_action_runnable),
pcmk_is_set(action->flags, pe_action_pseudo));
*starting = TRUE;
}
}
}
}
static pe_action_t *
find_rsc_action(pe_resource_t *rsc, const char *task)
{
pe_action_t *match = NULL;
GList *actions = pe__resource_actions(rsc, NULL, task, FALSE);
for (GList *item = actions; item != NULL; item = item->next) {
pe_action_t *op = (pe_action_t *) item->data;
if (!pcmk_is_set(op->flags, pe_action_optional)) {
if (match != NULL) {
// More than one match, don't return any
match = NULL;
break;
}
match = op;
}
}
g_list_free(actions);
return match;
}
static void
child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set)
{
pe_action_t *stop = NULL;
pe_action_t *start = NULL;
pe_action_t *last_stop = NULL;
pe_action_t *last_start = NULL;
GList *gIter = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (clone_data->ordered == FALSE) {
return;
}
/* we have to maintain a consistent sorted child list when building order constraints */
rsc->children = g_list_sort(rsc->children, sort_rsc_id);
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
stop = find_rsc_action(child, RSC_STOP);
if (stop) {
if (last_stop) {
/* child/child relative stop */
order_actions(stop, last_stop, pe_order_optional);
}
last_stop = stop;
}
start = find_rsc_action(child, RSC_START);
if (start) {
if (last_start) {
/* child/child relative start */
order_actions(last_start, start, pe_order_optional);
}
last_start = start;
}
}
}
void
clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify,data_set);
child_ordering_constraints(rsc, data_set);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
create_promotable_actions(rsc, data_set);
}
}
void
clone_create_pseudo_actions(
pe_resource_t * rsc, GList *children, notify_data_t **start_notify, notify_data_t **stop_notify, pe_working_set_t * data_set)
{
gboolean child_active = FALSE;
gboolean child_starting = FALSE;
gboolean child_stopping = FALSE;
gboolean allow_dependent_migrations = TRUE;
pe_action_t *stop = NULL;
pe_action_t *stopped = NULL;
pe_action_t *start = NULL;
pe_action_t *started = NULL;
pe_rsc_trace(rsc, "Creating actions for %s", rsc->id);
for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
gboolean starting = FALSE;
gboolean stopping = FALSE;
child_rsc->cmds->create_actions(child_rsc, data_set);
clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active);
if (stopping && starting) {
allow_dependent_migrations = FALSE;
}
child_stopping |= stopping;
child_starting |= starting;
}
/* start */
- start = create_pseudo_resource_op(rsc, RSC_START, !child_starting, TRUE, data_set);
- started = create_pseudo_resource_op(rsc, RSC_STARTED, !child_starting, FALSE, data_set);
+ start = pcmk__new_rsc_pseudo_action(rsc, RSC_START, !child_starting, true);
+ started = pcmk__new_rsc_pseudo_action(rsc, RSC_STARTED, !child_starting,
+ false);
started->priority = INFINITY;
if (child_active || child_starting) {
pe__set_action_flags(started, pe_action_runnable);
}
if (start_notify != NULL && *start_notify == NULL) {
*start_notify = create_notification_boundaries(rsc, RSC_START, start, started, data_set);
}
/* stop */
- stop = create_pseudo_resource_op(rsc, RSC_STOP, !child_stopping, TRUE, data_set);
- stopped = create_pseudo_resource_op(rsc, RSC_STOPPED, !child_stopping, TRUE, data_set);
+ stop = pcmk__new_rsc_pseudo_action(rsc, RSC_STOP, !child_stopping, true);
+ stopped = pcmk__new_rsc_pseudo_action(rsc, RSC_STOPPED, !child_stopping,
+ true);
stopped->priority = INFINITY;
if (allow_dependent_migrations) {
pe__set_action_flags(stop, pe_action_migrate_runnable);
}
if (stop_notify != NULL && *stop_notify == NULL) {
*stop_notify = create_notification_boundaries(rsc, RSC_STOP, stop, stopped, data_set);
if (start_notify && *start_notify && *stop_notify) {
order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional);
}
}
}
void
clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
{
pe_resource_t *last_rsc = NULL;
GList *gIter;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id);
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
pe_order_optional, data_set);
pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED,
pe_order_runnable_left, data_set);
pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED,
pe_order_runnable_left, data_set);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP,
pe_order_optional, data_set);
pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE,
pe_order_runnable_left, data_set);
}
if (clone_data->ordered) {
/* we have to maintain a consistent sorted child list when building order constraints */
rsc->children = g_list_sort(rsc->children, sort_rsc_id);
}
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->internal_constraints(child_rsc, data_set);
pcmk__order_starts(rsc, child_rsc,
pe_order_runnable_left|pe_order_implies_first_printed,
data_set);
pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED,
pe_order_implies_then_printed, data_set);
if (clone_data->ordered && last_rsc) {
pcmk__order_starts(last_rsc, child_rsc, pe_order_optional,
data_set);
}
pcmk__order_stops(rsc, child_rsc, pe_order_implies_first_printed,
data_set);
pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED,
pe_order_implies_then_printed, data_set);
if (clone_data->ordered && last_rsc) {
pcmk__order_stops(child_rsc, last_rsc, pe_order_optional, data_set);
}
last_rsc = child_rsc;
}
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
promotable_constraints(rsc, data_set);
}
}
gboolean
is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current)
{
pe_node_t *node = NULL;
enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current);
CRM_CHECK(child_rsc && local_node, return FALSE);
if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
/* We only want instances that haven't failed */
node = child_rsc->fns->location(child_rsc, NULL, current);
}
if (filter != RSC_ROLE_UNKNOWN && next_role != filter) {
crm_trace("Filtered %s", child_rsc->id);
return FALSE;
}
if (node && (node->details == local_node->details)) {
return TRUE;
} else if (node) {
crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname,
local_node->details->uname);
} else {
crm_trace("%s - not allocated %d", child_rsc->id, current);
}
return FALSE;
}
pe_resource_t *
find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc,
enum rsc_role_e filter, gboolean current,
pe_working_set_t *data_set)
{
pe_resource_t *pair = NULL;
GList *gIter = NULL;
GList *scratch = NULL;
pe_node_t *local_node = NULL;
local_node = local_child->fns->location(local_child, NULL, current);
if (local_node) {
return find_compatible_child_by_node(local_child, local_node, rsc, filter, current);
}
scratch = g_hash_table_get_values(local_child->allowed_nodes);
scratch = sort_nodes_by_weight(scratch, NULL, data_set);
gIter = scratch;
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
pair = find_compatible_child_by_node(local_child, node, rsc, filter, current);
if (pair) {
goto done;
}
}
pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id);
done:
g_list_free(scratch);
return pair;
}
void
clone_rsc_colocation_lh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
/* -- Never called --
*
* Instead we add the colocation constraints to the child and call from there
*/
CRM_ASSERT(FALSE);
}
void
clone_rsc_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
gboolean do_interleave = FALSE;
const char *interleave_s = NULL;
CRM_CHECK(constraint != NULL, return);
CRM_CHECK(dependent != NULL,
pe_err("dependent was NULL for %s", constraint->id); return);
CRM_CHECK(primary != NULL,
pe_err("primary was NULL for %s", constraint->id); return);
CRM_CHECK(dependent->variant == pe_native, return);
pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d",
constraint->id, dependent->id, primary->id, constraint->score);
if (pcmk_is_set(primary->flags, pe_rsc_promotable)) {
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if (constraint->primary_role == RSC_ROLE_UNKNOWN) {
pe_rsc_trace(primary, "Handling %s as a clone colocation",
constraint->id);
} else {
promotable_colocation_rh(dependent, primary, constraint, data_set);
return;
}
}
/* only the LHS side needs to be labeled as interleave */
interleave_s = g_hash_table_lookup(constraint->dependent->meta,
XML_RSC_ATTR_INTERLEAVE);
if (crm_is_true(interleave_s)
&& (constraint->dependent->variant > pe_group)) {
/* @TODO Do we actually care about multiple primary copies sharing a
* dependent copy anymore?
*/
if (copies_per_node(constraint->dependent) != copies_per_node(constraint->primary)) {
pcmk__config_err("Cannot interleave %s and %s because they do not "
"support the same number of instances per node",
constraint->dependent->id,
constraint->primary->id);
} else {
do_interleave = TRUE;
}
}
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if (do_interleave) {
pe_resource_t *primary_instance = NULL;
primary_instance = find_compatible_child(dependent, primary,
RSC_ROLE_UNKNOWN, FALSE,
data_set);
if (primary_instance != NULL) {
pe_rsc_debug(primary, "Pairing %s with %s",
dependent->id, primary_instance->id);
dependent->cmds->rsc_colocation_lh(dependent, primary_instance,
constraint, data_set);
} else if (constraint->score >= INFINITY) {
crm_notice("Cannot pair %s with instance of %s",
dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true);
} else {
pe_rsc_debug(primary, "Cannot pair %s with instance of %s",
dependent->id, primary->id);
}
return;
} else if (constraint->score >= INFINITY) {
GList *affected_nodes = NULL;
gIter = primary->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
pe_rsc_trace(primary, "Allowing %s: %s %d",
constraint->id, chosen->details->uname,
chosen->weight);
affected_nodes = g_list_prepend(affected_nodes, chosen);
}
}
node_list_exclude(dependent->allowed_nodes, affected_nodes, FALSE);
g_list_free(affected_nodes);
return;
}
gIter = primary->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->rsc_colocation_rh(dependent, child_rsc, constraint,
data_set);
}
}
enum action_tasks
clone_child_action(pe_action_t * action)
{
enum action_tasks result = no_action;
pe_resource_t *child = (pe_resource_t *) action->rsc->children->data;
if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) {
/* Find the action we're notifying about instead */
int stop = 0;
char *key = action->uuid;
int lpc = strlen(key);
for (; lpc > 0; lpc--) {
if (key[lpc] == '_' && stop == 0) {
stop = lpc;
} else if (key[lpc] == '_') {
char *task_mutable = NULL;
lpc++;
task_mutable = strdup(key + lpc);
task_mutable[stop - lpc] = 0;
crm_trace("Extracted action '%s' from '%s'", task_mutable, key);
result = get_complex_task(child, task_mutable, TRUE);
free(task_mutable);
break;
}
}
} else {
result = get_complex_task(child, action->task, TRUE);
}
return result;
}
#define pe__clear_action_summary_flags(flags, action, flag) do { \
flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Action summary", action->rsc->id, \
flags, flag, #flag); \
} while (0)
enum pe_action_flags
summary_action_flags(pe_action_t * action, GList *children, pe_node_t * node)
{
GList *gIter = NULL;
gboolean any_runnable = FALSE;
gboolean check_runnable = TRUE;
enum action_tasks task = clone_child_action(action);
enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo);
const char *task_s = task2text(task);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_action_t *child_action = NULL;
pe_resource_t *child = (pe_resource_t *) gIter->data;
child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node);
pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id,
node ? node->details->uname : "none", child_action?child_action->uuid:"NA");
if (child_action) {
enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node);
if (pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(child_flags, pe_action_optional)) {
pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid,
child_action->uuid);
pe__clear_action_summary_flags(flags, action, pe_action_optional);
pe__clear_action_flags(action, pe_action_optional);
}
if (pcmk_is_set(child_flags, pe_action_runnable)) {
any_runnable = TRUE;
}
}
}
if (check_runnable && any_runnable == FALSE) {
pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid);
pe__clear_action_summary_flags(flags, action, pe_action_runnable);
if (node == NULL) {
pe__clear_action_flags(action, pe_action_runnable);
}
}
return flags;
}
enum pe_action_flags
clone_action_flags(pe_action_t * action, pe_node_t * node)
{
return summary_action_flags(action, action->rsc->children, node);
}
void
clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
GList *gIter = rsc->children;
pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id);
pcmk__apply_location(constraint, rsc);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->rsc_location(child_rsc, constraint);
}
}
void
clone_expand(pe_resource_t * rsc, pe_working_set_t * data_set)
{
GList *gIter = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
g_list_foreach(rsc->actions, (GFunc) rsc->cmds->action_flags, NULL);
if (clone_data->start_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->start_notify);
pcmk__create_notification_keys(rsc, clone_data->start_notify, data_set);
create_notifications(rsc, clone_data->start_notify, data_set);
}
if (clone_data->stop_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->stop_notify);
pcmk__create_notification_keys(rsc, clone_data->stop_notify, data_set);
create_notifications(rsc, clone_data->stop_notify, data_set);
}
if (clone_data->promote_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->promote_notify);
pcmk__create_notification_keys(rsc, clone_data->promote_notify, data_set);
create_notifications(rsc, clone_data->promote_notify, data_set);
}
if (clone_data->demote_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->demote_notify);
pcmk__create_notification_keys(rsc, clone_data->demote_notify, data_set);
create_notifications(rsc, clone_data->demote_notify, data_set);
}
/* Now that the notifcations have been created we can expand the children */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->expand(child_rsc, data_set);
}
native_expand(rsc, data_set);
/* The notifications are in the graph now, we can destroy the notify_data */
free_notification_data(clone_data->demote_notify);
clone_data->demote_notify = NULL;
free_notification_data(clone_data->stop_notify);
clone_data->stop_notify = NULL;
free_notification_data(clone_data->start_notify);
clone_data->start_notify = NULL;
free_notification_data(clone_data->promote_notify);
clone_data->promote_notify = NULL;
}
// Check whether a resource or any of its children is known on node
static bool
rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node)
{
if (rsc->children) {
for (GList *child_iter = rsc->children; child_iter != NULL;
child_iter = child_iter->next) {
pe_resource_t *child = (pe_resource_t *) child_iter->data;
if (rsc_known_on(child, node)) {
return TRUE;
}
}
} else if (rsc->known_on) {
GHashTableIter iter;
pe_node_t *known_node = NULL;
g_hash_table_iter_init(&iter, rsc->known_on);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) {
if (node->details == known_node->details) {
return TRUE;
}
}
}
return FALSE;
}
// Look for an instance of clone that is known on node
static pe_resource_t *
find_instance_on(const pe_resource_t *clone, const pe_node_t *node)
{
for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
if (rsc_known_on(child, node)) {
return child;
}
}
return NULL;
}
// For unique clones, probe each instance separately
static gboolean
probe_unique_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete,
gboolean force, pe_working_set_t *data_set)
{
gboolean any_created = FALSE;
for (GList *child_iter = rsc->children; child_iter != NULL;
child_iter = child_iter->next) {
pe_resource_t *child = (pe_resource_t *) child_iter->data;
any_created |= child->cmds->create_probe(child, node, complete, force,
data_set);
}
return any_created;
}
// For anonymous clones, only a single instance needs to be probed
static gboolean
probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *complete, gboolean force,
pe_working_set_t *data_set)
{
// First, check if we probed an instance on this node last time
pe_resource_t *child = find_instance_on(rsc, node);
// Otherwise, check if we plan to start an instance on this node
if (child == NULL) {
for (GList *child_iter = rsc->children; child_iter && !child;
child_iter = child_iter->next) {
pe_node_t *local_node = NULL;
pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data;
if (child_rsc) { /* make clang analyzer happy */
local_node = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (local_node && (local_node->details == node->details)) {
child = child_rsc;
}
}
}
}
// Otherwise, use the first clone instance
if (child == NULL) {
child = rsc->children->data;
}
CRM_ASSERT(child);
return child->cmds->create_probe(child, node, complete, force, data_set);
}
gboolean
clone_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete,
gboolean force, pe_working_set_t * data_set)
{
gboolean any_created = FALSE;
CRM_ASSERT(rsc);
rsc->children = g_list_sort(rsc->children, sort_rsc_id);
if (rsc->children == NULL) {
pe_warn("Clone %s has no children", rsc->id);
return FALSE;
}
if (rsc->exclusive_discover) {
pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) {
/* exclusive discover is enabled and this node is not marked
* as a node this resource should be discovered on
*
* remove the node from allowed_nodes so that the
* notification contains only nodes that we might ever run
* on
*/
g_hash_table_remove(rsc->allowed_nodes, node->details->id);
/* Bit of a shortcut - might as well take it */
return FALSE;
}
}
if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
any_created = probe_unique_clone(rsc, node, complete, force, data_set);
} else {
any_created = probe_anonymous_clone(rsc, node, complete, force,
data_set);
}
return any_created;
}
void
clone_append_meta(pe_resource_t * rsc, xmlNode * xml)
{
char *name = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
name = crm_meta_name(XML_RSC_ATTR_UNIQUE);
crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique));
free(name);
name = crm_meta_name(XML_RSC_ATTR_NOTIFY);
crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify));
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX);
crm_xml_add_int(xml, name, clone_data->clone_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX);
crm_xml_add_int(xml, name, clone_data->clone_node_max);
free(name);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX);
crm_xml_add_int(xml, name, clone_data->promoted_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX);
crm_xml_add_int(xml, name, clone_data->promoted_node_max);
free(name);
/* @COMPAT Maintain backward compatibility with resource agents that
* expect the old names (deprecated since 2.0.0).
*/
name = crm_meta_name(PCMK_XE_PROMOTED_MAX_LEGACY);
crm_xml_add_int(xml, name, clone_data->promoted_max);
free(name);
name = crm_meta_name(PCMK_XE_PROMOTED_NODE_MAX_LEGACY);
crm_xml_add_int(xml, name, clone_data->promoted_node_max);
free(name);
}
}
diff --git a/lib/pacemaker/pcmk_sched_promotable.c b/lib/pacemaker/pcmk_sched_promotable.c
index 6bde60d687..2e91154c4c 100644
--- a/lib/pacemaker/pcmk_sched_promotable.c
+++ b/lib/pacemaker/pcmk_sched_promotable.c
@@ -1,1050 +1,1053 @@
/*
* Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
#define VARIANT_CLONE 1
#include <lib/pengine/variant.h>
extern gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set);
extern bool pcmk__is_daemon;
static void
child_promoting_constraints(clone_variant_data_t * clone_data, enum pe_ordering type,
pe_resource_t * rsc, pe_resource_t * child, pe_resource_t * last,
pe_working_set_t * data_set)
{
if (child == NULL) {
if (clone_data->ordered && last != NULL) {
pe_rsc_trace(rsc, "Ordered version (last node)");
/* last child promote before promoted started */
pcmk__order_resource_actions(last, RSC_PROMOTE, rsc, RSC_PROMOTED,
type, data_set);
}
return;
}
/* child promote before global promoted */
pcmk__order_resource_actions(child, RSC_PROMOTE, rsc, RSC_PROMOTED, type,
data_set);
/* global promote before child promote */
pcmk__order_resource_actions(rsc, RSC_PROMOTE, child, RSC_PROMOTE, type,
data_set);
if (clone_data->ordered) {
pe_rsc_trace(rsc, "Ordered version");
if (last == NULL) {
/* global promote before first child promote */
last = rsc;
}
/* else: child/child relative promote */
pcmk__order_starts(last, child, type, data_set);
pcmk__order_resource_actions(last, RSC_PROMOTE, child, RSC_PROMOTE,
type, data_set);
} else {
pe_rsc_trace(rsc, "Un-ordered version");
}
}
static void
child_demoting_constraints(clone_variant_data_t * clone_data, enum pe_ordering type,
pe_resource_t * rsc, pe_resource_t * child, pe_resource_t * last,
pe_working_set_t * data_set)
{
if (child == NULL) {
if (clone_data->ordered && last != NULL) {
pe_rsc_trace(rsc, "Ordered version (last node)");
/* global demote before first child demote */
pcmk__order_resource_actions(rsc, RSC_DEMOTE, last, RSC_DEMOTE,
pe_order_optional, data_set);
}
return;
}
/* child demote before global demoted */
pcmk__order_resource_actions(child, RSC_DEMOTE, rsc, RSC_DEMOTED,
pe_order_implies_then_printed, data_set);
/* global demote before child demote */
pcmk__order_resource_actions(rsc, RSC_DEMOTE, child, RSC_DEMOTE,
pe_order_implies_first_printed, data_set);
if (clone_data->ordered && last != NULL) {
pe_rsc_trace(rsc, "Ordered version");
/* child/child relative demote */
pcmk__order_resource_actions(child, RSC_DEMOTE, last, RSC_DEMOTE, type,
data_set);
} else if (clone_data->ordered) {
pe_rsc_trace(rsc, "Ordered version (1st node)");
/* first child stop before global stopped */
pcmk__order_resource_actions(child, RSC_DEMOTE, rsc, RSC_DEMOTED, type,
data_set);
} else {
pe_rsc_trace(rsc, "Un-ordered version");
}
}
static void
check_promotable_actions(pe_resource_t *rsc, gboolean *demoting,
gboolean *promoting)
{
GList *gIter = NULL;
if (rsc->children) {
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
check_promotable_actions(child, demoting, promoting);
}
return;
}
CRM_ASSERT(demoting != NULL);
CRM_ASSERT(promoting != NULL);
gIter = rsc->actions;
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (*promoting && *demoting) {
return;
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
continue;
} else if (pcmk__str_eq(RSC_DEMOTE, action->task, pcmk__str_casei)) {
*demoting = TRUE;
} else if (pcmk__str_eq(RSC_PROMOTE, action->task, pcmk__str_casei)) {
*promoting = TRUE;
}
}
}
static void
apply_promoted_location(pe_resource_t *child, GList *location_constraints,
pe_node_t *chosen)
{
CRM_CHECK(child && chosen, return);
for (GList *gIter = location_constraints; gIter; gIter = gIter->next) {
pe_node_t *cons_node = NULL;
pe__location_t *cons = gIter->data;
if (cons->role_filter == RSC_ROLE_PROMOTED) {
pe_rsc_trace(child, "Applying %s to %s", cons->id, child->id);
cons_node = pe_find_node_id(cons->node_list_rh, chosen->details->id);
}
if (cons_node != NULL) {
int new_priority = pe__add_scores(child->priority,
cons_node->weight);
pe_rsc_trace(child, "\t%s[%s]: %d -> %d (%d)",
child->id, cons_node->details->uname, child->priority,
new_priority, cons_node->weight);
child->priority = new_priority;
}
}
}
static pe_node_t *
guest_location(pe_node_t *guest_node)
{
pe_resource_t *guest = guest_node->details->remote_rsc->container;
return guest->fns->location(guest, NULL, FALSE);
}
static pe_node_t *
node_to_be_promoted_on(pe_resource_t *rsc)
{
pe_node_t *node = NULL;
pe_node_t *local_node = NULL;
pe_resource_t *parent = uber_parent(rsc);
clone_variant_data_t *clone_data = NULL;
#if 0
enum rsc_role_e role = RSC_ROLE_UNKNOWN;
role = rsc->fns->state(rsc, FALSE);
crm_info("%s role: %s", rsc->id, role2text(role));
#endif
if (rsc->children) {
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
if (node_to_be_promoted_on(child) == NULL) {
pe_rsc_trace(rsc, "Child %s of %s can't be promoted", child->id, rsc->id);
return NULL;
}
}
}
node = rsc->fns->location(rsc, NULL, FALSE);
if (node == NULL) {
pe_rsc_trace(rsc, "%s cannot be promoted: not allocated", rsc->id);
return NULL;
} else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
if (rsc->fns->state(rsc, TRUE) == RSC_ROLE_PROMOTED) {
crm_notice("Forcing unmanaged instance %s to remain promoted on %s",
rsc->id, node->details->uname);
} else {
return NULL;
}
} else if (rsc->priority < 0) {
pe_rsc_trace(rsc, "%s cannot be promoted: preference: %d",
rsc->id, rsc->priority);
return NULL;
} else if (can_run_resources(node) == FALSE) {
crm_trace("Node can't run any resources: %s", node->details->uname);
return NULL;
/* @TODO It's possible this check should be done in can_run_resources()
* instead. We should investigate all its callers to figure out whether that
* would be a good idea.
*/
} else if (pe__is_guest_node(node) && (guest_location(node) == NULL)) {
pe_rsc_trace(rsc, "%s cannot be promoted: guest %s not allocated",
rsc->id, node->details->remote_rsc->container->id);
return NULL;
}
get_clone_variant_data(clone_data, parent);
local_node = pe_hash_table_lookup(parent->allowed_nodes, node->details->id);
if (local_node == NULL) {
crm_err("%s cannot run on %s: node not allowed", rsc->id, node->details->uname);
return NULL;
} else if ((local_node->count < clone_data->promoted_node_max)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
return local_node;
} else {
pe_rsc_trace(rsc, "%s cannot be promoted on %s: node full",
rsc->id, node->details->uname);
}
return NULL;
}
static gint
sort_promotable_instance(gconstpointer a, gconstpointer b, gpointer data_set)
{
int rc;
enum rsc_role_e role1 = RSC_ROLE_UNKNOWN;
enum rsc_role_e role2 = RSC_ROLE_UNKNOWN;
const pe_resource_t *resource1 = (const pe_resource_t *)a;
const pe_resource_t *resource2 = (const pe_resource_t *)b;
CRM_ASSERT(resource1 != NULL);
CRM_ASSERT(resource2 != NULL);
role1 = resource1->fns->state(resource1, TRUE);
role2 = resource2->fns->state(resource2, TRUE);
rc = sort_rsc_index(a, b);
if (rc != 0) {
crm_trace("%s %c %s (index)", resource1->id, rc < 0 ? '<' : '>', resource2->id);
return rc;
}
if (role1 > role2) {
crm_trace("%s %c %s (role)", resource1->id, '<', resource2->id);
return -1;
} else if (role1 < role2) {
crm_trace("%s %c %s (role)", resource1->id, '>', resource2->id);
return 1;
}
return sort_clone_instance(a, b, data_set);
}
static void
promotion_order(pe_resource_t *rsc, pe_working_set_t *data_set)
{
GList *gIter = NULL;
pe_node_t *node = NULL;
pe_node_t *chosen = NULL;
clone_variant_data_t *clone_data = NULL;
char score[33];
size_t len = sizeof(score);
get_clone_variant_data(clone_data, rsc);
if (clone_data->added_promoted_constraints) {
return;
}
clone_data->added_promoted_constraints = true;
pe_rsc_trace(rsc, "Merging weights for %s", rsc->id);
pe__set_resource_flags(rsc, pe_rsc_merging);
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe_rsc_trace(rsc, "Sort index: %s = %d", child->id, child->sort_index);
}
pe__show_node_weights(true, rsc, "Before", rsc->allowed_nodes, data_set);
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
chosen = child->fns->location(child, NULL, FALSE);
if (chosen == NULL || child->sort_index < 0) {
pe_rsc_trace(rsc, "Skipping %s", child->id);
continue;
}
node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes, chosen->details->id);
CRM_ASSERT(node != NULL);
// Add promotion preferences and rsc_location scores when role=Promoted
score2char_stack(child->sort_index, score, len);
pe_rsc_trace(rsc, "Adding %s to %s from %s", score,
node->details->uname, child->id);
node->weight = pe__add_scores(child->sort_index, node->weight);
}
pe__show_node_weights(true, rsc, "Middle", rsc->allowed_nodes, data_set);
gIter = rsc->rsc_cons;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
/* (Re-)add location preferences of resources that a promoted instance
* should/must be colocated with.
*/
if (constraint->dependent_role == RSC_ROLE_PROMOTED) {
enum pe_weights flags = constraint->score == INFINITY ? 0 : pe_weights_rollback;
pe_rsc_trace(rsc, "RHS: %s with %s: %d",
constraint->dependent->id, constraint->primary->id,
constraint->score);
rsc->allowed_nodes = constraint->primary->cmds->merge_weights(
constraint->primary, rsc->id, rsc->allowed_nodes,
constraint->node_attribute,
constraint->score / (float) INFINITY, flags);
}
}
gIter = rsc->rsc_cons_lhs;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, NULL)) {
continue;
}
/* (Re-)add location preferences of resources that wish to be colocated
* with a promoted instance.
*/
if (constraint->primary_role == RSC_ROLE_PROMOTED) {
pe_rsc_trace(rsc, "LHS: %s with %s: %d",
constraint->dependent->id, constraint->primary->id,
constraint->score);
rsc->allowed_nodes = constraint->dependent->cmds->merge_weights(
constraint->dependent, rsc->id, rsc->allowed_nodes,
constraint->node_attribute,
constraint->score / (float) INFINITY,
pe_weights_rollback|pe_weights_positive);
}
}
gIter = rsc->rsc_tickets;
for (; gIter != NULL; gIter = gIter->next) {
rsc_ticket_t *rsc_ticket = (rsc_ticket_t *) gIter->data;
if ((rsc_ticket->role_lh == RSC_ROLE_PROMOTED)
&& (rsc_ticket->ticket->granted == FALSE || rsc_ticket->ticket->standby)) {
resource_location(rsc, NULL, -INFINITY, "__stateful_without_ticket__", data_set);
}
}
pe__show_node_weights(true, rsc, "After", rsc->allowed_nodes, data_set);
/* write them back and sort */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
chosen = child->fns->location(child, NULL, FALSE);
if (!pcmk_is_set(child->flags, pe_rsc_managed)
&& (child->next_role == RSC_ROLE_PROMOTED)) {
child->sort_index = INFINITY;
} else if (chosen == NULL || child->sort_index < 0) {
pe_rsc_trace(rsc, "%s: %d", child->id, child->sort_index);
} else {
node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes, chosen->details->id);
CRM_ASSERT(node != NULL);
child->sort_index = node->weight;
}
pe_rsc_trace(rsc, "Set sort index: %s = %d", child->id, child->sort_index);
}
rsc->children = g_list_sort_with_data(rsc->children,
sort_promotable_instance, data_set);
pe__clear_resource_flags(rsc, pe_rsc_merging);
}
static gboolean
filter_anonymous_instance(pe_resource_t *rsc, const pe_node_t *node)
{
GList *rIter = NULL;
char *key = clone_strip(rsc->id);
pe_resource_t *parent = uber_parent(rsc);
for (rIter = parent->children; rIter; rIter = rIter->next) {
/* If there is an active instance on the node, only it receives the
* promotion score. Use ->find_rsc() in case this is a cloned group.
*/
pe_resource_t *child = rIter->data;
pe_resource_t *active = parent->fns->find_rsc(child, key, node, pe_find_clone|pe_find_current);
if(rsc == active) {
pe_rsc_trace(rsc, "Found %s for %s active on %s: done", active->id, key, node->details->uname);
free(key);
return TRUE;
} else if(active) {
pe_rsc_trace(rsc, "Found %s for %s on %s: not %s", active->id, key, node->details->uname, rsc->id);
free(key);
return FALSE;
} else {
pe_rsc_trace(rsc, "%s on %s: not active", key, node->details->uname);
}
}
for (rIter = parent->children; rIter; rIter = rIter->next) {
pe_resource_t *child = rIter->data;
/*
* We know it's not running, but any score will still count if
* the instance has been probed on $node
*
* Again use ->find_rsc() because we might be a cloned group
* and knowing that other members of the group are known here
* implies nothing
*/
rsc = parent->fns->find_rsc(child, key, NULL, pe_find_clone);
CRM_LOG_ASSERT(rsc);
if(rsc) {
pe_rsc_trace(rsc, "Checking %s for %s on %s", rsc->id, key, node->details->uname);
if (g_hash_table_lookup(rsc->known_on, node->details->id)) {
free(key);
return TRUE;
}
}
}
free(key);
return FALSE;
}
static const char *
lookup_promotion_score(pe_resource_t *rsc, const pe_node_t *node, const char *name)
{
const char *attr_value = NULL;
if (node && name) {
char *attr_name = pcmk_promotion_score_name(name);
attr_value = pe_node_attribute_calculated(node, attr_name, rsc);
free(attr_name);
}
return attr_value;
}
static int
promotion_score(pe_resource_t *rsc, const pe_node_t *node, int not_set_value)
{
char *name = rsc->id;
const char *attr_value = NULL;
int score = not_set_value;
pe_node_t *match = NULL;
CRM_CHECK(node != NULL, return not_set_value);
if (rsc->children) {
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
int c_score = promotion_score(child, node, not_set_value);
if (score == not_set_value) {
score = c_score;
} else {
score += c_score;
}
}
return score;
}
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)
&& filter_anonymous_instance(rsc, node)) {
pe_rsc_trace(rsc, "Anonymous clone %s is allowed on %s", rsc->id, node->details->uname);
} else if (rsc->running_on || g_hash_table_size(rsc->known_on)) {
/* If we've probed and/or started the resource anywhere, consider
* promotion scores only from nodes where we know the status. However,
* if the status of all nodes is unknown (e.g. cluster startup),
* skip this code, to make sure we take into account any permanent
* promotion scores set previously.
*/
pe_node_t *known = pe_hash_table_lookup(rsc->known_on, node->details->id);
match = pe_find_node_id(rsc->running_on, node->details->id);
if ((match == NULL) && (known == NULL)) {
pe_rsc_trace(rsc, "skipping %s (aka. %s) promotion score on %s because inactive",
rsc->id, rsc->clone_name, node->details->uname);
return score;
}
}
match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (match == NULL) {
return score;
} else if (match->weight < 0) {
pe_rsc_trace(rsc, "%s on %s has score: %d - ignoring",
rsc->id, match->details->uname, match->weight);
return score;
}
if (rsc->clone_name) {
/* Use the name the lrm knows this resource as,
* since that's what crm_attribute --promotion would have used
*/
name = rsc->clone_name;
}
attr_value = lookup_promotion_score(rsc, node, name);
pe_rsc_trace(rsc, "promotion score for %s on %s = %s",
name, node->details->uname, crm_str(attr_value));
if ((attr_value == NULL) && !pcmk_is_set(rsc->flags, pe_rsc_unique)) {
/* If we don't have any LRM history yet, we won't have clone_name -- in
* that case, for anonymous clones, try the resource name without any
* instance number.
*/
name = clone_strip(rsc->id);
if (strcmp(rsc->id, name)) {
attr_value = lookup_promotion_score(rsc, node, name);
pe_rsc_trace(rsc, "stripped promotion score for %s on %s = %s",
name, node->details->uname, crm_str(attr_value));
}
free(name);
}
if (attr_value != NULL) {
score = char2score(attr_value);
}
return score;
}
void
pcmk__add_promotion_scores(pe_resource_t *rsc)
{
int score, new_score;
GList *gIter = rsc->children;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (clone_data->added_promotion_scores) {
/* Make sure we only do this once */
return;
}
clone_data->added_promotion_scores = true;
for (; gIter != NULL; gIter = gIter->next) {
GHashTableIter iter;
pe_node_t *node = NULL;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
g_hash_table_iter_init(&iter, child_rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (can_run_resources(node) == FALSE) {
/* This node will never be promoted, so don't apply the
* promotion score, as that may lead to clone shuffling.
*/
continue;
}
score = promotion_score(child_rsc, node, 0);
if (score > 0) {
new_score = pe__add_scores(node->weight, score);
if (new_score != node->weight) {
pe_rsc_trace(rsc, "\t%s: Updating preference for %s (%d->%d)",
child_rsc->id, node->details->uname, node->weight, new_score);
node->weight = new_score;
}
}
new_score = QB_MAX(child_rsc->priority, score);
if (new_score != child_rsc->priority) {
pe_rsc_trace(rsc, "\t%s: Updating priority (%d->%d)",
child_rsc->id, child_rsc->priority, new_score);
child_rsc->priority = new_score;
}
}
}
}
static void
set_role_unpromoted(pe_resource_t *rsc, bool current)
{
GList *gIter = rsc->children;
if (current) {
if (rsc->role == RSC_ROLE_STARTED) {
rsc->role = RSC_ROLE_UNPROMOTED;
}
} else {
GList *allocated = NULL;
rsc->fns->location(rsc, &allocated, FALSE);
pe__set_next_role(rsc, (allocated? RSC_ROLE_UNPROMOTED : RSC_ROLE_STOPPED),
"unpromoted instance");
g_list_free(allocated);
}
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
set_role_unpromoted(child_rsc, current);
}
}
static void
set_role_promoted(pe_resource_t *rsc, gpointer user_data)
{
if (rsc->next_role == RSC_ROLE_UNKNOWN) {
pe__set_next_role(rsc, RSC_ROLE_PROMOTED, "promoted instance");
}
g_list_foreach(rsc->children, (GFunc) set_role_promoted, NULL);
}
pe_node_t *
pcmk__set_instance_roles(pe_resource_t *rsc, pe_working_set_t *data_set)
{
int promoted = 0;
GList *gIter = NULL;
GList *gIter2 = NULL;
GHashTableIter iter;
pe_node_t *node = NULL;
pe_node_t *chosen = NULL;
enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
char score[33];
size_t len = sizeof(score);
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
// Repurpose count to track the number of promoted instances allocated
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
node->count = 0;
}
/*
* assign priority
*/
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
GList *list = NULL;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_rsc_trace(rsc, "Assigning priority for %s: %s", child_rsc->id,
role2text(child_rsc->next_role));
if (child_rsc->fns->state(child_rsc, TRUE) == RSC_ROLE_STARTED) {
set_role_unpromoted(child_rsc, true);
}
chosen = child_rsc->fns->location(child_rsc, &list, FALSE);
if (pcmk__list_of_multiple(list)) {
pcmk__config_err("Cannot promote non-colocated child %s",
child_rsc->id);
}
g_list_free(list);
if (chosen == NULL) {
continue;
}
next_role = child_rsc->fns->state(child_rsc, FALSE);
switch (next_role) {
case RSC_ROLE_STARTED:
case RSC_ROLE_UNKNOWN:
/*
* Default to -1 if no value is set
*
* This allows instances eligible for promotion to be specified
* based solely on rsc_location constraints,
* but prevents anyone from being promoted if
* neither a constraint nor a promotion score is present
*/
child_rsc->priority = promotion_score(child_rsc, chosen, -1);
break;
case RSC_ROLE_UNPROMOTED:
case RSC_ROLE_STOPPED:
child_rsc->priority = -INFINITY;
break;
case RSC_ROLE_PROMOTED:
/* We will arrive here if we're re-creating actions after a stonith
*/
break;
default:
CRM_CHECK(FALSE /* unhandled */ ,
crm_err("Unknown resource role: %d for %s", next_role, child_rsc->id));
}
apply_promoted_location(child_rsc, child_rsc->rsc_location, chosen);
apply_promoted_location(child_rsc, rsc->rsc_location, chosen);
for (gIter2 = child_rsc->rsc_cons; gIter2 != NULL; gIter2 = gIter2->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter2->data;
child_rsc->cmds->rsc_colocation_lh(child_rsc, cons->primary, cons,
data_set);
}
child_rsc->sort_index = child_rsc->priority;
pe_rsc_trace(rsc, "Assigning priority for %s: %d", child_rsc->id, child_rsc->priority);
if (next_role == RSC_ROLE_PROMOTED) {
child_rsc->sort_index = INFINITY;
}
}
pe__show_node_weights(true, rsc, "Pre merge", rsc->allowed_nodes, data_set);
promotion_order(rsc, data_set);
// Choose the first N eligible instances to be promoted
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
score2char_stack(child_rsc->sort_index, score, len);
chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (pcmk_is_set(data_set->flags, pe_flag_show_scores) && !pcmk__is_daemon) {
if (data_set->priv != NULL) {
pcmk__output_t *out = data_set->priv;
out->message(out, "promotion-score", child_rsc, chosen, score);
}
} else {
pe_rsc_trace(rsc, "%s promotion score on %s: %s", child_rsc->id,
(chosen? chosen->details->uname : "none"), score);
}
chosen = NULL; /* nuke 'chosen' so that we don't promote more than the
* required number of instances
*/
if (child_rsc->sort_index < 0) {
pe_rsc_trace(rsc, "Not supposed to promote child: %s", child_rsc->id);
} else if ((promoted < clone_data->promoted_max)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
chosen = node_to_be_promoted_on(child_rsc);
}
pe_rsc_debug(rsc, "%s promotion score: %d", child_rsc->id, child_rsc->priority);
if (chosen == NULL) {
set_role_unpromoted(child_rsc, false);
continue;
} else if ((child_rsc->role < RSC_ROLE_PROMOTED)
&& !pcmk_is_set(data_set->flags, pe_flag_have_quorum)
&& data_set->no_quorum_policy == no_quorum_freeze) {
crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze",
child_rsc->id, role2text(child_rsc->role), role2text(child_rsc->next_role));
set_role_unpromoted(child_rsc, false);
continue;
}
chosen->count++;
pe_rsc_info(rsc, "Promoting %s (%s %s)",
child_rsc->id, role2text(child_rsc->role), chosen->details->uname);
set_role_promoted(child_rsc, NULL);
promoted++;
}
pe_rsc_info(rsc, "%s: Promoted %d instances of a possible %d",
rsc->id, promoted, clone_data->promoted_max);
return NULL;
}
void
create_promotable_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
{
pe_action_t *action = NULL;
GList *gIter = rsc->children;
pe_action_t *action_complete = NULL;
gboolean any_promoting = FALSE;
gboolean any_demoting = FALSE;
pe_resource_t *last_promote_rsc = NULL;
pe_resource_t *last_demote_rsc = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_rsc_debug(rsc, "Creating actions for %s", rsc->id);
for (; gIter != NULL; gIter = gIter->next) {
gboolean child_promoting = FALSE;
gboolean child_demoting = FALSE;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_rsc_trace(rsc, "Creating actions for %s", child_rsc->id);
child_rsc->cmds->create_actions(child_rsc, data_set);
check_promotable_actions(child_rsc, &child_demoting, &child_promoting);
any_demoting = any_demoting || child_demoting;
any_promoting = any_promoting || child_promoting;
pe_rsc_trace(rsc, "Created actions for %s: %d %d", child_rsc->id, child_promoting,
child_demoting);
}
/* promote */
- action = create_pseudo_resource_op(rsc, RSC_PROMOTE, !any_promoting, TRUE, data_set);
- action_complete = create_pseudo_resource_op(rsc, RSC_PROMOTED, !any_promoting, TRUE, data_set);
+ action = pcmk__new_rsc_pseudo_action(rsc, RSC_PROMOTE, !any_promoting,
+ true);
+ action_complete = pcmk__new_rsc_pseudo_action(rsc, RSC_PROMOTED,
+ !any_promoting, true);
action_complete->priority = INFINITY;
child_promoting_constraints(clone_data, pe_order_optional,
rsc, NULL, last_promote_rsc, data_set);
if (clone_data->promote_notify == NULL) {
clone_data->promote_notify =
create_notification_boundaries(rsc, RSC_PROMOTE, action, action_complete, data_set);
}
/* demote */
- action = create_pseudo_resource_op(rsc, RSC_DEMOTE, !any_demoting, TRUE, data_set);
- action_complete = create_pseudo_resource_op(rsc, RSC_DEMOTED, !any_demoting, TRUE, data_set);
+ action = pcmk__new_rsc_pseudo_action(rsc, RSC_DEMOTE, !any_demoting, true);
+ action_complete = pcmk__new_rsc_pseudo_action(rsc, RSC_DEMOTED,
+ !any_demoting, true);
action_complete->priority = INFINITY;
child_demoting_constraints(clone_data, pe_order_optional, rsc, NULL, last_demote_rsc, data_set);
if (clone_data->demote_notify == NULL) {
clone_data->demote_notify =
create_notification_boundaries(rsc, RSC_DEMOTE, action, action_complete, data_set);
if (clone_data->promote_notify) {
/* If we ever wanted groups to have notifications we'd need to move this to native_internal_constraints() one day
* Requires exposing *_notify
*/
order_actions(clone_data->stop_notify->post_done, clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->start_notify->post_done, clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done, clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done, clone_data->start_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done, clone_data->stop_notify->pre,
pe_order_optional);
}
}
/* restore the correct priority */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->priority = rsc->priority;
}
}
void
promote_demote_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
{
/* global stopped before start */
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
pe_order_optional, data_set);
/* global stopped before promote */
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_PROMOTE,
pe_order_optional, data_set);
/* global demoted before start */
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_START,
pe_order_optional, data_set);
/* global started before promote */
pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE,
pe_order_optional, data_set);
/* global demoted before stop */
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP,
pe_order_optional, data_set);
/* global demote before demoted */
pcmk__order_resource_actions(rsc, RSC_DEMOTE, rsc, RSC_DEMOTED,
pe_order_optional, data_set);
/* global demoted before promote */
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_PROMOTE,
pe_order_optional, data_set);
}
void
promotable_constraints(pe_resource_t * rsc, pe_working_set_t * data_set)
{
GList *gIter = rsc->children;
pe_resource_t *last_rsc = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
promote_demote_constraints(rsc, data_set);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
/* child demote before promote */
pcmk__order_resource_actions(child_rsc, RSC_DEMOTE, child_rsc,
RSC_PROMOTE, pe_order_optional, data_set);
child_promoting_constraints(clone_data, pe_order_optional,
rsc, child_rsc, last_rsc, data_set);
child_demoting_constraints(clone_data, pe_order_optional,
rsc, child_rsc, last_rsc, data_set);
last_rsc = child_rsc;
}
}
static void
node_hash_update_one(GHashTable * hash, pe_node_t * other, const char *attr, int score)
{
GHashTableIter iter;
pe_node_t *node = NULL;
const char *value = NULL;
if (other == NULL) {
return;
} else if (attr == NULL) {
attr = CRM_ATTR_UNAME;
}
value = pe_node_attribute_raw(other, attr);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
const char *tmp = pe_node_attribute_raw(node, attr);
if (pcmk__str_eq(value, tmp, pcmk__str_casei)) {
crm_trace("%s: %d + %d", node->details->uname, node->weight, other->weight);
node->weight = pe__add_scores(node->weight, score);
}
}
}
void
promotable_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
if (pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
GList *affected_nodes = NULL;
for (gIter = primary->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, FALSE);
pe_rsc_trace(primary, "Processing: %s", child_rsc->id);
if ((chosen != NULL) && (next_role == constraint->primary_role)) {
pe_rsc_trace(primary, "Applying: %s %s %s %d", child_rsc->id,
role2text(next_role), chosen->details->uname, constraint->score);
if (constraint->score < INFINITY) {
node_hash_update_one(dependent->allowed_nodes, chosen,
constraint->node_attribute, constraint->score);
}
affected_nodes = g_list_prepend(affected_nodes, chosen);
}
}
/* Only do this if it's not a promoted-with-promoted colocation. Doing
* this unconditionally would prevent unpromoted instances from being
* started.
*/
if ((constraint->dependent_role != RSC_ROLE_PROMOTED)
|| (constraint->primary_role != RSC_ROLE_PROMOTED)) {
if (constraint->score >= INFINITY) {
node_list_exclude(dependent->allowed_nodes, affected_nodes,
TRUE);
}
}
g_list_free(affected_nodes);
} else if (constraint->dependent_role == RSC_ROLE_PROMOTED) {
pe_resource_t *primary_instance;
primary_instance = find_compatible_child(dependent, primary,
constraint->primary_role,
FALSE, data_set);
if ((primary_instance == NULL) && (constraint->score >= INFINITY)) {
pe_rsc_trace(dependent, "%s can't be promoted %s",
dependent->id, constraint->id);
dependent->priority = -INFINITY;
} else if (primary_instance != NULL) {
int new_priority = pe__add_scores(dependent->priority,
constraint->score);
pe_rsc_debug(dependent, "Applying %s to %s",
constraint->id, dependent->id);
pe_rsc_debug(dependent, "\t%s: %d->%d",
dependent->id, dependent->priority, new_priority);
dependent->priority = new_priority;
}
}
return;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Nov 23, 3:52 AM (57 m, 11 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1010192
Default Alt Text
(179 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment