Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h
index f19fa56b79..e8d619588a 100644
--- a/include/pcmki/pcmki_sched_utils.h
+++ b/include/pcmki/pcmki_sched_utils.h
@@ -1,70 +1,69 @@
/*
* Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PENGINE_AUTILS__H
# define PENGINE_AUTILS__H
#include <stdbool.h> // bool
#include <glib.h> // GList, GHashTable, gboolean, guint
#include <crm/lrmd.h> // lrmd_event_data_t
#include <crm/cib.h> // cib_t
#include <crm/pengine/pe_types.h>
#include <crm/pengine/internal.h>
#include <pcmki/pcmki_scheduler.h>
#include <pcmki/pcmki_transition.h>
#include <pacemaker.h>
/* Constraint helper functions */
pcmk__colocation_t *invert_constraint(pcmk__colocation_t *constraint);
pe__location_t *copy_constraint(pe__location_t *constraint);
GHashTable *pcmk__copy_node_table(GHashTable *nodes);
GList *pcmk__copy_node_list(const GList *list, bool reset);
GList *sort_nodes_by_weight(GList *nodes, pe_node_t *active_node,
pe_working_set_t *data_set);
extern gboolean can_run_resources(const pe_node_t * node);
gboolean can_run_any(GHashTable * nodes);
pe_resource_t *find_compatible_child(pe_resource_t *local_child,
pe_resource_t *rsc, enum rsc_role_e filter,
gboolean current,
pe_working_set_t *data_set);
pe_resource_t *find_compatible_child_by_node(pe_resource_t * local_child, pe_node_t * local_node, pe_resource_t * rsc,
enum rsc_role_e filter, gboolean current);
gboolean is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current);
enum pe_action_flags summary_action_flags(pe_action_t * action, GList *children, pe_node_t * node);
enum action_tasks clone_child_action(pe_action_t * action);
int copies_per_node(pe_resource_t * rsc);
extern int compare_capacity(const pe_node_t * node1, const pe_node_t * node2);
extern void calculate_utilization(GHashTable * current_utilization,
GHashTable * utilization, gboolean plus);
extern void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_working_set_t * data_set);
-pe_action_t *create_pseudo_resource_op(pe_resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set);
pe_action_t *pe_cancel_op(pe_resource_t *rsc, const char *name,
guint interval_ms, pe_node_t *node,
pe_working_set_t *data_set);
pe_action_t *sched_shutdown_op(pe_node_t *node, pe_working_set_t *data_set);
xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event,
const char *caller_version, int target_rc,
const char *node, const char *origin,
int level);
# define LOAD_STOPPED "load_stopped"
void modify_configuration(pe_working_set_t *data_set, cib_t *cib,
pcmk_injections_t *injections);
enum transition_status run_simulation(pe_working_set_t * data_set, cib_t *cib, GList *op_fail_list);
#endif
diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h
index db8ae11c35..f58ea53ff0 100644
--- a/lib/pacemaker/libpacemaker_private.h
+++ b/lib/pacemaker/libpacemaker_private.h
@@ -1,225 +1,229 @@
/*
* Copyright 2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__LIBPACEMAKER_PRIVATE__H
# define PCMK__LIBPACEMAKER_PRIVATE__H
/* This header is for the sole use of libpacemaker, so that functions can be
* declared with G_GNUC_INTERNAL for efficiency.
*/
#include <crm/pengine/pe_types.h> // pe_action_t, pe_node_t, pe_working_set_t
// Actions
G_GNUC_INTERNAL
void pcmk__update_action_for_orderings(pe_action_t *action,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__log_action(const char *pre_text, pe_action_t *action, bool details);
+G_GNUC_INTERNAL
+pe_action_t *pcmk__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task,
+ bool optional, bool runnable);
+
G_GNUC_INTERNAL
bool pcmk__graph_has_loop(pe_action_t *init_action, pe_action_t *action,
pe_action_wrapper_t *input);
G_GNUC_INTERNAL
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *action, enum pe_ordering order,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__fence_guest(pe_node_t *node, pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__node_unfenced(pe_node_t *node);
G_GNUC_INTERNAL
bool pcmk__is_unfence_device(const pe_resource_t *rsc,
const pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe_resource_t *pcmk__find_constraint_resource(GList *rsc_list, const char *id);
G_GNUC_INTERNAL
xmlNode *pcmk__expand_tags_in_sets(xmlNode *xml_obj,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id,
pe_resource_t **rsc, pe_tag_t **tag);
G_GNUC_INTERNAL
bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr,
bool convert_rsc, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__create_internal_constraints(pe_working_set_t *data_set);
// Location constraints
G_GNUC_INTERNAL
void pcmk__unpack_location(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
pe__location_t *pcmk__new_location(const char *id, pe_resource_t *rsc,
int node_weight, const char *discover_mode,
pe_node_t *foo_node,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_locations(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_location(pe__location_t *constraint, pe_resource_t *rsc);
// Colocation constraints
enum pcmk__coloc_affects {
pcmk__coloc_affects_nothing = 0,
pcmk__coloc_affects_location,
pcmk__coloc_affects_role,
};
G_GNUC_INTERNAL
enum pcmk__coloc_affects pcmk__colocation_affects(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint,
bool preview);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_weights(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint);
G_GNUC_INTERNAL
void pcmk__apply_coloc_to_priority(pe_resource_t *dependent,
pe_resource_t *primary,
pcmk__colocation_t *constraint);
G_GNUC_INTERNAL
void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__new_colocation(const char *id, const char *node_attr, int score,
pe_resource_t *dependent, pe_resource_t *primary,
const char *dependent_role, const char *primary_role,
bool influence, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__block_colocated_starts(pe_action_t *action,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__new_ordering(pe_resource_t *lh_rsc, char *lh_task,
pe_action_t *lh_action, pe_resource_t *rh_rsc,
char *rh_task, pe_action_t *rh_action,
enum pe_ordering type, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__disable_invalid_orderings(pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_stops_before_shutdown(pe_node_t *node,
pe_action_t *shutdown_op,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__apply_orderings(pe_working_set_t *data_set);
/*!
* \internal
* \brief Create a new ordering between two resource actions
*
* \param[in] lh_rsc Resource for 'first' action
* \param[in] rh_rsc Resource for 'then' action
* \param[in] lh_task Action key for 'first' action
* \param[in] rh_task Action key for 'then' action
* \param[in] flags Bitmask of enum pe_ordering flags
* \param[in] data_set Cluster working set to add ordering to
*/
#define pcmk__order_resource_actions(lh_rsc, lh_task, rh_rsc, rh_task, \
flags, data_set) \
pcmk__new_ordering((lh_rsc), pcmk__op_key((lh_rsc)->id, (lh_task), 0), \
NULL, \
(rh_rsc), pcmk__op_key((rh_rsc)->id, (rh_task), 0), \
NULL, (flags), (data_set))
#define pcmk__order_starts(rsc1, rsc2, type, data_set) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_START, \
(rsc2), CRMD_ACTION_START, (type), (data_set))
#define pcmk__order_stops(rsc1, rsc2, type, data_set) \
pcmk__order_resource_actions((rsc1), CRMD_ACTION_STOP, \
(rsc2), CRMD_ACTION_STOP, (type), (data_set))
G_GNUC_INTERNAL
void pcmk__unpack_rsc_ticket(xmlNode *xml_obj, pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__order_probes(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__is_failed_remote_node(pe_node_t *node);
G_GNUC_INTERNAL
void pcmk__order_remote_connection_actions(pe_working_set_t *data_set);
G_GNUC_INTERNAL
bool pcmk__rsc_corresponds_to_guest(pe_resource_t *rsc, pe_node_t *node);
G_GNUC_INTERNAL
pe_node_t *pcmk__connection_host_for_action(pe_action_t *action);
G_GNUC_INTERNAL
void pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params,
pe_working_set_t *data_set);
G_GNUC_INTERNAL
void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, pe_action_t *action);
// Groups (pcmk_sched_group.c)
G_GNUC_INTERNAL
GList *pcmk__group_colocated_resources(pe_resource_t *rsc,
pe_resource_t *orig_rsc,
GList *colocated_rscs);
// Functions applying to more than one variant (pcmk_sched_resource.c)
G_GNUC_INTERNAL
GList *pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
GList *colocated_rscs);
G_GNUC_INTERNAL
bool pcmk__assign_primitive(pe_resource_t *rsc, pe_node_t *chosen, bool force);
G_GNUC_INTERNAL
bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force);
G_GNUC_INTERNAL
void pcmk__unassign_resource(pe_resource_t *rsc);
G_GNUC_INTERNAL
bool pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set,
pe_resource_t **failed);
#endif // PCMK__LIBPACEMAKER_PRIVATE__H
diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c
index 2064398aa9..a5d9abf144 100644
--- a/lib/pacemaker/pcmk_sched_actions.c
+++ b/lib/pacemaker/pcmk_sched_actions.c
@@ -1,767 +1,781 @@
/*
* Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <sys/param.h>
#include <glib.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Get the action flags relevant to ordering constraints
*
* \param[in] action Action to check
* \param[in] node Node that *other* action in the ordering is on
* (used only for clone resource actions)
*
* \return Action flags that should be used for orderings
*/
static enum pe_action_flags
action_flags_for_ordering(pe_action_t *action, pe_node_t *node)
{
bool runnable = false;
enum pe_action_flags flags;
// For non-resource actions, return the action flags
if (action->rsc == NULL) {
return action->flags;
}
/* For non-clone resources, or a clone action not assigned to a node,
* return the flags as determined by the resource method without a node
* specified.
*/
flags = action->rsc->cmds->action_flags(action, NULL);
if ((node == NULL) || !pe_rsc_is_clone(action->rsc)) {
return flags;
}
/* Otherwise (i.e., for clone resource actions on a specific node), first
* remember whether the non-node-specific action is runnable.
*/
runnable = pcmk_is_set(flags, pe_action_runnable);
// Then recheck the resource method with the node
flags = action->rsc->cmds->action_flags(action, node);
/* For clones in ordering constraints, the node-specific "runnable" doesn't
* matter, just the non-node-specific setting (i.e., is the action runnable
* anywhere).
*
* This applies only to runnable, and only for ordering constraints. This
* function shouldn't be used for other types of constraints without
* changes. Not very satisfying, but it's logical and appears to work well.
*/
if (runnable && !pcmk_is_set(flags, pe_action_runnable)) {
pe__set_raw_action_flags(flags, action->rsc->id,
pe_action_runnable);
}
return flags;
}
/*!
* \internal
* \brief Get action UUID that should be used with a resource ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the UUID and resource of the first action in an
* ordering, this returns the UUID of the action that should actually be used
* for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0").
*
* \param[in] first_uuid UUID of first action in ordering
* \param[in] first_rsc Resource of first action in ordering
*
* \return Newly allocated copy of UUID to use with ordering
* \note It is the caller's responsibility to free the return value.
*/
static char *
action_uuid_for_ordering(const char *first_uuid, pe_resource_t *first_rsc)
{
guint interval_ms = 0;
char *uuid = NULL;
char *rid = NULL;
char *first_task_str = NULL;
enum action_tasks first_task = no_action;
enum action_tasks remapped_task = no_action;
// Only non-notify actions for collective resources need remapping
if ((strstr(first_uuid, "notify") != NULL)
|| (first_rsc->variant < pe_group)) {
goto done;
}
// Only non-recurring actions need remapping
CRM_ASSERT(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms));
if (interval_ms > 0) {
goto done;
}
first_task = text2task(first_task_str);
switch (first_task) {
case stop_rsc:
case start_rsc:
case action_notify:
case action_promote:
case action_demote:
remapped_task = first_task + 1;
break;
case stopped_rsc:
case started_rsc:
case action_notified:
case action_promoted:
case action_demoted:
remapped_task = first_task;
break;
case monitor_rsc:
case shutdown_crm:
case stonith_node:
break;
default:
crm_err("Unknown action '%s' in ordering", first_task_str);
break;
}
if (remapped_task != no_action) {
/* If a (clone) resource has notifications enabled, we want to order
* relative to when all notifications have been sent for the remapped
* task. Only outermost resources or those in bundles have
* notifications.
*/
if (pcmk_is_set(first_rsc->flags, pe_rsc_notify)
&& ((first_rsc->parent == NULL)
|| (pe_rsc_is_clone(first_rsc)
&& (first_rsc->parent->variant == pe_container)))) {
uuid = pcmk__notify_key(rid, "confirmed-post",
task2text(remapped_task));
} else {
uuid = pcmk__op_key(rid, task2text(remapped_task), 0);
}
pe_rsc_trace(first_rsc,
"Remapped action UUID %s to %s for ordering purposes",
first_uuid, uuid);
}
done:
if (uuid == NULL) {
uuid = strdup(first_uuid);
CRM_ASSERT(uuid != NULL);
}
free(first_task_str);
free(rid);
return uuid;
}
/*!
* \internal
* \brief Get actual action that should be used with an ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the first action in an ordering, this returns the
* the action that should actually be used for ordering (for example, the
* started action instead of the start action).
*
* \param[in] action First action in an ordering
*
* \return Actual action that should be used for the ordering
*/
static pe_action_t *
action_for_ordering(pe_action_t *action)
{
pe_action_t *result = action;
pe_resource_t *rsc = action->rsc;
if ((rsc != NULL) && (rsc->variant >= pe_group) && (action->uuid != NULL)) {
char *uuid = action_uuid_for_ordering(action->uuid, rsc);
result = find_first_action(rsc->actions, uuid, NULL, NULL);
if (result == NULL) {
crm_warn("Not remapping %s to %s because %s does not have "
"remapped action", action->uuid, uuid, rsc->id);
result = action;
}
free(uuid);
}
return result;
}
/*!
* \internal
* \brief Update flags for ordering's actions appropriately for ordering's flags
*
* \param[in] first First action in an ordering
* \param[in] then Then action in an ordering
* \param[in] first_flags Action flags for \p first for ordering purposes
* \param[in] then_flags Action flags for \p then for ordering purposes
* \param[in] order Action wrapper for \p first in ordering
* \param[in] data_set Cluster working set
*
* \return Mask of pe_graph_updated_first and/or pe_graph_updated_then
*/
static enum pe_graph_flags
update_action_for_ordering_flags(pe_action_t *first, pe_action_t *then,
enum pe_action_flags first_flags,
enum pe_action_flags then_flags,
pe_action_wrapper_t *order,
pe_working_set_t *data_set)
{
enum pe_graph_flags changed = pe_graph_none;
/* The node will only be used for clones. If interleaved, node will be NULL,
* otherwise the ordering scope will be limited to the node. Normally, the
* whole 'then' clone should restart if 'first' is restarted, so then->node
* is needed.
*/
pe_node_t *node = then->node;
if (pcmk_is_set(order->type, pe_order_implies_then_on_node)) {
/* For unfencing, only instances of 'then' on the same node as 'first'
* (the unfencing operation) should restart, so reset node to
* first->node, at which point this case is handled like a normal
* pe_order_implies_then.
*/
pe__clear_order_flags(order->type, pe_order_implies_then_on_node);
pe__set_order_flags(order->type, pe_order_implies_then);
node = first->node;
pe_rsc_trace(then->rsc,
"%s then %s: mapped pe_order_implies_then_on_node to "
"pe_order_implies_then on %s",
first->uuid, then->uuid, node->details->uname);
}
if (pcmk_is_set(order->type, pe_order_implies_then)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags & pe_action_optional,
pe_action_optional,
pe_order_implies_then,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_optional)
&& pcmk_is_set(then->flags, pe_action_optional)) {
pe__clear_action_flags(then, pe_action_optional);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_then",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_restart) && (then->rsc != NULL)) {
enum pe_action_flags restart = pe_action_optional|pe_action_runnable;
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, restart,
pe_order_restart, data_set);
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_restart",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_implies_first)) {
if (first->rsc != NULL) {
changed |= first->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_optional,
pe_order_implies_first,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_optional)
&& pcmk_is_set(first->flags, pe_action_runnable)) {
pe__clear_action_flags(first, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_first);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_promoted_implies_first)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags & pe_action_optional,
pe_action_optional,
pe_order_promoted_implies_first,
data_set);
}
pe_rsc_trace(then->rsc,
"%s then %s: %s after pe_order_promoted_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_one_or_more)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_one_or_more,
data_set);
} else if (pcmk_is_set(first_flags, pe_action_runnable)) {
// We have another runnable instance of "first"
then->runnable_before++;
/* Mark "then" as runnable if it requires a certain number of
* "before" instances to be runnable, and they now are.
*/
if ((then->runnable_before >= then->required_runnable_before)
&& !pcmk_is_set(then->flags, pe_action_runnable)) {
pe__set_action_flags(then, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_one_or_more",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_probe) && (then->rsc != NULL)) {
if (!pcmk_is_set(first_flags, pe_action_runnable)
&& (first->rsc->running_on != NULL)) {
pe_rsc_trace(then->rsc,
"%s then %s: ignoring because first is stopping",
first->uuid, then->uuid);
order->type = pe_order_none;
} else {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_runnable_left,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_probe",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_runnable_left)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_runnable_left,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_runnable)
&& pcmk_is_set(then->flags, pe_action_runnable)) {
pe__clear_action_flags(then, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_runnable_left",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_implies_first_migratable)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags, pe_action_optional,
pe_order_implies_first_migratable, data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after "
"pe_order_implies_first_migratable",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_pseudo_left)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_optional,
pe_order_pseudo_left,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_pseudo_left",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_optional)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_optional,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_optional",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_asymmetrical)) {
if (then->rsc != NULL) {
changed |= then->rsc->cmds->update_actions(first, then, node,
first_flags,
pe_action_runnable,
pe_order_asymmetrical,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_asymmetrical",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(first->flags, pe_action_runnable)
&& pcmk_is_set(order->type, pe_order_implies_then_printed)
&& !pcmk_is_set(first_flags, pe_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
then->uuid, first->uuid);
pe__set_action_flags(then, pe_action_print_always);
// Don't bother marking 'then' as changed just for this
}
if (pcmk_is_set(order->type, pe_order_implies_first_printed)
&& !pcmk_is_set(then_flags, pe_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
first->uuid, then->uuid);
pe__set_action_flags(first, pe_action_print_always);
// Don't bother marking 'first' as changed just for this
}
if (pcmk_any_flags_set(order->type, pe_order_implies_then
|pe_order_implies_first
|pe_order_restart)
&& (first->rsc != NULL)
&& !pcmk_is_set(first->rsc->flags, pe_rsc_managed)
&& pcmk_is_set(first->rsc->flags, pe_rsc_block)
&& !pcmk_is_set(first->flags, pe_action_runnable)
&& pcmk__str_eq(first->task, RSC_STOP, pcmk__str_casei)) {
if (pcmk_is_set(then->flags, pe_action_runnable)) {
pe__clear_action_flags(then, pe_action_runnable);
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after checking whether first "
"is blocked, unmanaged, unrunnable stop",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
return changed;
}
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pe_action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->details->uname)
/*!
* \internal
* \brief Update an action's flags for all orderings where it is "then"
*
* \param[in] then Action to update
* \param[in] data_set Cluster working set
*/
void
pcmk__update_action_for_orderings(pe_action_t *then, pe_working_set_t *data_set)
{
GList *lpc = NULL;
enum pe_graph_flags changed = pe_graph_none;
int last_flags = then->flags;
pe_rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s",
action_type_str(then->flags), then->uuid,
action_optional_str(then->flags),
action_runnable_str(then->flags), action_node_str(then));
if (pcmk_is_set(then->flags, pe_action_requires_any)) {
/* Initialize current known "runnable before" actions. As
* update_action_for_ordering_flags() is called for each of then's
* before actions, this number will increment as runnable 'first'
* actions are encountered.
*/
then->runnable_before = 0;
if (then->required_runnable_before == 0) {
/* @COMPAT This ordering constraint uses the deprecated
* "require-all=false" attribute. Treat it like "clone-min=1".
*/
then->required_runnable_before = 1;
}
/* The pe_order_one_or_more clause of update_action_for_ordering_flags()
* (called below) will reset runnable if appropriate.
*/
pe__clear_action_flags(then, pe_action_runnable);
}
for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pe_action_t *first = other->action;
pe_node_t *then_node = then->node;
pe_node_t *first_node = first->node;
if ((first->rsc != NULL)
&& (first->rsc->variant == pe_group)
&& pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) {
first_node = first->rsc->fns->location(first->rsc, NULL, FALSE);
if (first_node != NULL) {
pe_rsc_trace(first->rsc, "Found node %s for 'first' %s",
first_node->details->uname, first->uuid);
}
}
if ((then->rsc != NULL)
&& (then->rsc->variant == pe_group)
&& pcmk__str_eq(then->task, RSC_START, pcmk__str_casei)) {
then_node = then->rsc->fns->location(then->rsc, NULL, FALSE);
if (then_node != NULL) {
pe_rsc_trace(then->rsc, "Found node %s for 'then' %s",
then_node->details->uname, then->uuid);
}
}
// Disable constraint if it only applies when on same node, but isn't
if (pcmk_is_set(other->type, pe_order_same_node)
&& (first_node != NULL) && (then_node != NULL)
&& (first_node->details != then_node->details)) {
pe_rsc_trace(then->rsc,
"Disabled ordering %s on %s then %s on %s: not same node",
other->action->uuid, first_node->details->uname,
then->uuid, then_node->details->uname);
other->type = pe_order_none;
continue;
}
pe__clear_graph_flags(changed, then, pe_graph_updated_first);
if ((first->rsc != NULL)
&& pcmk_is_set(other->type, pe_order_then_cancels_first)
&& !pcmk_is_set(then->flags, pe_action_optional)) {
/* 'then' is required, so we must abandon 'first'
* (e.g. a required stop cancels any agent reload).
*/
pe__set_action_flags(other->action, pe_action_optional);
if (!strcmp(first->task, CRMD_ACTION_RELOAD_AGENT)) {
pe__clear_resource_flags(first->rsc, pe_rsc_reload);
}
}
if ((first->rsc != NULL) && (then->rsc != NULL)
&& (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) {
first = action_for_ordering(first);
}
if (first != other->action) {
pe_rsc_trace(then->rsc, "Ordering %s after %s instead of %s",
then->uuid, first->uuid, other->action->uuid);
}
pe_rsc_trace(then->rsc,
"%s (0x%.6x) then %s (0x%.6x): type=0x%.6x node=%s",
first->uuid, first->flags, then->uuid, then->flags,
other->type, action_node_str(first));
if (first == other->action) {
/* 'first' was not remapped (e.g. from 'start' to 'running'), which
* could mean it is a non-resource action, a primitive resource
* action, or already expanded.
*/
enum pe_action_flags first_flags, then_flags;
first_flags = action_flags_for_ordering(first, then_node);
then_flags = action_flags_for_ordering(then, first_node);
changed |= update_action_for_ordering_flags(first, then,
first_flags, then_flags,
other, data_set);
/* 'first' was for a complex resource (clone, group, etc),
* create a new dependency if necessary
*/
} else if (order_actions(first, then, other->type)) {
/* This was the first time 'first' and 'then' were associated,
* start again to get the new actions_before list
*/
pe__set_graph_flags(changed, then,
pe_graph_updated_then|pe_graph_disable);
}
if (pcmk_is_set(changed, pe_graph_disable)) {
pe_rsc_trace(then->rsc,
"Disabled ordering %s then %s in favor of %s then %s",
other->action->uuid, then->uuid, first->uuid,
then->uuid);
pe__clear_graph_flags(changed, then, pe_graph_disable);
other->type = pe_order_none;
}
if (pcmk_is_set(changed, pe_graph_updated_first)) {
crm_trace("Re-processing %s and its 'after' actions "
"because it changed", first->uuid);
for (GList *lpc2 = first->actions_after; lpc2 != NULL;
lpc2 = lpc2->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc2->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
pcmk__update_action_for_orderings(first, data_set);
}
}
if (pcmk_is_set(then->flags, pe_action_requires_any)) {
if (last_flags == then->flags) {
pe__clear_graph_flags(changed, then, pe_graph_updated_then);
} else {
pe__set_graph_flags(changed, then, pe_graph_updated_then);
}
}
if (pcmk_is_set(changed, pe_graph_updated_then)) {
crm_trace("Re-processing %s and its 'after' actions because it changed",
then->uuid);
if (pcmk_is_set(last_flags, pe_action_runnable)
&& !pcmk_is_set(then->flags, pe_action_runnable)) {
pcmk__block_colocated_starts(then, data_set);
}
pcmk__update_action_for_orderings(then, data_set);
for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
}
}
/*!
* \internal
* \brief Trace-log an action (optionally with its dependent actions)
*
* \param[in] pre_text If not NULL, prefix the log with this plus ": "
* \param[in] action Action to log
* \param[in] details If true, recursively log dependent actions
*/
void
pcmk__log_action(const char *pre_text, pe_action_t *action, bool details)
{
const char *node_uname = NULL;
const char *node_uuid = NULL;
const char *desc = NULL;
CRM_CHECK(action != NULL, return);
if (!pcmk_is_set(action->flags, pe_action_pseudo)) {
if (action->node != NULL) {
node_uname = action->node->details->uname;
node_uuid = action->node->details->id;
} else {
node_uname = "<none>";
}
}
switch (text2task(action->task)) {
case stonith_node:
case shutdown_crm:
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
desc = "Pseudo ";
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
desc = "Optional ";
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
desc = "!!Non-Startable!! ";
} else if (pcmk_is_set(action->flags, pe_action_processed)) {
desc = "";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
default:
if (pcmk_is_set(action->flags, pe_action_optional)) {
desc = "Optional ";
} else if (pcmk_is_set(action->flags, pe_action_pseudo)) {
desc = "Pseudo ";
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
desc = "!!Non-Startable!! ";
} else if (pcmk_is_set(action->flags, pe_action_processed)) {
desc = "";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(action->rsc? action->rsc->id : "<none>"),
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
}
if (details) {
GList *iter = NULL;
crm_trace("\t\t====== Preceding Actions");
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== Subsequent Actions");
for (iter = action->actions_after; iter != NULL; iter = iter->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== End");
} else {
crm_trace("\t\t(before=%d, after=%d)",
g_list_length(action->actions_before),
g_list_length(action->actions_after));
}
}
+/*!
+ * \internal
+ * \brief Create a new pseudo-action for a resource
+ *
+ * \param[in] rsc Resource to create action for
+ * \param[in] task Action name
+ * \param[in] optional Whether action should be considered optional
+ * \param[in] runnable Whethe action should be considered runnable
+ *
+ * \return New action object corresponding to arguments
+ */
pe_action_t *
-create_pseudo_resource_op(pe_resource_t *rsc, const char *task, bool optional,
- bool runnable, pe_working_set_t *data_set)
+pcmk__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task,
+ bool optional, bool runnable)
{
- pe_action_t *action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0),
- task, NULL, optional, TRUE, data_set);
+ pe_action_t *action = NULL;
+
+ CRM_ASSERT((rsc != NULL) && (task != NULL));
+ action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0), task, NULL,
+ optional, TRUE, rsc->cluster);
pe__set_action_flags(action, pe_action_pseudo);
- if(runnable) {
+ if (runnable) {
pe__set_action_flags(action, pe_action_runnable);
}
return action;
}
diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c
index cc72900f40..99498fba9b 100644
--- a/lib/pacemaker/pcmk_sched_bundle.c
+++ b/lib/pacemaker/pcmk_sched_bundle.c
@@ -1,1093 +1,1093 @@
/*
* Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
#define PE__VARIANT_BUNDLE 1
#include <lib/pengine/variant.h>
static bool
is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node)
{
for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (node->details == replica->node->details) {
return TRUE;
}
}
return FALSE;
}
gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set);
void distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set);
static GList *
get_container_list(pe_resource_t *rsc)
{
GList *containers = NULL;
if (rsc->variant == pe_container) {
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, rsc);
for (GList *gIter = data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
containers = g_list_append(containers, replica->container);
}
}
return containers;
}
static inline GList *
get_containers_or_children(pe_resource_t *rsc)
{
return (rsc->variant == pe_container)?
get_container_list(rsc) : rsc->children;
}
pe_node_t *
pcmk__bundle_allocate(pe_resource_t *rsc, pe_node_t *prefer,
pe_working_set_t *data_set)
{
GList *containers = NULL;
GList *nodes = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return NULL);
get_bundle_variant_data(bundle_data, rsc);
pe__set_resource_flags(rsc, pe_rsc_allocating);
containers = get_container_list(rsc);
pe__show_node_weights(!pcmk_is_set(data_set->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, data_set);
nodes = g_hash_table_get_values(rsc->allowed_nodes);
nodes = sort_nodes_by_weight(nodes, NULL, data_set);
containers = g_list_sort_with_data(containers, sort_clone_instance, data_set);
distribute_children(rsc, containers, nodes, bundle_data->nreplicas,
bundle_data->nreplicas_per_host, data_set);
g_list_free(nodes);
g_list_free(containers);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
pe_node_t *container_host = NULL;
CRM_ASSERT(replica);
if (replica->ip) {
pe_rsc_trace(rsc, "Allocating bundle %s IP %s",
rsc->id, replica->ip->id);
replica->ip->cmds->allocate(replica->ip, prefer, data_set);
}
container_host = replica->container->allocated_to;
if (replica->remote && pe__is_guest_or_remote_node(container_host)) {
/* We need 'nested' connection resources to be on the same
* host because pacemaker-remoted only supports a single
* active connection
*/
pcmk__new_colocation("child-remote-with-docker-remote", NULL,
INFINITY, replica->remote,
container_host->details->remote_rsc, NULL,
NULL, true, data_set);
}
if (replica->remote) {
pe_rsc_trace(rsc, "Allocating bundle %s connection %s",
rsc->id, replica->remote->id);
replica->remote->cmds->allocate(replica->remote, prefer,
data_set);
}
// Explicitly allocate replicas' children before bundle child
if (replica->child) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, replica->child->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
if (node->details != replica->node->details) {
node->weight = -INFINITY;
} else if (!pcmk__threshold_reached(replica->child, node,
data_set, NULL)) {
node->weight = INFINITY;
}
}
pe__set_resource_flags(replica->child->parent, pe_rsc_allocating);
pe_rsc_trace(rsc, "Allocating bundle %s replica child %s",
rsc->id, replica->child->id);
replica->child->cmds->allocate(replica->child, replica->node,
data_set);
pe__clear_resource_flags(replica->child->parent,
pe_rsc_allocating);
}
}
if (bundle_data->child) {
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, bundle_data->child->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
if (is_bundle_node(bundle_data, node)) {
node->weight = 0;
} else {
node->weight = -INFINITY;
}
}
pe_rsc_trace(rsc, "Allocating bundle %s child %s",
rsc->id, bundle_data->child->id);
bundle_data->child->cmds->allocate(bundle_data->child, prefer, data_set);
}
pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional);
return NULL;
}
void
pcmk__bundle_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
{
pe_action_t *action = NULL;
GList *containers = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
containers = get_container_list(rsc);
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->ip) {
replica->ip->cmds->create_actions(replica->ip, data_set);
}
if (replica->container) {
replica->container->cmds->create_actions(replica->container,
data_set);
}
if (replica->remote) {
replica->remote->cmds->create_actions(replica->remote, data_set);
}
}
clone_create_pseudo_actions(rsc, containers, NULL, NULL, data_set);
if (bundle_data->child) {
bundle_data->child->cmds->create_actions(bundle_data->child, data_set);
if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) {
/* promote */
- create_pseudo_resource_op(rsc, RSC_PROMOTE, TRUE, TRUE, data_set);
- action = create_pseudo_resource_op(rsc, RSC_PROMOTED, TRUE, TRUE, data_set);
+ pcmk__new_rsc_pseudo_action(rsc, RSC_PROMOTE, true, true);
+ action = pcmk__new_rsc_pseudo_action(rsc, RSC_PROMOTED, true, true);
action->priority = INFINITY;
/* demote */
- create_pseudo_resource_op(rsc, RSC_DEMOTE, TRUE, TRUE, data_set);
- action = create_pseudo_resource_op(rsc, RSC_DEMOTED, TRUE, TRUE, data_set);
+ pcmk__new_rsc_pseudo_action(rsc, RSC_DEMOTE, true, true);
+ action = pcmk__new_rsc_pseudo_action(rsc, RSC_DEMOTED, true, true);
action->priority = INFINITY;
}
}
g_list_free(containers);
}
void
pcmk__bundle_internal_constraints(pe_resource_t *rsc,
pe_working_set_t *data_set)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
if (bundle_data->child) {
pcmk__order_resource_actions(rsc, RSC_START, bundle_data->child,
RSC_START, pe_order_implies_first_printed,
data_set);
pcmk__order_resource_actions(rsc, RSC_STOP, bundle_data->child,
RSC_STOP, pe_order_implies_first_printed,
data_set);
if (bundle_data->child->children) {
pcmk__order_resource_actions(bundle_data->child, RSC_STARTED, rsc,
RSC_STARTED,
pe_order_implies_then_printed,
data_set);
pcmk__order_resource_actions(bundle_data->child, RSC_STOPPED, rsc,
RSC_STOPPED,
pe_order_implies_then_printed,
data_set);
} else {
pcmk__order_resource_actions(bundle_data->child, RSC_START, rsc,
RSC_STARTED,
pe_order_implies_then_printed,
data_set);
pcmk__order_resource_actions(bundle_data->child, RSC_STOP, rsc,
RSC_STOPPED,
pe_order_implies_then_printed,
data_set);
}
}
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
CRM_ASSERT(replica->container);
replica->container->cmds->internal_constraints(replica->container,
data_set);
pcmk__order_starts(rsc, replica->container,
pe_order_runnable_left|pe_order_implies_first_printed,
data_set);
if (replica->child) {
pcmk__order_stops(rsc, replica->child,
pe_order_implies_first_printed, data_set);
}
pcmk__order_stops(rsc, replica->container,
pe_order_implies_first_printed, data_set);
pcmk__order_resource_actions(replica->container, RSC_START, rsc,
RSC_STARTED, pe_order_implies_then_printed,
data_set);
pcmk__order_resource_actions(replica->container, RSC_STOP, rsc,
RSC_STOPPED, pe_order_implies_then_printed,
data_set);
if (replica->ip) {
replica->ip->cmds->internal_constraints(replica->ip, data_set);
// Start IP then container
pcmk__order_starts(replica->ip, replica->container,
pe_order_runnable_left|pe_order_preserve,
data_set);
pcmk__order_stops(replica->container, replica->ip,
pe_order_implies_first|pe_order_preserve,
data_set);
pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip,
replica->container, NULL, NULL, true,
data_set);
}
if (replica->remote) {
/* This handles ordering and colocating remote relative to container
* (via "resource-with-container"). Since IP is also ordered and
* colocated relative to the container, we don't need to do anything
* explicit here with IP.
*/
replica->remote->cmds->internal_constraints(replica->remote,
data_set);
}
if (replica->child) {
CRM_ASSERT(replica->remote);
// "Start remote then child" is implicit in scheduler's remote logic
}
}
if (bundle_data->child) {
bundle_data->child->cmds->internal_constraints(bundle_data->child, data_set);
if (pcmk_is_set(bundle_data->child->flags, pe_rsc_promotable)) {
promote_demote_constraints(rsc, data_set);
/* child demoted before global demoted */
pcmk__order_resource_actions(bundle_data->child, RSC_DEMOTED, rsc,
RSC_DEMOTED,
pe_order_implies_then_printed,
data_set);
/* global demote before child demote */
pcmk__order_resource_actions(rsc, RSC_DEMOTE, bundle_data->child,
RSC_DEMOTE,
pe_order_implies_first_printed,
data_set);
/* child promoted before global promoted */
pcmk__order_resource_actions(bundle_data->child, RSC_PROMOTED, rsc,
RSC_PROMOTED,
pe_order_implies_then_printed,
data_set);
/* global promote before child promote */
pcmk__order_resource_actions(rsc, RSC_PROMOTE, bundle_data->child,
RSC_PROMOTE,
pe_order_implies_first_printed,
data_set);
}
}
}
static pe_resource_t *
compatible_replica_for_node(pe_resource_t *rsc_lh, pe_node_t *candidate,
pe_resource_t *rsc, enum rsc_role_e filter,
gboolean current)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(candidate != NULL, return NULL);
get_bundle_variant_data(bundle_data, rsc);
crm_trace("Looking for compatible child from %s for %s on %s",
rsc_lh->id, rsc->id, candidate->details->uname);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (is_child_compatible(replica->container, candidate, filter, current)) {
crm_trace("Pairing %s with %s on %s",
rsc_lh->id, replica->container->id,
candidate->details->uname);
return replica->container;
}
}
crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id);
return NULL;
}
static pe_resource_t *
compatible_replica(pe_resource_t *rsc_lh, pe_resource_t *rsc,
enum rsc_role_e filter, gboolean current,
pe_working_set_t *data_set)
{
GList *scratch = NULL;
pe_resource_t *pair = NULL;
pe_node_t *active_node_lh = NULL;
active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current);
if (active_node_lh) {
return compatible_replica_for_node(rsc_lh, active_node_lh, rsc, filter,
current);
}
scratch = g_hash_table_get_values(rsc_lh->allowed_nodes);
scratch = sort_nodes_by_weight(scratch, NULL, data_set);
for (GList *gIter = scratch; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
pair = compatible_replica_for_node(rsc_lh, node, rsc, filter, current);
if (pair) {
goto done;
}
}
pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? rsc->id : "none"));
done:
g_list_free(scratch);
return pair;
}
void
pcmk__bundle_rsc_colocation_lh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
/* -- Never called --
*
* Instead we add the colocation constraints to the child and call from there
*/
CRM_ASSERT(FALSE);
}
int copies_per_node(pe_resource_t * rsc)
{
/* Strictly speaking, there should be a 'copies_per_node' addition
* to the resource function table and each case would be a
* function. However that would be serious overkill to return an
* int. In fact, it seems to me that both function tables
* could/should be replaced by resources.{c,h} full of
* rsc_{some_operation} functions containing a switch as below
* which calls out to functions named {variant}_{some_operation}
* as needed.
*/
switch(rsc->variant) {
case pe_unknown:
return 0;
case pe_native:
case pe_group:
return 1;
case pe_clone:
{
const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX);
if (max_clones_node == NULL) {
return 1;
} else {
int max_i;
pcmk__scan_min_int(max_clones_node, &max_i, 0);
return max_i;
}
}
case pe_container:
{
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, rsc);
return data->nreplicas_per_host;
}
}
return 0;
}
void
pcmk__bundle_rsc_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
GList *allocated_primaries = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(constraint != NULL, return);
CRM_CHECK(dependent != NULL,
pe_err("dependent was NULL for %s", constraint->id); return);
CRM_CHECK(primary != NULL,
pe_err("primary was NULL for %s", constraint->id); return);
CRM_ASSERT(dependent->variant == pe_native);
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if(constraint->dependent->variant > pe_group) {
pe_resource_t *primary_replica = compatible_replica(dependent, primary,
RSC_ROLE_UNKNOWN,
FALSE, data_set);
if (primary_replica) {
pe_rsc_debug(primary, "Pairing %s with %s",
dependent->id, primary_replica->id);
dependent->cmds->rsc_colocation_lh(dependent, primary_replica,
constraint, data_set);
} else if (constraint->score >= INFINITY) {
crm_notice("Cannot pair %s with instance of %s",
dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true);
} else {
pe_rsc_debug(primary, "Cannot pair %s with instance of %s",
dependent->id, primary->id);
}
return;
}
get_bundle_variant_data(bundle_data, primary);
pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d",
constraint->id, dependent->id, primary->id, constraint->score);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (constraint->score < INFINITY) {
replica->container->cmds->rsc_colocation_rh(dependent,
replica->container,
constraint, data_set);
} else {
pe_node_t *chosen = replica->container->fns->location(replica->container,
NULL, FALSE);
if ((chosen == NULL)
|| is_set_recursive(replica->container, pe_rsc_block, TRUE)) {
continue;
}
if ((constraint->primary_role >= RSC_ROLE_PROMOTED)
&& (replica->child == NULL)) {
continue;
}
if ((constraint->primary_role >= RSC_ROLE_PROMOTED)
&& (replica->child->next_role < RSC_ROLE_PROMOTED)) {
continue;
}
pe_rsc_trace(primary, "Allowing %s: %s %d",
constraint->id, chosen->details->uname,
chosen->weight);
allocated_primaries = g_list_prepend(allocated_primaries, chosen);
}
}
if (constraint->score >= INFINITY) {
node_list_exclude(dependent->allowed_nodes, allocated_primaries, FALSE);
}
g_list_free(allocated_primaries);
}
enum pe_action_flags
pcmk__bundle_action_flags(pe_action_t *action, pe_node_t *node)
{
GList *containers = NULL;
enum pe_action_flags flags = 0;
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, action->rsc);
if(data->child) {
enum action_tasks task = get_complex_task(data->child, action->task, TRUE);
switch(task) {
case no_action:
case action_notify:
case action_notified:
case action_promote:
case action_promoted:
case action_demote:
case action_demoted:
return summary_action_flags(action, data->child->children, node);
default:
break;
}
}
containers = get_container_list(action->rsc);
flags = summary_action_flags(action, containers, node);
g_list_free(containers);
return flags;
}
pe_resource_t *
find_compatible_child_by_node(pe_resource_t * local_child, pe_node_t * local_node, pe_resource_t * rsc,
enum rsc_role_e filter, gboolean current)
{
GList *gIter = NULL;
GList *children = NULL;
if (local_node == NULL) {
crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id);
return NULL;
}
crm_trace("Looking for compatible child from %s for %s on %s",
local_child->id, rsc->id, local_node->details->uname);
children = get_containers_or_children(rsc);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if(is_child_compatible(child_rsc, local_node, filter, current)) {
crm_trace("Pairing %s with %s on %s",
local_child->id, child_rsc->id, local_node->details->uname);
return child_rsc;
}
}
crm_trace("Can't pair %s with %s", local_child->id, rsc->id);
if(children != rsc->children) {
g_list_free(children);
}
return NULL;
}
static pe__bundle_replica_t *
replica_for_container(pe_resource_t *rsc, pe_resource_t *container,
pe_node_t *node)
{
if (rsc->variant == pe_container) {
pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, rsc);
for (GList *gIter = data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (replica->child
&& (container == replica->container)
&& (node->details == replica->node->details)) {
return replica;
}
}
}
return NULL;
}
static enum pe_graph_flags
multi_update_interleave_actions(pe_action_t *first, pe_action_t *then,
pe_node_t *node, enum pe_action_flags flags,
enum pe_action_flags filter,
enum pe_ordering type,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
GList *children = NULL;
gboolean current = FALSE;
enum pe_graph_flags changed = pe_graph_none;
/* Fix this - lazy */
if (pcmk__ends_with(first->uuid, "_stopped_0")
|| pcmk__ends_with(first->uuid, "_demoted_0")) {
current = TRUE;
}
children = get_containers_or_children(then->rsc);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *then_child = gIter->data;
pe_resource_t *first_child = find_compatible_child(then_child,
first->rsc,
RSC_ROLE_UNKNOWN,
current, data_set);
if (first_child == NULL && current) {
crm_trace("Ignore");
} else if (first_child == NULL) {
crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid, then->uuid);
/* Me no like this hack - but what else can we do?
*
* If there is no-one active or about to be active
* on the same node as then_child, then they must
* not be allowed to start
*/
if (type & (pe_order_runnable_left | pe_order_implies_then) /* Mandatory */ ) {
pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id);
if (pcmk__assign_resource(then_child, NULL, true)) {
pe__set_graph_flags(changed, first, pe_graph_updated_then);
}
}
} else {
pe_action_t *first_action = NULL;
pe_action_t *then_action = NULL;
enum action_tasks task = clone_child_action(first);
const char *first_task = task2text(task);
pe__bundle_replica_t *first_replica = NULL;
pe__bundle_replica_t *then_replica = NULL;
first_replica = replica_for_container(first->rsc, first_child,
node);
if (strstr(first->task, "stop") && first_replica && first_replica->child) {
/* Except for 'stopped' we should be looking at the
* in-container resource, actions for the child will
* happen later and are therefor more likely to align
* with the user's intent.
*/
first_action = find_first_action(first_replica->child->actions,
NULL, task2text(task), node);
} else {
first_action = find_first_action(first_child->actions, NULL, task2text(task), node);
}
then_replica = replica_for_container(then->rsc, then_child, node);
if (strstr(then->task, "mote")
&& then_replica && then_replica->child) {
/* Promote/demote actions will never be found for the
* container resource, look in the child instead
*
* Alternatively treat:
* 'XXXX then promote YYYY' as 'XXXX then start container for YYYY', and
* 'demote XXXX then stop YYYY' as 'stop container for XXXX then stop YYYY'
*/
then_action = find_first_action(then_replica->child->actions,
NULL, then->task, node);
} else {
then_action = find_first_action(then_child->actions, NULL, then->task, node);
}
if (first_action == NULL) {
if (!pcmk_is_set(first_child->flags, pe_rsc_orphan)
&& !pcmk__str_any_of(first_task, RSC_STOP, RSC_DEMOTE, NULL)) {
crm_err("Internal error: No action found for %s in %s (first)",
first_task, first_child->id);
} else {
crm_trace("No action found for %s in %s%s (first)",
first_task, first_child->id,
pcmk_is_set(first_child->flags, pe_rsc_orphan)? " (ORPHAN)" : "");
}
continue;
}
/* We're only interested if 'then' is neither stopping nor being demoted */
if (then_action == NULL) {
if (!pcmk_is_set(then_child->flags, pe_rsc_orphan)
&& !pcmk__str_any_of(then->task, RSC_STOP, RSC_DEMOTE, NULL)) {
crm_err("Internal error: No action found for %s in %s (then)",
then->task, then_child->id);
} else {
crm_trace("No action found for %s in %s%s (then)",
then->task, then_child->id,
pcmk_is_set(then_child->flags, pe_rsc_orphan)? " (ORPHAN)" : "");
}
continue;
}
if (order_actions(first_action, then_action, type)) {
crm_debug("Created constraint for %s (%d) -> %s (%d) %.6x",
first_action->uuid,
pcmk_is_set(first_action->flags, pe_action_optional),
then_action->uuid,
pcmk_is_set(then_action->flags, pe_action_optional),
type);
pe__set_graph_flags(changed, first,
pe_graph_updated_first|pe_graph_updated_then);
}
if(first_action && then_action) {
changed |= then_child->cmds->update_actions(first_action,
then_action, node,
first_child->cmds->action_flags(first_action, node),
filter, type, data_set);
} else {
crm_err("Nothing found either for %s (%p) or %s (%p) %s",
first_child->id, first_action,
then_child->id, then_action, task2text(task));
}
}
}
if(children != then->rsc->children) {
g_list_free(children);
}
return changed;
}
static bool
can_interleave_actions(pe_action_t *first, pe_action_t *then)
{
bool interleave = FALSE;
pe_resource_t *rsc = NULL;
const char *interleave_s = NULL;
if(first->rsc == NULL || then->rsc == NULL) {
crm_trace("Not interleaving %s with %s (both must be resources)", first->uuid, then->uuid);
return FALSE;
} else if(first->rsc == then->rsc) {
crm_trace("Not interleaving %s with %s (must belong to different resources)", first->uuid, then->uuid);
return FALSE;
} else if(first->rsc->variant < pe_clone || then->rsc->variant < pe_clone) {
crm_trace("Not interleaving %s with %s (both sides must be clones or bundles)", first->uuid, then->uuid);
return FALSE;
}
if (pcmk__ends_with(then->uuid, "_stop_0")
|| pcmk__ends_with(then->uuid, "_demote_0")) {
rsc = first->rsc;
} else {
rsc = then->rsc;
}
interleave_s = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE);
interleave = crm_is_true(interleave_s);
crm_trace("Interleave %s -> %s: %s (based on %s)",
first->uuid, then->uuid, interleave ? "yes" : "no", rsc->id);
return interleave;
}
enum pe_graph_flags
pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then,
pe_node_t *node, enum pe_action_flags flags,
enum pe_action_flags filter, enum pe_ordering type,
pe_working_set_t *data_set)
{
enum pe_graph_flags changed = pe_graph_none;
crm_trace("%s -> %s", first->uuid, then->uuid);
if(can_interleave_actions(first, then)) {
changed = multi_update_interleave_actions(first, then, node, flags,
filter, type, data_set);
} else if(then->rsc) {
GList *gIter = NULL;
GList *children = NULL;
// Handle the 'primitive' ordering case
changed |= native_update_actions(first, then, node, flags, filter,
type, data_set);
// Now any children (or containers in the case of a bundle)
children = get_containers_or_children(then->rsc);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *then_child = (pe_resource_t *) gIter->data;
enum pe_graph_flags then_child_changed = pe_graph_none;
pe_action_t *then_child_action = find_first_action(then_child->actions, NULL, then->task, node);
if (then_child_action) {
enum pe_action_flags then_child_flags = then_child->cmds->action_flags(then_child_action, node);
if (pcmk_is_set(then_child_flags, pe_action_runnable)) {
then_child_changed |= then_child->cmds->update_actions(first,
then_child_action, node, flags, filter, type, data_set);
}
changed |= then_child_changed;
if (then_child_changed & pe_graph_updated_then) {
for (GList *lpc = then_child_action->actions_after; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *next = (pe_action_wrapper_t *) lpc->data;
pcmk__update_action_for_orderings(next->action,
data_set);
}
}
}
}
if(children != then->rsc->children) {
g_list_free(children);
}
}
return changed;
}
void
pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, rsc);
pcmk__apply_location(constraint, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
if (replica->container) {
replica->container->cmds->rsc_location(replica->container,
constraint);
}
if (replica->ip) {
replica->ip->cmds->rsc_location(replica->ip, constraint);
}
}
if (bundle_data->child
&& ((constraint->role_filter == RSC_ROLE_UNPROMOTED)
|| (constraint->role_filter == RSC_ROLE_PROMOTED))) {
bundle_data->child->cmds->rsc_location(bundle_data->child, constraint);
bundle_data->child->rsc_location = g_list_prepend(bundle_data->child->rsc_location,
constraint);
}
}
void
pcmk__bundle_expand(pe_resource_t *rsc, pe_working_set_t * data_set)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
if (bundle_data->child) {
bundle_data->child->cmds->expand(bundle_data->child, data_set);
}
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->remote && replica->container
&& pe__bundle_needs_remote_name(replica->remote, data_set)) {
/* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that
* run pacemaker-remoted inside, without needing a separate IP for
* the container. This is done by configuring the inner remote's
* connection host as the magic string "#uname", then
* replacing it with the underlying host when needed.
*/
xmlNode *nvpair = get_xpath_object("//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']",
replica->remote->xml, LOG_ERR);
const char *calculated_addr = NULL;
// Replace the value in replica->remote->xml (if appropriate)
calculated_addr = pe__add_bundle_remote_name(replica->remote,
data_set,
nvpair, "value");
if (calculated_addr) {
/* Since this is for the bundle as a resource, and not any
* particular action, replace the value in the default
* parameters (not evaluated for node). action2xml() will grab
* it from there to replace it in node-evaluated parameters.
*/
GHashTable *params = pe_rsc_params(replica->remote,
NULL, data_set);
crm_trace("Set address for bundle connection %s to bundle host %s",
replica->remote->id, calculated_addr);
g_hash_table_replace(params,
strdup(XML_RSC_ATTR_REMOTE_RA_ADDR),
strdup(calculated_addr));
} else {
/* The only way to get here is if the remote connection is
* neither currently running nor scheduled to run. That means we
* won't be doing any operations that require addr (only start
* requires it; we additionally use it to compare digests when
* unpacking status, promote, and migrate_from history, but
* that's already happened by this point).
*/
crm_info("Unable to determine address for bundle %s remote connection",
rsc->id);
}
}
if (replica->ip) {
replica->ip->cmds->expand(replica->ip, data_set);
}
if (replica->container) {
replica->container->cmds->expand(replica->container, data_set);
}
if (replica->remote) {
replica->remote->cmds->expand(replica->remote, data_set);
}
}
}
gboolean
pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *complete, gboolean force,
pe_working_set_t * data_set)
{
bool any_created = FALSE;
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->ip) {
any_created |= replica->ip->cmds->create_probe(replica->ip, node,
complete, force,
data_set);
}
if (replica->child && (node->details == replica->node->details)) {
any_created |= replica->child->cmds->create_probe(replica->child,
node, complete,
force, data_set);
}
if (replica->container) {
bool created = replica->container->cmds->create_probe(replica->container,
node, complete,
force, data_set);
if(created) {
any_created = TRUE;
/* If we're limited to one replica per host (due to
* the lack of an IP range probably), then we don't
* want any of our peer containers starting until
* we've established that no other copies are already
* running.
*
* Partly this is to ensure that nreplicas_per_host is
* observed, but also to ensure that the containers
* don't fail to start because the necessary port
* mappings (which won't include an IP for uniqueness)
* are already taken
*/
for (GList *tIter = bundle_data->replicas;
tIter && (bundle_data->nreplicas_per_host == 1);
tIter = tIter->next) {
pe__bundle_replica_t *other = tIter->data;
if ((other != replica) && (other != NULL)
&& (other->container != NULL)) {
pcmk__new_ordering(replica->container,
pcmk__op_key(replica->container->id, RSC_STATUS, 0),
NULL, other->container,
pcmk__op_key(other->container->id, RSC_START, 0),
NULL,
pe_order_optional|pe_order_same_node,
data_set);
}
}
}
}
if (replica->container && replica->remote
&& replica->remote->cmds->create_probe(replica->remote, node,
complete, force,
data_set)) {
/* Do not probe the remote resource until we know where the
* container is running. This is required for REMOTE_CONTAINER_HACK
* to correctly probe remote resources.
*/
char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS,
0);
pe_action_t *probe = find_first_action(replica->remote->actions,
probe_uuid, NULL, node);
free(probe_uuid);
if (probe) {
any_created = TRUE;
crm_trace("Ordering %s probe on %s",
replica->remote->id, node->details->uname);
pcmk__new_ordering(replica->container,
pcmk__op_key(replica->container->id, RSC_START, 0),
NULL, replica->remote, NULL, probe,
pe_order_probe, data_set);
}
}
}
return any_created;
}
void
pcmk__bundle_append_meta(pe_resource_t *rsc, xmlNode *xml)
{
}
void
pcmk__bundle_log_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pe__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (replica->ip) {
LogActions(replica->ip, data_set);
}
if (replica->container) {
LogActions(replica->container, data_set);
}
if (replica->remote) {
LogActions(replica->remote, data_set);
}
if (replica->child) {
LogActions(replica->child, data_set);
}
}
}
diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c
index 7916117a47..f1da1f2777 100644
--- a/lib/pacemaker/pcmk_sched_clone.c
+++ b/lib/pacemaker/pcmk_sched_clone.c
@@ -1,1534 +1,1536 @@
/*
* Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
#define VARIANT_CLONE 1
#include <lib/pengine/variant.h>
gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set);
static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all);
static gint
sort_rsc_id(gconstpointer a, gconstpointer b)
{
const pe_resource_t *resource1 = (const pe_resource_t *)a;
const pe_resource_t *resource2 = (const pe_resource_t *)b;
long num1, num2;
CRM_ASSERT(resource1 != NULL);
CRM_ASSERT(resource2 != NULL);
/*
* Sort clone instances numerically by instance number, so instance :10
* comes after :9.
*/
num1 = strtol(strrchr(resource1->id, ':') + 1, NULL, 10);
num2 = strtol(strrchr(resource2->id, ':') + 1, NULL, 10);
if (num1 < num2) {
return -1;
} else if (num1 > num2) {
return 1;
}
return 0;
}
static pe_node_t *
parent_node_instance(const pe_resource_t * rsc, pe_node_t * node)
{
pe_node_t *ret = NULL;
if (node != NULL && rsc->parent) {
ret = pe_hash_table_lookup(rsc->parent->allowed_nodes, node->details->id);
} else if(node != NULL) {
ret = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
}
return ret;
}
static gboolean
did_fail(const pe_resource_t * rsc)
{
GList *gIter = rsc->children;
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
return TRUE;
}
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
if (did_fail(child_rsc)) {
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Compare instances based on colocation scores.
*
* Determines the relative order in which \c rsc1 and \c rsc2 should be
* allocated. If one resource compares less than the other, then it
* should be allocated first.
*
* \param[in] rsc1 The first instance to compare.
* \param[in] rsc2 The second instance to compare.
* \param[in] data_set Cluster working set.
*
* \return -1 if `rsc1 < rsc2`,
* 0 if `rsc1 == rsc2`, or
* 1 if `rsc1 > rsc2`
*/
static int
order_instance_by_colocation(const pe_resource_t *rsc1,
const pe_resource_t *rsc2,
pe_working_set_t *data_set)
{
int rc = 0;
pe_node_t *n = NULL;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
pe_node_t *current_node1 = pe__current_node(rsc1);
pe_node_t *current_node2 = pe__current_node(rsc2);
GList *list1 = NULL;
GList *list2 = NULL;
GHashTable *hash1 = pcmk__strkey_table(NULL, free);
GHashTable *hash2 = pcmk__strkey_table(NULL, free);
/* Clone instances must have parents */
CRM_ASSERT(rsc1->parent != NULL);
CRM_ASSERT(rsc2->parent != NULL);
n = pe__copy_node(current_node1);
g_hash_table_insert(hash1, (gpointer) n->details->id, n);
n = pe__copy_node(current_node2);
g_hash_table_insert(hash2, (gpointer) n->details->id, n);
/* Apply rsc1's parental colocations */
for (GList *gIter = rsc1->parent->rsc_cons; gIter != NULL;
gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
crm_trace("Applying %s to %s", constraint->id, rsc1->id);
hash1 = pcmk__native_merge_weights(constraint->primary, rsc1->id, hash1,
constraint->node_attribute,
constraint->score / (float) INFINITY,
0);
}
for (GList *gIter = rsc1->parent->rsc_cons_lhs; gIter != NULL;
gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, rsc1)) {
continue;
}
crm_trace("Applying %s to %s", constraint->id, rsc1->id);
hash1 = pcmk__native_merge_weights(constraint->dependent, rsc1->id,
hash1, constraint->node_attribute,
constraint->score / (float) INFINITY,
pe_weights_positive);
}
/* Apply rsc2's parental colocations */
for (GList *gIter = rsc2->parent->rsc_cons; gIter != NULL;
gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
crm_trace("Applying %s to %s", constraint->id, rsc2->id);
hash2 = pcmk__native_merge_weights(constraint->primary, rsc2->id, hash2,
constraint->node_attribute,
constraint->score / (float) INFINITY,
0);
}
for (GList *gIter = rsc2->parent->rsc_cons_lhs; gIter;
gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, rsc2)) {
continue;
}
crm_trace("Applying %s to %s", constraint->id, rsc2->id);
hash2 = pcmk__native_merge_weights(constraint->dependent, rsc2->id,
hash2, constraint->node_attribute,
constraint->score / (float) INFINITY,
pe_weights_positive);
}
/* Current location score */
node1 = g_hash_table_lookup(hash1, current_node1->details->id);
node2 = g_hash_table_lookup(hash2, current_node2->details->id);
if (node1->weight < node2->weight) {
if (node1->weight < 0) {
crm_trace("%s > %s: current score: %d %d",
rsc1->id, rsc2->id, node1->weight, node2->weight);
rc = -1;
goto out;
} else {
crm_trace("%s < %s: current score: %d %d",
rsc1->id, rsc2->id, node1->weight, node2->weight);
rc = 1;
goto out;
}
} else if (node1->weight > node2->weight) {
crm_trace("%s > %s: current score: %d %d",
rsc1->id, rsc2->id, node1->weight, node2->weight);
rc = -1;
goto out;
}
/* All location scores */
list1 = g_hash_table_get_values(hash1);
list2 = g_hash_table_get_values(hash2);
list1 = sort_nodes_by_weight(list1, current_node1, data_set);
list2 = sort_nodes_by_weight(list2, current_node2, data_set);
for (GList *gIter1 = list1, *gIter2 = list2;
(gIter1 != NULL) && (gIter2 != NULL);
gIter1 = gIter1->next, gIter2 = gIter2->next) {
node1 = (pe_node_t *) gIter1->data;
node2 = (pe_node_t *) gIter2->data;
if (node1 == NULL) {
crm_trace("%s < %s: colocated score NULL", rsc1->id, rsc2->id);
rc = 1;
break;
} else if (node2 == NULL) {
crm_trace("%s > %s: colocated score NULL", rsc1->id, rsc2->id);
rc = -1;
break;
}
if (node1->weight < node2->weight) {
crm_trace("%s < %s: colocated score", rsc1->id, rsc2->id);
rc = 1;
break;
} else if (node1->weight > node2->weight) {
crm_trace("%s > %s: colocated score", rsc1->id, rsc2->id);
rc = -1;
break;
}
}
out:
g_hash_table_destroy(hash1);
g_hash_table_destroy(hash2);
g_list_free(list1);
g_list_free(list2);
return rc;
}
gint
sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
pe_node_t *current_node1 = NULL;
pe_node_t *current_node2 = NULL;
unsigned int nnodes1 = 0;
unsigned int nnodes2 = 0;
gboolean can1 = TRUE;
gboolean can2 = TRUE;
const pe_resource_t *resource1 = (const pe_resource_t *)a;
const pe_resource_t *resource2 = (const pe_resource_t *)b;
CRM_ASSERT(resource1 != NULL);
CRM_ASSERT(resource2 != NULL);
/* allocation order:
* - active instances
* - instances running on nodes with the least copies
* - active instances on nodes that can't support them or are to be fenced
* - failed instances
* - inactive instances
*/
current_node1 = pe__find_active_on(resource1, &nnodes1, NULL);
current_node2 = pe__find_active_on(resource2, &nnodes2, NULL);
/* If both instances are running and at least one is multiply
* active, give precedence to the one that's running on fewer nodes.
*/
if ((nnodes1 > 0) && (nnodes2 > 0)) {
if (nnodes1 < nnodes2) {
crm_trace("%s < %s: running_on", resource1->id, resource2->id);
return -1;
} else if (nnodes1 > nnodes2) {
crm_trace("%s > %s: running_on", resource1->id, resource2->id);
return 1;
}
}
/* Instance whose current location is available sorts first */
node1 = current_node1;
node2 = current_node2;
if (node1 != NULL) {
pe_node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id);
if (match == NULL || match->weight < 0) {
crm_trace("%s: current location is unavailable", resource1->id);
node1 = NULL;
can1 = FALSE;
}
}
if (node2 != NULL) {
pe_node_t *match = pe_hash_table_lookup(resource2->allowed_nodes, node2->details->id);
if (match == NULL || match->weight < 0) {
crm_trace("%s: current location is unavailable", resource2->id);
node2 = NULL;
can2 = FALSE;
}
}
if (can1 && !can2) {
crm_trace("%s < %s: availability of current location", resource1->id,
resource2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("%s > %s: availability of current location", resource1->id,
resource2->id);
return 1;
}
/* Higher-priority instance sorts first */
if (resource1->priority > resource2->priority) {
crm_trace("%s < %s: priority", resource1->id, resource2->id);
return -1;
} else if (resource1->priority < resource2->priority) {
crm_trace("%s > %s: priority", resource1->id, resource2->id);
return 1;
}
/* Active instance sorts first */
if (node1 == NULL && node2 == NULL) {
crm_trace("%s == %s: not active", resource1->id, resource2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("%s > %s: active", resource1->id, resource2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("%s < %s: active", resource1->id, resource2->id);
return -1;
}
/* Instance whose current node can run resources sorts first */
can1 = can_run_resources(node1);
can2 = can_run_resources(node2);
if (can1 && !can2) {
crm_trace("%s < %s: can", resource1->id, resource2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("%s > %s: can", resource1->id, resource2->id);
return 1;
}
/* Is the parent allowed to run on the instance's current node?
* Instance with parent allowed sorts first.
*/
node1 = parent_node_instance(resource1, node1);
node2 = parent_node_instance(resource2, node2);
if (node1 == NULL && node2 == NULL) {
crm_trace("%s == %s: not allowed", resource1->id, resource2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("%s > %s: not allowed", resource1->id, resource2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("%s < %s: not allowed", resource1->id, resource2->id);
return -1;
}
/* Does one node have more instances allocated?
* Instance whose current node has fewer instances sorts first.
*/
if (node1->count < node2->count) {
crm_trace("%s < %s: count", resource1->id, resource2->id);
return -1;
} else if (node1->count > node2->count) {
crm_trace("%s > %s: count", resource1->id, resource2->id);
return 1;
}
/* Failed instance sorts first */
can1 = did_fail(resource1);
can2 = did_fail(resource2);
if (can1 && !can2) {
crm_trace("%s > %s: failed", resource1->id, resource2->id);
return 1;
} else if (!can1 && can2) {
crm_trace("%s < %s: failed", resource1->id, resource2->id);
return -1;
}
rc = order_instance_by_colocation(resource1, resource2, data_set);
if (rc != 0) {
return rc;
}
/* Default to lexicographic order by ID */
rc = strcmp(resource1->id, resource2->id);
crm_trace("%s %c %s: default", resource1->id, rc < 0 ? '<' : '>', resource2->id);
return rc;
}
static pe_node_t *
can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit)
{
pe_node_t *local_node = NULL;
if (node == NULL && rsc->allowed_nodes) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) {
can_run_instance(rsc, local_node, limit);
}
return NULL;
}
if (!node) {
/* make clang analyzer happy */
goto bail;
} else if (can_run_resources(node) == FALSE) {
goto bail;
} else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
goto bail;
}
local_node = parent_node_instance(rsc, node);
if (local_node == NULL) {
crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname);
goto bail;
} else if (local_node->weight < 0) {
common_update_score(rsc, node->details->id, local_node->weight);
pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.",
rsc->id, node->details->uname);
} else if (local_node->count < limit) {
pe_rsc_trace(rsc, "%s can run on %s (already running %d)",
rsc->id, node->details->uname, local_node->count);
return local_node;
} else {
pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)",
rsc->id, node->details->uname, local_node->count, limit);
}
bail:
if (node) {
common_update_score(rsc, node->details->id, -INFINITY);
}
return NULL;
}
static pe_node_t *
allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc,
int limit, pe_working_set_t *data_set)
{
pe_node_t *chosen = NULL;
GHashTable *backup = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)",
rsc->id, (prefer? prefer->details->uname: "none"),
(all_coloc? "all" : "some"));
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return rsc->fns->location(rsc, NULL, FALSE);
} else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
/* Only include positive colocation preferences of dependent resources
* if not every node will get a copy of the clone
*/
append_parent_colocation(rsc->parent, rsc, all_coloc);
if (prefer) {
pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (local_prefer == NULL || local_prefer->weight < 0) {
pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id,
prefer->details->uname);
return NULL;
}
}
can_run_instance(rsc, NULL, limit);
backup = pcmk__copy_node_table(rsc->allowed_nodes);
pe_rsc_trace(rsc, "Allocating instance %s", rsc->id);
chosen = rsc->cmds->allocate(rsc, prefer, data_set);
if (chosen && prefer && (chosen->details != prefer->details)) {
crm_info("Not pre-allocating %s to %s because %s is better",
rsc->id, prefer->details->uname, chosen->details->uname);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = backup;
pcmk__unassign_resource(rsc);
chosen = NULL;
backup = NULL;
}
if (chosen) {
pe_node_t *local_node = parent_node_instance(rsc, chosen);
if (local_node) {
local_node->count++;
} else if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
/* what to do? we can't enforce per-node limits in this case */
pcmk__config_err("%s not found in %s (list of %d)",
chosen->details->id, rsc->parent->id,
g_hash_table_size(rsc->parent->allowed_nodes));
}
}
if(backup) {
g_hash_table_destroy(backup);
}
return chosen;
}
static void
append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all)
{
GList *gIter = NULL;
gIter = rsc->rsc_cons;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data;
if (all || cons->score < 0 || cons->score == INFINITY) {
child->rsc_cons = g_list_prepend(child->rsc_cons, cons);
}
}
gIter = rsc->rsc_cons_lhs;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(cons, child)) {
continue;
}
if (all || cons->score < 0) {
child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons);
}
}
}
void
distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set);
void
distribute_children(pe_resource_t *rsc, GList *children, GList *nodes,
int max, int per_host_max, pe_working_set_t * data_set)
{
int loop_max = 0;
int allocated = 0;
int available_nodes = 0;
bool all_coloc = false;
/* count now tracks the number of clones currently allocated */
for(GList *nIter = nodes; nIter != NULL; nIter = nIter->next) {
pe_node_t *node = nIter->data;
node->count = 0;
if (can_run_resources(node)) {
available_nodes++;
}
}
all_coloc = (max < available_nodes) ? true : false;
if(available_nodes) {
loop_max = max / available_nodes;
}
if (loop_max < 1) {
loop_max = 1;
}
pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)",
max, rsc->id, available_nodes, per_host_max, loop_max);
/* Pre-allocate as many instances as we can to their current location */
for (GList *gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe_node_t *child_node = NULL;
pe_node_t *local_node = NULL;
if ((child->running_on == NULL)
|| !pcmk_is_set(child->flags, pe_rsc_provisional)
|| pcmk_is_set(child->flags, pe_rsc_failed)) {
continue;
}
child_node = pe__current_node(child);
local_node = parent_node_instance(child, child_node);
pe_rsc_trace(rsc,
"Checking pre-allocation of %s to %s (%d remaining of %d)",
child->id, child_node->details->uname, max - allocated,
max);
if (!can_run_resources(child_node) || (child_node->weight < 0)) {
pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s",
child_node->details->uname, child->id);
continue;
}
if ((local_node != NULL) && (local_node->count >= loop_max)) {
pe_rsc_trace(rsc,
"Not pre-allocating because %s already allocated "
"optimal instances", child_node->details->uname);
continue;
}
if (allocate_instance(child, child_node, all_coloc, per_host_max,
data_set)) {
pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id,
child_node->details->uname);
allocated++;
}
}
pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max);
for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
if (child->running_on != NULL) {
pe_node_t *child_node = pe__current_node(child);
pe_node_t *local_node = parent_node_instance(child, child_node);
if (local_node == NULL) {
crm_err("%s is running on %s which isn't allowed",
child->id, child_node->details->uname);
}
}
if (!pcmk_is_set(child->flags, pe_rsc_provisional)) {
} else if (allocated >= max) {
pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max);
resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set);
} else {
if (allocate_instance(child, NULL, all_coloc, per_host_max,
data_set)) {
allocated++;
}
}
}
pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d",
allocated, rsc->id, max);
}
pe_node_t *
pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *prefer,
pe_working_set_t *data_set)
{
GList *nodes = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
return NULL;
} else if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__add_promotion_scores(rsc);
}
pe__set_resource_flags(rsc, pe_rsc_allocating);
/* this information is used by sort_clone_instance() when deciding in which
* order to allocate clone instances
*/
for (GList *gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
pe_rsc_trace(rsc, "%s: Allocating %s first",
rsc->id, constraint->primary->id);
constraint->primary->cmds->allocate(constraint->primary, prefer,
data_set);
}
for (GList *gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, NULL)) {
continue;
}
rsc->allowed_nodes = constraint->dependent->cmds->merge_weights(
constraint->dependent, rsc->id, rsc->allowed_nodes,
constraint->node_attribute, (float)constraint->score / INFINITY,
(pe_weights_rollback | pe_weights_positive));
}
pe__show_node_weights(!pcmk_is_set(data_set->flags, pe_flag_show_scores),
rsc, __func__, rsc->allowed_nodes, data_set);
nodes = g_hash_table_get_values(rsc->allowed_nodes);
nodes = sort_nodes_by_weight(nodes, NULL, data_set);
rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set);
distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, data_set);
g_list_free(nodes);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__set_instance_roles(rsc, data_set);
}
pe__clear_resource_flags(rsc, pe_rsc_provisional|pe_rsc_allocating);
pe_rsc_trace(rsc, "Done allocating %s", rsc->id);
return NULL;
}
static void
clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting,
gboolean * active)
{
GList *gIter = NULL;
if (rsc->children) {
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
clone_update_pseudo_status(child, stopping, starting, active);
}
return;
}
CRM_ASSERT(active != NULL);
CRM_ASSERT(starting != NULL);
CRM_ASSERT(stopping != NULL);
if (rsc->running_on) {
*active = TRUE;
}
gIter = rsc->actions;
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (*starting && *stopping) {
return;
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid);
continue;
} else if (!pcmk_any_flags_set(action->flags,
pe_action_pseudo|pe_action_runnable)) {
pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid);
continue;
} else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) {
pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid);
*stopping = TRUE;
} else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) {
if (!pcmk_is_set(action->flags, pe_action_runnable)) {
pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d",
action->uuid,
pcmk_is_set(action->flags, pe_action_runnable),
pcmk_is_set(action->flags, pe_action_pseudo));
} else {
pe_rsc_trace(rsc, "Starting due to: %s", action->uuid);
pe_rsc_trace(rsc, "%s run=%d, pseudo=%d",
action->uuid,
pcmk_is_set(action->flags, pe_action_runnable),
pcmk_is_set(action->flags, pe_action_pseudo));
*starting = TRUE;
}
}
}
}
static pe_action_t *
find_rsc_action(pe_resource_t *rsc, const char *task)
{
pe_action_t *match = NULL;
GList *actions = pe__resource_actions(rsc, NULL, task, FALSE);
for (GList *item = actions; item != NULL; item = item->next) {
pe_action_t *op = (pe_action_t *) item->data;
if (!pcmk_is_set(op->flags, pe_action_optional)) {
if (match != NULL) {
// More than one match, don't return any
match = NULL;
break;
}
match = op;
}
}
g_list_free(actions);
return match;
}
static void
child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set)
{
pe_action_t *stop = NULL;
pe_action_t *start = NULL;
pe_action_t *last_stop = NULL;
pe_action_t *last_start = NULL;
GList *gIter = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (clone_data->ordered == FALSE) {
return;
}
/* we have to maintain a consistent sorted child list when building order constraints */
rsc->children = g_list_sort(rsc->children, sort_rsc_id);
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
stop = find_rsc_action(child, RSC_STOP);
if (stop) {
if (last_stop) {
/* child/child relative stop */
order_actions(stop, last_stop, pe_order_optional);
}
last_stop = stop;
}
start = find_rsc_action(child, RSC_START);
if (start) {
if (last_start) {
/* child/child relative start */
order_actions(last_start, start, pe_order_optional);
}
last_start = start;
}
}
}
void
clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify,data_set);
child_ordering_constraints(rsc, data_set);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
create_promotable_actions(rsc, data_set);
}
}
void
clone_create_pseudo_actions(
pe_resource_t * rsc, GList *children, notify_data_t **start_notify, notify_data_t **stop_notify, pe_working_set_t * data_set)
{
gboolean child_active = FALSE;
gboolean child_starting = FALSE;
gboolean child_stopping = FALSE;
gboolean allow_dependent_migrations = TRUE;
pe_action_t *stop = NULL;
pe_action_t *stopped = NULL;
pe_action_t *start = NULL;
pe_action_t *started = NULL;
pe_rsc_trace(rsc, "Creating actions for %s", rsc->id);
for (GList *gIter = children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
gboolean starting = FALSE;
gboolean stopping = FALSE;
child_rsc->cmds->create_actions(child_rsc, data_set);
clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active);
if (stopping && starting) {
allow_dependent_migrations = FALSE;
}
child_stopping |= stopping;
child_starting |= starting;
}
/* start */
- start = create_pseudo_resource_op(rsc, RSC_START, !child_starting, TRUE, data_set);
- started = create_pseudo_resource_op(rsc, RSC_STARTED, !child_starting, FALSE, data_set);
+ start = pcmk__new_rsc_pseudo_action(rsc, RSC_START, !child_starting, true);
+ started = pcmk__new_rsc_pseudo_action(rsc, RSC_STARTED, !child_starting,
+ false);
started->priority = INFINITY;
if (child_active || child_starting) {
pe__set_action_flags(started, pe_action_runnable);
}
if (start_notify != NULL && *start_notify == NULL) {
*start_notify = create_notification_boundaries(rsc, RSC_START, start, started, data_set);
}
/* stop */
- stop = create_pseudo_resource_op(rsc, RSC_STOP, !child_stopping, TRUE, data_set);
- stopped = create_pseudo_resource_op(rsc, RSC_STOPPED, !child_stopping, TRUE, data_set);
+ stop = pcmk__new_rsc_pseudo_action(rsc, RSC_STOP, !child_stopping, true);
+ stopped = pcmk__new_rsc_pseudo_action(rsc, RSC_STOPPED, !child_stopping,
+ true);
stopped->priority = INFINITY;
if (allow_dependent_migrations) {
pe__set_action_flags(stop, pe_action_migrate_runnable);
}
if (stop_notify != NULL && *stop_notify == NULL) {
*stop_notify = create_notification_boundaries(rsc, RSC_STOP, stop, stopped, data_set);
if (start_notify && *start_notify && *stop_notify) {
order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional);
}
}
}
void
clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
{
pe_resource_t *last_rsc = NULL;
GList *gIter;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id);
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
pe_order_optional, data_set);
pcmk__order_resource_actions(rsc, RSC_START, rsc, RSC_STARTED,
pe_order_runnable_left, data_set);
pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_STOPPED,
pe_order_runnable_left, data_set);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP,
pe_order_optional, data_set);
pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE,
pe_order_runnable_left, data_set);
}
if (clone_data->ordered) {
/* we have to maintain a consistent sorted child list when building order constraints */
rsc->children = g_list_sort(rsc->children, sort_rsc_id);
}
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->internal_constraints(child_rsc, data_set);
pcmk__order_starts(rsc, child_rsc,
pe_order_runnable_left|pe_order_implies_first_printed,
data_set);
pcmk__order_resource_actions(child_rsc, RSC_START, rsc, RSC_STARTED,
pe_order_implies_then_printed, data_set);
if (clone_data->ordered && last_rsc) {
pcmk__order_starts(last_rsc, child_rsc, pe_order_optional,
data_set);
}
pcmk__order_stops(rsc, child_rsc, pe_order_implies_first_printed,
data_set);
pcmk__order_resource_actions(child_rsc, RSC_STOP, rsc, RSC_STOPPED,
pe_order_implies_then_printed, data_set);
if (clone_data->ordered && last_rsc) {
pcmk__order_stops(child_rsc, last_rsc, pe_order_optional, data_set);
}
last_rsc = child_rsc;
}
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
promotable_constraints(rsc, data_set);
}
}
gboolean
is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current)
{
pe_node_t *node = NULL;
enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current);
CRM_CHECK(child_rsc && local_node, return FALSE);
if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
/* We only want instances that haven't failed */
node = child_rsc->fns->location(child_rsc, NULL, current);
}
if (filter != RSC_ROLE_UNKNOWN && next_role != filter) {
crm_trace("Filtered %s", child_rsc->id);
return FALSE;
}
if (node && (node->details == local_node->details)) {
return TRUE;
} else if (node) {
crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname,
local_node->details->uname);
} else {
crm_trace("%s - not allocated %d", child_rsc->id, current);
}
return FALSE;
}
pe_resource_t *
find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc,
enum rsc_role_e filter, gboolean current,
pe_working_set_t *data_set)
{
pe_resource_t *pair = NULL;
GList *gIter = NULL;
GList *scratch = NULL;
pe_node_t *local_node = NULL;
local_node = local_child->fns->location(local_child, NULL, current);
if (local_node) {
return find_compatible_child_by_node(local_child, local_node, rsc, filter, current);
}
scratch = g_hash_table_get_values(local_child->allowed_nodes);
scratch = sort_nodes_by_weight(scratch, NULL, data_set);
gIter = scratch;
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
pair = find_compatible_child_by_node(local_child, node, rsc, filter, current);
if (pair) {
goto done;
}
}
pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id);
done:
g_list_free(scratch);
return pair;
}
void
clone_rsc_colocation_lh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
/* -- Never called --
*
* Instead we add the colocation constraints to the child and call from there
*/
CRM_ASSERT(FALSE);
}
void
clone_rsc_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
gboolean do_interleave = FALSE;
const char *interleave_s = NULL;
CRM_CHECK(constraint != NULL, return);
CRM_CHECK(dependent != NULL,
pe_err("dependent was NULL for %s", constraint->id); return);
CRM_CHECK(primary != NULL,
pe_err("primary was NULL for %s", constraint->id); return);
CRM_CHECK(dependent->variant == pe_native, return);
pe_rsc_trace(primary, "Processing constraint %s: %s -> %s %d",
constraint->id, dependent->id, primary->id, constraint->score);
if (pcmk_is_set(primary->flags, pe_rsc_promotable)) {
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if (constraint->primary_role == RSC_ROLE_UNKNOWN) {
pe_rsc_trace(primary, "Handling %s as a clone colocation",
constraint->id);
} else {
promotable_colocation_rh(dependent, primary, constraint, data_set);
return;
}
}
/* only the LHS side needs to be labeled as interleave */
interleave_s = g_hash_table_lookup(constraint->dependent->meta,
XML_RSC_ATTR_INTERLEAVE);
if (crm_is_true(interleave_s)
&& (constraint->dependent->variant > pe_group)) {
/* @TODO Do we actually care about multiple primary copies sharing a
* dependent copy anymore?
*/
if (copies_per_node(constraint->dependent) != copies_per_node(constraint->primary)) {
pcmk__config_err("Cannot interleave %s and %s because they do not "
"support the same number of instances per node",
constraint->dependent->id,
constraint->primary->id);
} else {
do_interleave = TRUE;
}
}
if (pcmk_is_set(primary->flags, pe_rsc_provisional)) {
pe_rsc_trace(primary, "%s is still provisional", primary->id);
return;
} else if (do_interleave) {
pe_resource_t *primary_instance = NULL;
primary_instance = find_compatible_child(dependent, primary,
RSC_ROLE_UNKNOWN, FALSE,
data_set);
if (primary_instance != NULL) {
pe_rsc_debug(primary, "Pairing %s with %s",
dependent->id, primary_instance->id);
dependent->cmds->rsc_colocation_lh(dependent, primary_instance,
constraint, data_set);
} else if (constraint->score >= INFINITY) {
crm_notice("Cannot pair %s with instance of %s",
dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true);
} else {
pe_rsc_debug(primary, "Cannot pair %s with instance of %s",
dependent->id, primary->id);
}
return;
} else if (constraint->score >= INFINITY) {
GList *affected_nodes = NULL;
gIter = primary->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
pe_rsc_trace(primary, "Allowing %s: %s %d",
constraint->id, chosen->details->uname,
chosen->weight);
affected_nodes = g_list_prepend(affected_nodes, chosen);
}
}
node_list_exclude(dependent->allowed_nodes, affected_nodes, FALSE);
g_list_free(affected_nodes);
return;
}
gIter = primary->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->rsc_colocation_rh(dependent, child_rsc, constraint,
data_set);
}
}
enum action_tasks
clone_child_action(pe_action_t * action)
{
enum action_tasks result = no_action;
pe_resource_t *child = (pe_resource_t *) action->rsc->children->data;
if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) {
/* Find the action we're notifying about instead */
int stop = 0;
char *key = action->uuid;
int lpc = strlen(key);
for (; lpc > 0; lpc--) {
if (key[lpc] == '_' && stop == 0) {
stop = lpc;
} else if (key[lpc] == '_') {
char *task_mutable = NULL;
lpc++;
task_mutable = strdup(key + lpc);
task_mutable[stop - lpc] = 0;
crm_trace("Extracted action '%s' from '%s'", task_mutable, key);
result = get_complex_task(child, task_mutable, TRUE);
free(task_mutable);
break;
}
}
} else {
result = get_complex_task(child, action->task, TRUE);
}
return result;
}
#define pe__clear_action_summary_flags(flags, action, flag) do { \
flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Action summary", action->rsc->id, \
flags, flag, #flag); \
} while (0)
enum pe_action_flags
summary_action_flags(pe_action_t * action, GList *children, pe_node_t * node)
{
GList *gIter = NULL;
gboolean any_runnable = FALSE;
gboolean check_runnable = TRUE;
enum action_tasks task = clone_child_action(action);
enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo);
const char *task_s = task2text(task);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
pe_action_t *child_action = NULL;
pe_resource_t *child = (pe_resource_t *) gIter->data;
child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node);
pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id,
node ? node->details->uname : "none", child_action?child_action->uuid:"NA");
if (child_action) {
enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node);
if (pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(child_flags, pe_action_optional)) {
pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid,
child_action->uuid);
pe__clear_action_summary_flags(flags, action, pe_action_optional);
pe__clear_action_flags(action, pe_action_optional);
}
if (pcmk_is_set(child_flags, pe_action_runnable)) {
any_runnable = TRUE;
}
}
}
if (check_runnable && any_runnable == FALSE) {
pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid);
pe__clear_action_summary_flags(flags, action, pe_action_runnable);
if (node == NULL) {
pe__clear_action_flags(action, pe_action_runnable);
}
}
return flags;
}
enum pe_action_flags
clone_action_flags(pe_action_t * action, pe_node_t * node)
{
return summary_action_flags(action, action->rsc->children, node);
}
void
clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
{
GList *gIter = rsc->children;
pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id);
pcmk__apply_location(constraint, rsc);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->rsc_location(child_rsc, constraint);
}
}
void
clone_expand(pe_resource_t * rsc, pe_working_set_t * data_set)
{
GList *gIter = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
g_list_foreach(rsc->actions, (GFunc) rsc->cmds->action_flags, NULL);
if (clone_data->start_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->start_notify);
pcmk__create_notification_keys(rsc, clone_data->start_notify, data_set);
create_notifications(rsc, clone_data->start_notify, data_set);
}
if (clone_data->stop_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->stop_notify);
pcmk__create_notification_keys(rsc, clone_data->stop_notify, data_set);
create_notifications(rsc, clone_data->stop_notify, data_set);
}
if (clone_data->promote_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->promote_notify);
pcmk__create_notification_keys(rsc, clone_data->promote_notify, data_set);
create_notifications(rsc, clone_data->promote_notify, data_set);
}
if (clone_data->demote_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->demote_notify);
pcmk__create_notification_keys(rsc, clone_data->demote_notify, data_set);
create_notifications(rsc, clone_data->demote_notify, data_set);
}
/* Now that the notifcations have been created we can expand the children */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->cmds->expand(child_rsc, data_set);
}
native_expand(rsc, data_set);
/* The notifications are in the graph now, we can destroy the notify_data */
free_notification_data(clone_data->demote_notify);
clone_data->demote_notify = NULL;
free_notification_data(clone_data->stop_notify);
clone_data->stop_notify = NULL;
free_notification_data(clone_data->start_notify);
clone_data->start_notify = NULL;
free_notification_data(clone_data->promote_notify);
clone_data->promote_notify = NULL;
}
// Check whether a resource or any of its children is known on node
static bool
rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node)
{
if (rsc->children) {
for (GList *child_iter = rsc->children; child_iter != NULL;
child_iter = child_iter->next) {
pe_resource_t *child = (pe_resource_t *) child_iter->data;
if (rsc_known_on(child, node)) {
return TRUE;
}
}
} else if (rsc->known_on) {
GHashTableIter iter;
pe_node_t *known_node = NULL;
g_hash_table_iter_init(&iter, rsc->known_on);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) {
if (node->details == known_node->details) {
return TRUE;
}
}
}
return FALSE;
}
// Look for an instance of clone that is known on node
static pe_resource_t *
find_instance_on(const pe_resource_t *clone, const pe_node_t *node)
{
for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
if (rsc_known_on(child, node)) {
return child;
}
}
return NULL;
}
// For unique clones, probe each instance separately
static gboolean
probe_unique_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete,
gboolean force, pe_working_set_t *data_set)
{
gboolean any_created = FALSE;
for (GList *child_iter = rsc->children; child_iter != NULL;
child_iter = child_iter->next) {
pe_resource_t *child = (pe_resource_t *) child_iter->data;
any_created |= child->cmds->create_probe(child, node, complete, force,
data_set);
}
return any_created;
}
// For anonymous clones, only a single instance needs to be probed
static gboolean
probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node,
pe_action_t *complete, gboolean force,
pe_working_set_t *data_set)
{
// First, check if we probed an instance on this node last time
pe_resource_t *child = find_instance_on(rsc, node);
// Otherwise, check if we plan to start an instance on this node
if (child == NULL) {
for (GList *child_iter = rsc->children; child_iter && !child;
child_iter = child_iter->next) {
pe_node_t *local_node = NULL;
pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data;
if (child_rsc) { /* make clang analyzer happy */
local_node = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (local_node && (local_node->details == node->details)) {
child = child_rsc;
}
}
}
}
// Otherwise, use the first clone instance
if (child == NULL) {
child = rsc->children->data;
}
CRM_ASSERT(child);
return child->cmds->create_probe(child, node, complete, force, data_set);
}
gboolean
clone_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete,
gboolean force, pe_working_set_t * data_set)
{
gboolean any_created = FALSE;
CRM_ASSERT(rsc);
rsc->children = g_list_sort(rsc->children, sort_rsc_id);
if (rsc->children == NULL) {
pe_warn("Clone %s has no children", rsc->id);
return FALSE;
}
if (rsc->exclusive_discover) {
pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) {
/* exclusive discover is enabled and this node is not marked
* as a node this resource should be discovered on
*
* remove the node from allowed_nodes so that the
* notification contains only nodes that we might ever run
* on
*/
g_hash_table_remove(rsc->allowed_nodes, node->details->id);
/* Bit of a shortcut - might as well take it */
return FALSE;
}
}
if (pcmk_is_set(rsc->flags, pe_rsc_unique)) {
any_created = probe_unique_clone(rsc, node, complete, force, data_set);
} else {
any_created = probe_anonymous_clone(rsc, node, complete, force,
data_set);
}
return any_created;
}
void
clone_append_meta(pe_resource_t * rsc, xmlNode * xml)
{
char *name = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
name = crm_meta_name(XML_RSC_ATTR_UNIQUE);
crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_unique));
free(name);
name = crm_meta_name(XML_RSC_ATTR_NOTIFY);
crm_xml_add(xml, name, pe__rsc_bool_str(rsc, pe_rsc_notify));
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX);
crm_xml_add_int(xml, name, clone_data->clone_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX);
crm_xml_add_int(xml, name, clone_data->clone_node_max);
free(name);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX);
crm_xml_add_int(xml, name, clone_data->promoted_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX);
crm_xml_add_int(xml, name, clone_data->promoted_node_max);
free(name);
/* @COMPAT Maintain backward compatibility with resource agents that
* expect the old names (deprecated since 2.0.0).
*/
name = crm_meta_name(PCMK_XE_PROMOTED_MAX_LEGACY);
crm_xml_add_int(xml, name, clone_data->promoted_max);
free(name);
name = crm_meta_name(PCMK_XE_PROMOTED_NODE_MAX_LEGACY);
crm_xml_add_int(xml, name, clone_data->promoted_node_max);
free(name);
}
}
diff --git a/lib/pacemaker/pcmk_sched_promotable.c b/lib/pacemaker/pcmk_sched_promotable.c
index 6bde60d687..2e91154c4c 100644
--- a/lib/pacemaker/pcmk_sched_promotable.c
+++ b/lib/pacemaker/pcmk_sched_promotable.c
@@ -1,1050 +1,1053 @@
/*
* Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
#define VARIANT_CLONE 1
#include <lib/pengine/variant.h>
extern gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set);
extern bool pcmk__is_daemon;
static void
child_promoting_constraints(clone_variant_data_t * clone_data, enum pe_ordering type,
pe_resource_t * rsc, pe_resource_t * child, pe_resource_t * last,
pe_working_set_t * data_set)
{
if (child == NULL) {
if (clone_data->ordered && last != NULL) {
pe_rsc_trace(rsc, "Ordered version (last node)");
/* last child promote before promoted started */
pcmk__order_resource_actions(last, RSC_PROMOTE, rsc, RSC_PROMOTED,
type, data_set);
}
return;
}
/* child promote before global promoted */
pcmk__order_resource_actions(child, RSC_PROMOTE, rsc, RSC_PROMOTED, type,
data_set);
/* global promote before child promote */
pcmk__order_resource_actions(rsc, RSC_PROMOTE, child, RSC_PROMOTE, type,
data_set);
if (clone_data->ordered) {
pe_rsc_trace(rsc, "Ordered version");
if (last == NULL) {
/* global promote before first child promote */
last = rsc;
}
/* else: child/child relative promote */
pcmk__order_starts(last, child, type, data_set);
pcmk__order_resource_actions(last, RSC_PROMOTE, child, RSC_PROMOTE,
type, data_set);
} else {
pe_rsc_trace(rsc, "Un-ordered version");
}
}
static void
child_demoting_constraints(clone_variant_data_t * clone_data, enum pe_ordering type,
pe_resource_t * rsc, pe_resource_t * child, pe_resource_t * last,
pe_working_set_t * data_set)
{
if (child == NULL) {
if (clone_data->ordered && last != NULL) {
pe_rsc_trace(rsc, "Ordered version (last node)");
/* global demote before first child demote */
pcmk__order_resource_actions(rsc, RSC_DEMOTE, last, RSC_DEMOTE,
pe_order_optional, data_set);
}
return;
}
/* child demote before global demoted */
pcmk__order_resource_actions(child, RSC_DEMOTE, rsc, RSC_DEMOTED,
pe_order_implies_then_printed, data_set);
/* global demote before child demote */
pcmk__order_resource_actions(rsc, RSC_DEMOTE, child, RSC_DEMOTE,
pe_order_implies_first_printed, data_set);
if (clone_data->ordered && last != NULL) {
pe_rsc_trace(rsc, "Ordered version");
/* child/child relative demote */
pcmk__order_resource_actions(child, RSC_DEMOTE, last, RSC_DEMOTE, type,
data_set);
} else if (clone_data->ordered) {
pe_rsc_trace(rsc, "Ordered version (1st node)");
/* first child stop before global stopped */
pcmk__order_resource_actions(child, RSC_DEMOTE, rsc, RSC_DEMOTED, type,
data_set);
} else {
pe_rsc_trace(rsc, "Un-ordered version");
}
}
static void
check_promotable_actions(pe_resource_t *rsc, gboolean *demoting,
gboolean *promoting)
{
GList *gIter = NULL;
if (rsc->children) {
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
check_promotable_actions(child, demoting, promoting);
}
return;
}
CRM_ASSERT(demoting != NULL);
CRM_ASSERT(promoting != NULL);
gIter = rsc->actions;
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (*promoting && *demoting) {
return;
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
continue;
} else if (pcmk__str_eq(RSC_DEMOTE, action->task, pcmk__str_casei)) {
*demoting = TRUE;
} else if (pcmk__str_eq(RSC_PROMOTE, action->task, pcmk__str_casei)) {
*promoting = TRUE;
}
}
}
static void
apply_promoted_location(pe_resource_t *child, GList *location_constraints,
pe_node_t *chosen)
{
CRM_CHECK(child && chosen, return);
for (GList *gIter = location_constraints; gIter; gIter = gIter->next) {
pe_node_t *cons_node = NULL;
pe__location_t *cons = gIter->data;
if (cons->role_filter == RSC_ROLE_PROMOTED) {
pe_rsc_trace(child, "Applying %s to %s", cons->id, child->id);
cons_node = pe_find_node_id(cons->node_list_rh, chosen->details->id);
}
if (cons_node != NULL) {
int new_priority = pe__add_scores(child->priority,
cons_node->weight);
pe_rsc_trace(child, "\t%s[%s]: %d -> %d (%d)",
child->id, cons_node->details->uname, child->priority,
new_priority, cons_node->weight);
child->priority = new_priority;
}
}
}
static pe_node_t *
guest_location(pe_node_t *guest_node)
{
pe_resource_t *guest = guest_node->details->remote_rsc->container;
return guest->fns->location(guest, NULL, FALSE);
}
static pe_node_t *
node_to_be_promoted_on(pe_resource_t *rsc)
{
pe_node_t *node = NULL;
pe_node_t *local_node = NULL;
pe_resource_t *parent = uber_parent(rsc);
clone_variant_data_t *clone_data = NULL;
#if 0
enum rsc_role_e role = RSC_ROLE_UNKNOWN;
role = rsc->fns->state(rsc, FALSE);
crm_info("%s role: %s", rsc->id, role2text(role));
#endif
if (rsc->children) {
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
if (node_to_be_promoted_on(child) == NULL) {
pe_rsc_trace(rsc, "Child %s of %s can't be promoted", child->id, rsc->id);
return NULL;
}
}
}
node = rsc->fns->location(rsc, NULL, FALSE);
if (node == NULL) {
pe_rsc_trace(rsc, "%s cannot be promoted: not allocated", rsc->id);
return NULL;
} else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
if (rsc->fns->state(rsc, TRUE) == RSC_ROLE_PROMOTED) {
crm_notice("Forcing unmanaged instance %s to remain promoted on %s",
rsc->id, node->details->uname);
} else {
return NULL;
}
} else if (rsc->priority < 0) {
pe_rsc_trace(rsc, "%s cannot be promoted: preference: %d",
rsc->id, rsc->priority);
return NULL;
} else if (can_run_resources(node) == FALSE) {
crm_trace("Node can't run any resources: %s", node->details->uname);
return NULL;
/* @TODO It's possible this check should be done in can_run_resources()
* instead. We should investigate all its callers to figure out whether that
* would be a good idea.
*/
} else if (pe__is_guest_node(node) && (guest_location(node) == NULL)) {
pe_rsc_trace(rsc, "%s cannot be promoted: guest %s not allocated",
rsc->id, node->details->remote_rsc->container->id);
return NULL;
}
get_clone_variant_data(clone_data, parent);
local_node = pe_hash_table_lookup(parent->allowed_nodes, node->details->id);
if (local_node == NULL) {
crm_err("%s cannot run on %s: node not allowed", rsc->id, node->details->uname);
return NULL;
} else if ((local_node->count < clone_data->promoted_node_max)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
return local_node;
} else {
pe_rsc_trace(rsc, "%s cannot be promoted on %s: node full",
rsc->id, node->details->uname);
}
return NULL;
}
static gint
sort_promotable_instance(gconstpointer a, gconstpointer b, gpointer data_set)
{
int rc;
enum rsc_role_e role1 = RSC_ROLE_UNKNOWN;
enum rsc_role_e role2 = RSC_ROLE_UNKNOWN;
const pe_resource_t *resource1 = (const pe_resource_t *)a;
const pe_resource_t *resource2 = (const pe_resource_t *)b;
CRM_ASSERT(resource1 != NULL);
CRM_ASSERT(resource2 != NULL);
role1 = resource1->fns->state(resource1, TRUE);
role2 = resource2->fns->state(resource2, TRUE);
rc = sort_rsc_index(a, b);
if (rc != 0) {
crm_trace("%s %c %s (index)", resource1->id, rc < 0 ? '<' : '>', resource2->id);
return rc;
}
if (role1 > role2) {
crm_trace("%s %c %s (role)", resource1->id, '<', resource2->id);
return -1;
} else if (role1 < role2) {
crm_trace("%s %c %s (role)", resource1->id, '>', resource2->id);
return 1;
}
return sort_clone_instance(a, b, data_set);
}
static void
promotion_order(pe_resource_t *rsc, pe_working_set_t *data_set)
{
GList *gIter = NULL;
pe_node_t *node = NULL;
pe_node_t *chosen = NULL;
clone_variant_data_t *clone_data = NULL;
char score[33];
size_t len = sizeof(score);
get_clone_variant_data(clone_data, rsc);
if (clone_data->added_promoted_constraints) {
return;
}
clone_data->added_promoted_constraints = true;
pe_rsc_trace(rsc, "Merging weights for %s", rsc->id);
pe__set_resource_flags(rsc, pe_rsc_merging);
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe_rsc_trace(rsc, "Sort index: %s = %d", child->id, child->sort_index);
}
pe__show_node_weights(true, rsc, "Before", rsc->allowed_nodes, data_set);
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
chosen = child->fns->location(child, NULL, FALSE);
if (chosen == NULL || child->sort_index < 0) {
pe_rsc_trace(rsc, "Skipping %s", child->id);
continue;
}
node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes, chosen->details->id);
CRM_ASSERT(node != NULL);
// Add promotion preferences and rsc_location scores when role=Promoted
score2char_stack(child->sort_index, score, len);
pe_rsc_trace(rsc, "Adding %s to %s from %s", score,
node->details->uname, child->id);
node->weight = pe__add_scores(child->sort_index, node->weight);
}
pe__show_node_weights(true, rsc, "Middle", rsc->allowed_nodes, data_set);
gIter = rsc->rsc_cons;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
/* (Re-)add location preferences of resources that a promoted instance
* should/must be colocated with.
*/
if (constraint->dependent_role == RSC_ROLE_PROMOTED) {
enum pe_weights flags = constraint->score == INFINITY ? 0 : pe_weights_rollback;
pe_rsc_trace(rsc, "RHS: %s with %s: %d",
constraint->dependent->id, constraint->primary->id,
constraint->score);
rsc->allowed_nodes = constraint->primary->cmds->merge_weights(
constraint->primary, rsc->id, rsc->allowed_nodes,
constraint->node_attribute,
constraint->score / (float) INFINITY, flags);
}
}
gIter = rsc->rsc_cons_lhs;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
if (!pcmk__colocation_has_influence(constraint, NULL)) {
continue;
}
/* (Re-)add location preferences of resources that wish to be colocated
* with a promoted instance.
*/
if (constraint->primary_role == RSC_ROLE_PROMOTED) {
pe_rsc_trace(rsc, "LHS: %s with %s: %d",
constraint->dependent->id, constraint->primary->id,
constraint->score);
rsc->allowed_nodes = constraint->dependent->cmds->merge_weights(
constraint->dependent, rsc->id, rsc->allowed_nodes,
constraint->node_attribute,
constraint->score / (float) INFINITY,
pe_weights_rollback|pe_weights_positive);
}
}
gIter = rsc->rsc_tickets;
for (; gIter != NULL; gIter = gIter->next) {
rsc_ticket_t *rsc_ticket = (rsc_ticket_t *) gIter->data;
if ((rsc_ticket->role_lh == RSC_ROLE_PROMOTED)
&& (rsc_ticket->ticket->granted == FALSE || rsc_ticket->ticket->standby)) {
resource_location(rsc, NULL, -INFINITY, "__stateful_without_ticket__", data_set);
}
}
pe__show_node_weights(true, rsc, "After", rsc->allowed_nodes, data_set);
/* write them back and sort */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
chosen = child->fns->location(child, NULL, FALSE);
if (!pcmk_is_set(child->flags, pe_rsc_managed)
&& (child->next_role == RSC_ROLE_PROMOTED)) {
child->sort_index = INFINITY;
} else if (chosen == NULL || child->sort_index < 0) {
pe_rsc_trace(rsc, "%s: %d", child->id, child->sort_index);
} else {
node = (pe_node_t *) pe_hash_table_lookup(rsc->allowed_nodes, chosen->details->id);
CRM_ASSERT(node != NULL);
child->sort_index = node->weight;
}
pe_rsc_trace(rsc, "Set sort index: %s = %d", child->id, child->sort_index);
}
rsc->children = g_list_sort_with_data(rsc->children,
sort_promotable_instance, data_set);
pe__clear_resource_flags(rsc, pe_rsc_merging);
}
static gboolean
filter_anonymous_instance(pe_resource_t *rsc, const pe_node_t *node)
{
GList *rIter = NULL;
char *key = clone_strip(rsc->id);
pe_resource_t *parent = uber_parent(rsc);
for (rIter = parent->children; rIter; rIter = rIter->next) {
/* If there is an active instance on the node, only it receives the
* promotion score. Use ->find_rsc() in case this is a cloned group.
*/
pe_resource_t *child = rIter->data;
pe_resource_t *active = parent->fns->find_rsc(child, key, node, pe_find_clone|pe_find_current);
if(rsc == active) {
pe_rsc_trace(rsc, "Found %s for %s active on %s: done", active->id, key, node->details->uname);
free(key);
return TRUE;
} else if(active) {
pe_rsc_trace(rsc, "Found %s for %s on %s: not %s", active->id, key, node->details->uname, rsc->id);
free(key);
return FALSE;
} else {
pe_rsc_trace(rsc, "%s on %s: not active", key, node->details->uname);
}
}
for (rIter = parent->children; rIter; rIter = rIter->next) {
pe_resource_t *child = rIter->data;
/*
* We know it's not running, but any score will still count if
* the instance has been probed on $node
*
* Again use ->find_rsc() because we might be a cloned group
* and knowing that other members of the group are known here
* implies nothing
*/
rsc = parent->fns->find_rsc(child, key, NULL, pe_find_clone);
CRM_LOG_ASSERT(rsc);
if(rsc) {
pe_rsc_trace(rsc, "Checking %s for %s on %s", rsc->id, key, node->details->uname);
if (g_hash_table_lookup(rsc->known_on, node->details->id)) {
free(key);
return TRUE;
}
}
}
free(key);
return FALSE;
}
static const char *
lookup_promotion_score(pe_resource_t *rsc, const pe_node_t *node, const char *name)
{
const char *attr_value = NULL;
if (node && name) {
char *attr_name = pcmk_promotion_score_name(name);
attr_value = pe_node_attribute_calculated(node, attr_name, rsc);
free(attr_name);
}
return attr_value;
}
static int
promotion_score(pe_resource_t *rsc, const pe_node_t *node, int not_set_value)
{
char *name = rsc->id;
const char *attr_value = NULL;
int score = not_set_value;
pe_node_t *match = NULL;
CRM_CHECK(node != NULL, return not_set_value);
if (rsc->children) {
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
int c_score = promotion_score(child, node, not_set_value);
if (score == not_set_value) {
score = c_score;
} else {
score += c_score;
}
}
return score;
}
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)
&& filter_anonymous_instance(rsc, node)) {
pe_rsc_trace(rsc, "Anonymous clone %s is allowed on %s", rsc->id, node->details->uname);
} else if (rsc->running_on || g_hash_table_size(rsc->known_on)) {
/* If we've probed and/or started the resource anywhere, consider
* promotion scores only from nodes where we know the status. However,
* if the status of all nodes is unknown (e.g. cluster startup),
* skip this code, to make sure we take into account any permanent
* promotion scores set previously.
*/
pe_node_t *known = pe_hash_table_lookup(rsc->known_on, node->details->id);
match = pe_find_node_id(rsc->running_on, node->details->id);
if ((match == NULL) && (known == NULL)) {
pe_rsc_trace(rsc, "skipping %s (aka. %s) promotion score on %s because inactive",
rsc->id, rsc->clone_name, node->details->uname);
return score;
}
}
match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (match == NULL) {
return score;
} else if (match->weight < 0) {
pe_rsc_trace(rsc, "%s on %s has score: %d - ignoring",
rsc->id, match->details->uname, match->weight);
return score;
}
if (rsc->clone_name) {
/* Use the name the lrm knows this resource as,
* since that's what crm_attribute --promotion would have used
*/
name = rsc->clone_name;
}
attr_value = lookup_promotion_score(rsc, node, name);
pe_rsc_trace(rsc, "promotion score for %s on %s = %s",
name, node->details->uname, crm_str(attr_value));
if ((attr_value == NULL) && !pcmk_is_set(rsc->flags, pe_rsc_unique)) {
/* If we don't have any LRM history yet, we won't have clone_name -- in
* that case, for anonymous clones, try the resource name without any
* instance number.
*/
name = clone_strip(rsc->id);
if (strcmp(rsc->id, name)) {
attr_value = lookup_promotion_score(rsc, node, name);
pe_rsc_trace(rsc, "stripped promotion score for %s on %s = %s",
name, node->details->uname, crm_str(attr_value));
}
free(name);
}
if (attr_value != NULL) {
score = char2score(attr_value);
}
return score;
}
void
pcmk__add_promotion_scores(pe_resource_t *rsc)
{
int score, new_score;
GList *gIter = rsc->children;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (clone_data->added_promotion_scores) {
/* Make sure we only do this once */
return;
}
clone_data->added_promotion_scores = true;
for (; gIter != NULL; gIter = gIter->next) {
GHashTableIter iter;
pe_node_t *node = NULL;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
g_hash_table_iter_init(&iter, child_rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (can_run_resources(node) == FALSE) {
/* This node will never be promoted, so don't apply the
* promotion score, as that may lead to clone shuffling.
*/
continue;
}
score = promotion_score(child_rsc, node, 0);
if (score > 0) {
new_score = pe__add_scores(node->weight, score);
if (new_score != node->weight) {
pe_rsc_trace(rsc, "\t%s: Updating preference for %s (%d->%d)",
child_rsc->id, node->details->uname, node->weight, new_score);
node->weight = new_score;
}
}
new_score = QB_MAX(child_rsc->priority, score);
if (new_score != child_rsc->priority) {
pe_rsc_trace(rsc, "\t%s: Updating priority (%d->%d)",
child_rsc->id, child_rsc->priority, new_score);
child_rsc->priority = new_score;
}
}
}
}
static void
set_role_unpromoted(pe_resource_t *rsc, bool current)
{
GList *gIter = rsc->children;
if (current) {
if (rsc->role == RSC_ROLE_STARTED) {
rsc->role = RSC_ROLE_UNPROMOTED;
}
} else {
GList *allocated = NULL;
rsc->fns->location(rsc, &allocated, FALSE);
pe__set_next_role(rsc, (allocated? RSC_ROLE_UNPROMOTED : RSC_ROLE_STOPPED),
"unpromoted instance");
g_list_free(allocated);
}
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
set_role_unpromoted(child_rsc, current);
}
}
static void
set_role_promoted(pe_resource_t *rsc, gpointer user_data)
{
if (rsc->next_role == RSC_ROLE_UNKNOWN) {
pe__set_next_role(rsc, RSC_ROLE_PROMOTED, "promoted instance");
}
g_list_foreach(rsc->children, (GFunc) set_role_promoted, NULL);
}
pe_node_t *
pcmk__set_instance_roles(pe_resource_t *rsc, pe_working_set_t *data_set)
{
int promoted = 0;
GList *gIter = NULL;
GList *gIter2 = NULL;
GHashTableIter iter;
pe_node_t *node = NULL;
pe_node_t *chosen = NULL;
enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
char score[33];
size_t len = sizeof(score);
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
// Repurpose count to track the number of promoted instances allocated
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
node->count = 0;
}
/*
* assign priority
*/
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
GList *list = NULL;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_rsc_trace(rsc, "Assigning priority for %s: %s", child_rsc->id,
role2text(child_rsc->next_role));
if (child_rsc->fns->state(child_rsc, TRUE) == RSC_ROLE_STARTED) {
set_role_unpromoted(child_rsc, true);
}
chosen = child_rsc->fns->location(child_rsc, &list, FALSE);
if (pcmk__list_of_multiple(list)) {
pcmk__config_err("Cannot promote non-colocated child %s",
child_rsc->id);
}
g_list_free(list);
if (chosen == NULL) {
continue;
}
next_role = child_rsc->fns->state(child_rsc, FALSE);
switch (next_role) {
case RSC_ROLE_STARTED:
case RSC_ROLE_UNKNOWN:
/*
* Default to -1 if no value is set
*
* This allows instances eligible for promotion to be specified
* based solely on rsc_location constraints,
* but prevents anyone from being promoted if
* neither a constraint nor a promotion score is present
*/
child_rsc->priority = promotion_score(child_rsc, chosen, -1);
break;
case RSC_ROLE_UNPROMOTED:
case RSC_ROLE_STOPPED:
child_rsc->priority = -INFINITY;
break;
case RSC_ROLE_PROMOTED:
/* We will arrive here if we're re-creating actions after a stonith
*/
break;
default:
CRM_CHECK(FALSE /* unhandled */ ,
crm_err("Unknown resource role: %d for %s", next_role, child_rsc->id));
}
apply_promoted_location(child_rsc, child_rsc->rsc_location, chosen);
apply_promoted_location(child_rsc, rsc->rsc_location, chosen);
for (gIter2 = child_rsc->rsc_cons; gIter2 != NULL; gIter2 = gIter2->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) gIter2->data;
child_rsc->cmds->rsc_colocation_lh(child_rsc, cons->primary, cons,
data_set);
}
child_rsc->sort_index = child_rsc->priority;
pe_rsc_trace(rsc, "Assigning priority for %s: %d", child_rsc->id, child_rsc->priority);
if (next_role == RSC_ROLE_PROMOTED) {
child_rsc->sort_index = INFINITY;
}
}
pe__show_node_weights(true, rsc, "Pre merge", rsc->allowed_nodes, data_set);
promotion_order(rsc, data_set);
// Choose the first N eligible instances to be promoted
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
score2char_stack(child_rsc->sort_index, score, len);
chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (pcmk_is_set(data_set->flags, pe_flag_show_scores) && !pcmk__is_daemon) {
if (data_set->priv != NULL) {
pcmk__output_t *out = data_set->priv;
out->message(out, "promotion-score", child_rsc, chosen, score);
}
} else {
pe_rsc_trace(rsc, "%s promotion score on %s: %s", child_rsc->id,
(chosen? chosen->details->uname : "none"), score);
}
chosen = NULL; /* nuke 'chosen' so that we don't promote more than the
* required number of instances
*/
if (child_rsc->sort_index < 0) {
pe_rsc_trace(rsc, "Not supposed to promote child: %s", child_rsc->id);
} else if ((promoted < clone_data->promoted_max)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
chosen = node_to_be_promoted_on(child_rsc);
}
pe_rsc_debug(rsc, "%s promotion score: %d", child_rsc->id, child_rsc->priority);
if (chosen == NULL) {
set_role_unpromoted(child_rsc, false);
continue;
} else if ((child_rsc->role < RSC_ROLE_PROMOTED)
&& !pcmk_is_set(data_set->flags, pe_flag_have_quorum)
&& data_set->no_quorum_policy == no_quorum_freeze) {
crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze",
child_rsc->id, role2text(child_rsc->role), role2text(child_rsc->next_role));
set_role_unpromoted(child_rsc, false);
continue;
}
chosen->count++;
pe_rsc_info(rsc, "Promoting %s (%s %s)",
child_rsc->id, role2text(child_rsc->role), chosen->details->uname);
set_role_promoted(child_rsc, NULL);
promoted++;
}
pe_rsc_info(rsc, "%s: Promoted %d instances of a possible %d",
rsc->id, promoted, clone_data->promoted_max);
return NULL;
}
void
create_promotable_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
{
pe_action_t *action = NULL;
GList *gIter = rsc->children;
pe_action_t *action_complete = NULL;
gboolean any_promoting = FALSE;
gboolean any_demoting = FALSE;
pe_resource_t *last_promote_rsc = NULL;
pe_resource_t *last_demote_rsc = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_rsc_debug(rsc, "Creating actions for %s", rsc->id);
for (; gIter != NULL; gIter = gIter->next) {
gboolean child_promoting = FALSE;
gboolean child_demoting = FALSE;
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_rsc_trace(rsc, "Creating actions for %s", child_rsc->id);
child_rsc->cmds->create_actions(child_rsc, data_set);
check_promotable_actions(child_rsc, &child_demoting, &child_promoting);
any_demoting = any_demoting || child_demoting;
any_promoting = any_promoting || child_promoting;
pe_rsc_trace(rsc, "Created actions for %s: %d %d", child_rsc->id, child_promoting,
child_demoting);
}
/* promote */
- action = create_pseudo_resource_op(rsc, RSC_PROMOTE, !any_promoting, TRUE, data_set);
- action_complete = create_pseudo_resource_op(rsc, RSC_PROMOTED, !any_promoting, TRUE, data_set);
+ action = pcmk__new_rsc_pseudo_action(rsc, RSC_PROMOTE, !any_promoting,
+ true);
+ action_complete = pcmk__new_rsc_pseudo_action(rsc, RSC_PROMOTED,
+ !any_promoting, true);
action_complete->priority = INFINITY;
child_promoting_constraints(clone_data, pe_order_optional,
rsc, NULL, last_promote_rsc, data_set);
if (clone_data->promote_notify == NULL) {
clone_data->promote_notify =
create_notification_boundaries(rsc, RSC_PROMOTE, action, action_complete, data_set);
}
/* demote */
- action = create_pseudo_resource_op(rsc, RSC_DEMOTE, !any_demoting, TRUE, data_set);
- action_complete = create_pseudo_resource_op(rsc, RSC_DEMOTED, !any_demoting, TRUE, data_set);
+ action = pcmk__new_rsc_pseudo_action(rsc, RSC_DEMOTE, !any_demoting, true);
+ action_complete = pcmk__new_rsc_pseudo_action(rsc, RSC_DEMOTED,
+ !any_demoting, true);
action_complete->priority = INFINITY;
child_demoting_constraints(clone_data, pe_order_optional, rsc, NULL, last_demote_rsc, data_set);
if (clone_data->demote_notify == NULL) {
clone_data->demote_notify =
create_notification_boundaries(rsc, RSC_DEMOTE, action, action_complete, data_set);
if (clone_data->promote_notify) {
/* If we ever wanted groups to have notifications we'd need to move this to native_internal_constraints() one day
* Requires exposing *_notify
*/
order_actions(clone_data->stop_notify->post_done, clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->start_notify->post_done, clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done, clone_data->promote_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done, clone_data->start_notify->pre,
pe_order_optional);
order_actions(clone_data->demote_notify->post_done, clone_data->stop_notify->pre,
pe_order_optional);
}
}
/* restore the correct priority */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
child_rsc->priority = rsc->priority;
}
}
void
promote_demote_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
{
/* global stopped before start */
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_START,
pe_order_optional, data_set);
/* global stopped before promote */
pcmk__order_resource_actions(rsc, RSC_STOPPED, rsc, RSC_PROMOTE,
pe_order_optional, data_set);
/* global demoted before start */
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_START,
pe_order_optional, data_set);
/* global started before promote */
pcmk__order_resource_actions(rsc, RSC_STARTED, rsc, RSC_PROMOTE,
pe_order_optional, data_set);
/* global demoted before stop */
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_STOP,
pe_order_optional, data_set);
/* global demote before demoted */
pcmk__order_resource_actions(rsc, RSC_DEMOTE, rsc, RSC_DEMOTED,
pe_order_optional, data_set);
/* global demoted before promote */
pcmk__order_resource_actions(rsc, RSC_DEMOTED, rsc, RSC_PROMOTE,
pe_order_optional, data_set);
}
void
promotable_constraints(pe_resource_t * rsc, pe_working_set_t * data_set)
{
GList *gIter = rsc->children;
pe_resource_t *last_rsc = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
promote_demote_constraints(rsc, data_set);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
/* child demote before promote */
pcmk__order_resource_actions(child_rsc, RSC_DEMOTE, child_rsc,
RSC_PROMOTE, pe_order_optional, data_set);
child_promoting_constraints(clone_data, pe_order_optional,
rsc, child_rsc, last_rsc, data_set);
child_demoting_constraints(clone_data, pe_order_optional,
rsc, child_rsc, last_rsc, data_set);
last_rsc = child_rsc;
}
}
static void
node_hash_update_one(GHashTable * hash, pe_node_t * other, const char *attr, int score)
{
GHashTableIter iter;
pe_node_t *node = NULL;
const char *value = NULL;
if (other == NULL) {
return;
} else if (attr == NULL) {
attr = CRM_ATTR_UNAME;
}
value = pe_node_attribute_raw(other, attr);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
const char *tmp = pe_node_attribute_raw(node, attr);
if (pcmk__str_eq(value, tmp, pcmk__str_casei)) {
crm_trace("%s: %d + %d", node->details->uname, node->weight, other->weight);
node->weight = pe__add_scores(node->weight, score);
}
}
}
void
promotable_colocation_rh(pe_resource_t *dependent, pe_resource_t *primary,
pcmk__colocation_t *constraint,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
if (pcmk_is_set(dependent->flags, pe_rsc_provisional)) {
GList *affected_nodes = NULL;
for (gIter = primary->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, FALSE);
pe_rsc_trace(primary, "Processing: %s", child_rsc->id);
if ((chosen != NULL) && (next_role == constraint->primary_role)) {
pe_rsc_trace(primary, "Applying: %s %s %s %d", child_rsc->id,
role2text(next_role), chosen->details->uname, constraint->score);
if (constraint->score < INFINITY) {
node_hash_update_one(dependent->allowed_nodes, chosen,
constraint->node_attribute, constraint->score);
}
affected_nodes = g_list_prepend(affected_nodes, chosen);
}
}
/* Only do this if it's not a promoted-with-promoted colocation. Doing
* this unconditionally would prevent unpromoted instances from being
* started.
*/
if ((constraint->dependent_role != RSC_ROLE_PROMOTED)
|| (constraint->primary_role != RSC_ROLE_PROMOTED)) {
if (constraint->score >= INFINITY) {
node_list_exclude(dependent->allowed_nodes, affected_nodes,
TRUE);
}
}
g_list_free(affected_nodes);
} else if (constraint->dependent_role == RSC_ROLE_PROMOTED) {
pe_resource_t *primary_instance;
primary_instance = find_compatible_child(dependent, primary,
constraint->primary_role,
FALSE, data_set);
if ((primary_instance == NULL) && (constraint->score >= INFINITY)) {
pe_rsc_trace(dependent, "%s can't be promoted %s",
dependent->id, constraint->id);
dependent->priority = -INFINITY;
} else if (primary_instance != NULL) {
int new_priority = pe__add_scores(dependent->priority,
constraint->score);
pe_rsc_debug(dependent, "Applying %s to %s",
constraint->id, dependent->id);
pe_rsc_debug(dependent, "\t%s: %d->%d",
dependent->id, dependent->priority, new_priority);
dependent->priority = new_priority;
}
}
return;
}

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 23, 3:52 AM (4 h, 18 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1010192
Default Alt Text
(179 KB)

Event Timeline