diff --git a/include/crm/common/location_internal.h b/include/crm/common/location_internal.h
index 2d732d7fb5..5991feea98 100644
--- a/include/crm/common/location_internal.h
+++ b/include/crm/common/location_internal.h
@@ -1,36 +1,36 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_LOCATION_INTERNAL__H
#define PCMK__CRM_COMMON_LOCATION_INTERNAL__H
#include <glib.h> // GList
-#include <crm/common/nodes.h> // enum pe_discover_e
+#include <crm/common/nodes_internal.h> // enum pcmk__probe_mode
#include <crm/common/resources.h> // enum rsc_role_e
#include <crm/common/scheduler_types.h> // pcmk_resource_t
#ifdef __cplusplus
extern "C" {
#endif
//! Location constraint object
typedef struct {
char *id; // XML ID of location constraint
pcmk_resource_t *rsc; // Resource with location preference
enum rsc_role_e role_filter; // Limit to instances with this role
- enum pe_discover_e probe_mode; // How to probe resource on node
+ enum pcmk__probe_mode probe_mode; // How to probe resource on node
GList *nodes; // Affected nodes, with preference score
} pcmk__location_t;
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_COMMON_LOCATION_INTERNAL__H
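
The constraint struct now carries the internal probe-mode enum instead of the deprecated public one. As a rough sketch of what a call site looks like after the rename, assuming the internal headers above are available — describe_probe_mode() is a hypothetical helper, not part of the patch:

#include <crm/common/nodes_internal.h>    // enum pcmk__probe_mode
#include <crm/common/location_internal.h> // pcmk__location_t

// Hypothetical helper mapping a constraint's probe mode to readable text
static const char *
describe_probe_mode(const pcmk__location_t *location)
{
    switch (location->probe_mode) {
        case pcmk__probe_always:    return "always";    // the default
        case pcmk__probe_never:     return "never";
        case pcmk__probe_exclusive: return "exclusive"; // designated nodes only
        default:                    return "unknown";
    }
}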
diff --git a/include/crm/common/nodes.h b/include/crm/common/nodes.h
index d17ab83a87..4d9a8d93e5 100644
--- a/include/crm/common/nodes.h
+++ b/include/crm/common/nodes.h
@@ -1,119 +1,102 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_NODES__H
#define PCMK__CRM_COMMON_NODES__H
#include <stdbool.h> // bool
#include <glib.h> // gboolean, GList, GHashTable
#include <crm/common/scheduler_types.h> // pcmk_resource_t, pcmk_scheduler_t
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \file
* \brief Scheduler API for nodes
* \ingroup core
*/
// Special node attributes
#define PCMK_NODE_ATTR_MAINTENANCE "maintenance"
#define PCMK_NODE_ATTR_STANDBY "standby"
#define PCMK_NODE_ATTR_TERMINATE "terminate"
-// When to probe a resource on a node (as specified in location constraints)
-// @COMPAT Make this internal when we can break API backward compatibility
-//!@{
-//! \deprecated Do not use (public access will be removed in a future release)
-enum pe_discover_e {
- pcmk_probe_always = 0, // Always probe resource on node
- pcmk_probe_never = 1, // Never probe resource on node
- pcmk_probe_exclusive = 2, // Probe only on designated nodes
-
-#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
- pe_discover_always = pcmk_probe_always,
- pe_discover_never = pcmk_probe_never,
- pe_discover_exclusive = pcmk_probe_exclusive,
-#endif
-};
-//!@}
-
//! \internal Do not use
typedef struct pcmk__node_private pcmk__node_private_t;
// Basic node information (all node objects for the same node share this)
// @COMPAT Drop this struct once all members are moved to pcmk__node_private_t
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
struct pe_node_shared_s {
/* @COMPAT Convert these gbooleans into new enum pcmk__node_flags values
* when we no longer support versions of sbd that use them
*/
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_node_is_online() instead
gboolean online; // Whether online
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_node_is_pending() instead
gboolean pending; // Whether controller membership is pending
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call !pcmk_node_is_clean() instead
gboolean unclean; // Whether node requires fencing
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_node_is_shutting_down() instead
gboolean shutdown; // Whether shutting down
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_node_is_in_maintenance() instead
gboolean maintenance; // Whether in maintenance mode
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_foreach_active_resource() instead
GList *running_rsc; // List of resources active on node
};
//!@}
// Implementation of pcmk_node_t
// @COMPAT Make contents internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
struct pcmk__scored_node {
struct pcmk__node_assignment *assign;
// NOTE: sbd (as of at least 1.5.2) uses this
struct pe_node_shared_s *details; // Basic node information
//! \internal Do not use
pcmk__node_private_t *private;
};
//!@}
bool pcmk_node_is_online(const pcmk_node_t *node);
bool pcmk_node_is_pending(const pcmk_node_t *node);
bool pcmk_node_is_clean(const pcmk_node_t *node);
bool pcmk_node_is_shutting_down(const pcmk_node_t *node);
bool pcmk_node_is_in_maintenance(const pcmk_node_t *node);
bool pcmk_foreach_active_resource(pcmk_node_t *node,
bool (*fn)(pcmk_resource_t *, void *),
void *user_data);
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_COMMON_NODES__H
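
With enum pe_discover_e gone from this header, the remaining public surface is the pcmk_node_is_*() accessors and pcmk_foreach_active_resource(). Below is a minimal sketch of a consumer (such as sbd) using those accessors instead of reading struct pe_node_shared_s fields directly; count_rsc() and report_node() are hypothetical names introduced only for illustration:

#include <stdio.h>
#include <crm/common/nodes.h> // pcmk_node_is_online(), pcmk_foreach_active_resource()

// Hypothetical callback: count resources without reading the deprecated
// running_rsc list directly (returning true continues iteration)
static bool
count_rsc(pcmk_resource_t *rsc, void *user_data)
{
    int *count = user_data;

    (*count)++;
    return true;
}

// Hypothetical report using the accessors rather than node->details fields
static void
report_node(pcmk_node_t *node)
{
    int count = 0;

    if (pcmk_node_is_online(node)) {
        pcmk_foreach_active_resource(node, count_rsc, &count);
        printf("Node is online with %d active resource(s)\n", count);
    }
}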
diff --git a/include/crm/common/nodes_internal.h b/include/crm/common/nodes_internal.h
index ea7b41f0f6..ada54b9b07 100644
--- a/include/crm/common/nodes_internal.h
+++ b/include/crm/common/nodes_internal.h
@@ -1,187 +1,194 @@
/*
* Copyright 2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_NODES_INTERNAL__H
#define PCMK__CRM_COMMON_NODES_INTERNAL__H
#include <stdio.h> // NULL
#include <stdbool.h> // bool
#include <stdint.h> // uint32_t, UINT32_C()
#include <glib.h>
#include <crm/common/nodes.h>
/*
* Special node attributes
*/
#define PCMK__NODE_ATTR_SHUTDOWN "shutdown"
/* @COMPAT Deprecated since 2.1.8. Use a location constraint with
* PCMK_XA_RSC_PATTERN=".*" and PCMK_XA_RESOURCE_DISCOVERY="never" instead of
* PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED="false".
*/
#define PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED "resource-discovery-enabled"
enum pcmk__node_variant { // Possible node types
pcmk__node_variant_ping = 0, // deprecated
pcmk__node_variant_cluster = 1, // Cluster layer node
pcmk__node_variant_remote = 2, // Pacemaker Remote node
};
enum pcmk__node_flags {
pcmk__node_none = UINT32_C(0),
// Whether node is in standby mode
pcmk__node_standby = (UINT32_C(1) << 0),
// Whether node is in standby mode due to PCMK_META_ON_FAIL
pcmk__node_fail_standby = (UINT32_C(1) << 1),
// Whether node has ever joined cluster (and thus has node state in CIB)
pcmk__node_seen = (UINT32_C(1) << 2),
// Whether expected join state is member
pcmk__node_expected_up = (UINT32_C(1) << 3),
// Whether probes are allowed on node
pcmk__node_probes_allowed = (UINT32_C(1) << 4),
/* Whether this is either a guest node whose guest resource must be
* recovered or a remote node that must be fenced
*/
pcmk__node_remote_reset = (UINT32_C(1) << 5),
/* Whether this is a Pacemaker Remote node that was fenced since it was last
* connected by the cluster
*/
pcmk__node_remote_fenced = (UINT32_C(1) << 6),
/*
* Whether this is a Pacemaker Remote node previously marked in its
* node state as being in maintenance mode
*/
pcmk__node_remote_maint = (UINT32_C(1) << 7),
// Whether node history has been unpacked
pcmk__node_unpacked = (UINT32_C(1) << 8),
};
+// When to probe a resource on a node (as specified in location constraints)
+enum pcmk__probe_mode {
+ pcmk__probe_always = 0, // Always probe resource on node
+ pcmk__probe_never = 1, // Never probe resource on node
+ pcmk__probe_exclusive = 2, // Probe only on designated nodes
+};
+
/* Per-node data used in resource assignment
*
* @COMPAT When we can make the pcmk_node_t implementation internal, move these
* there and drop this struct.
*/
struct pcmk__node_assignment {
int score; // Node's score for relevant resource
int count; // Counter reused by assignment and promotion code
- enum pe_discover_e probe_mode; // When to probe resource on this node
+ enum pcmk__probe_mode probe_mode; // When to probe resource on this node
};
/* Implementation of pcmk__node_private_t (pcmk_node_t objects are shallow
* copies, so all pcmk_node_t objects for the same node will share the same
* private data)
*/
typedef struct pcmk__node_private {
/* Node's XML ID in the CIB (the cluster layer ID for cluster nodes,
* the node name for Pacemaker Remote nodes)
*/
const char *id;
/*
* Sum of priorities of all resources active on node and on any guest nodes
* connected to this node, with +1 for promoted instances (used to compare
* nodes for PCMK_OPT_PRIORITY_FENCING_DELAY)
*/
int priority;
const char *name; // Node name in cluster
enum pcmk__node_variant variant; // Node variant
uint32_t flags; // Group of enum pcmk__node_flags
GHashTable *attrs; // Node attributes
GHashTable *utilization; // Node utilization attributes
int num_resources; // Number of active resources on node
GList *assigned_resources; // List of resources assigned to node
GHashTable *digest_cache; // Cache of calculated resource digests
pcmk_resource_t *remote; // Pacemaker Remote connection (if any)
pcmk_scheduler_t *scheduler; // Scheduler data that node is part of
} pcmk__node_private_t;
pcmk_node_t *pcmk__find_node_in_list(const GList *nodes, const char *node_name);
/*!
* \internal
* \brief Set node flags
*
* \param[in,out] node Node to set flags for
* \param[in] flags_to_set Group of enum pcmk_node_flags to set
*/
#define pcmk__set_node_flags(node, flags_to_set) do { \
(node)->private->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Node", pcmk__node_name(node), \
(node)->private->flags, (flags_to_set), #flags_to_set); \
} while (0)
/*!
* \internal
* \brief Clear node flags
*
* \param[in,out] node Node to clear flags for
* \param[in] flags_to_clear Group of enum pcmk_node_flags to clear
*/
#define pcmk__clear_node_flags(node, flags_to_clear) do { \
(node)->private->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Node", pcmk__node_name(node), \
(node)->private->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
/*!
* \internal
* \brief Return a string suitable for logging as a node name
*
* \param[in] node Node to return a node name string for
*
* \return Node name if available, otherwise node ID if available,
* otherwise "unspecified node" if node is NULL or "unidentified node"
* if node has neither a name nor ID.
*/
static inline const char *
pcmk__node_name(const pcmk_node_t *node)
{
if (node == NULL) {
return "unspecified node";
} else if (node->private->name != NULL) {
return node->private->name;
} else if (node->private->id != NULL) {
return node->private->id;
} else {
return "unidentified node";
}
}
/*!
* \internal
* \brief Check whether two node objects refer to the same node
*
* \param[in] node1 First node object to compare
* \param[in] node2 Second node object to compare
*
* \return true if \p node1 and \p node2 refer to the same node
*/
static inline bool
pcmk__same_node(const pcmk_node_t *node1, const pcmk_node_t *node2)
{
return (node1 != NULL) && (node2 != NULL)
&& (node1->private == node2->private);
}
#endif // PCMK__CRM_COMMON_NODES_INTERNAL__H
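
For reference, the two static inline helpers defined above compose naturally. A small sketch under the assumption that this internal header is available; trace_assignment() is a hypothetical function, not in the patch:

#include <stdio.h>
#include <crm/common/nodes_internal.h> // pcmk__node_name(), pcmk__same_node()

// Hypothetical trace helper built on the inline functions above;
// pcmk__node_name() never returns NULL, so no extra guards are needed
static void
trace_assignment(const pcmk_node_t *chosen, const pcmk_node_t *current)
{
    if (pcmk__same_node(chosen, current)) {
        printf("%s: resource stays put\n", pcmk__node_name(chosen));
    } else {
        printf("Moving from %s to %s\n",
               pcmk__node_name(current), pcmk__node_name(chosen));
    }
}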
diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c
index 694288508b..f8ba3abeac 100644
--- a/lib/pacemaker/pcmk_sched_clone.c
+++ b/lib/pacemaker/pcmk_sched_clone.c
@@ -1,739 +1,739 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/common/xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Assign a clone resource's instances to nodes
*
* \param[in,out] rsc Clone resource to assign
* \param[in] prefer Node to prefer, if all else is equal
* \param[in] stop_if_fail If \c true and a primitive descendant of \p rsc
* can't be assigned to a node, set the
* descendant's next role to stopped and update
* existing actions
*
* \return NULL (clones are not assigned to a single node)
*
* \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
* completely undo the assignment. A successful assignment can be either
* undone or left alone as final. A failed assignment has the same effect
* as calling pcmk__unassign_resource(); there are no side effects on
* roles or actions.
*/
pcmk_node_t *
pcmk__clone_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
bool stop_if_fail)
{
GList *colocations = NULL;
CRM_ASSERT(pcmk__is_clone(rsc));
if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) {
return NULL; // Assignment has already been done
}
// Detect assignment loops
if (pcmk_is_set(rsc->flags, pcmk__rsc_assigning)) {
pcmk__rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
return NULL;
}
pcmk__set_rsc_flags(rsc, pcmk__rsc_assigning);
// If this clone is promotable, consider nodes' promotion scores
if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) {
pcmk__add_promotion_scores(rsc);
}
// If this clone is colocated with any other resources, assign those first
colocations = pcmk__this_with_colocations(rsc);
for (GList *iter = colocations; iter != NULL; iter = iter->next) {
pcmk__colocation_t *constraint = (pcmk__colocation_t *) iter->data;
pcmk_resource_t *primary = constraint->primary;
pcmk__rsc_trace(rsc, "%s: Assigning colocation %s primary %s first",
rsc->id, constraint->id, primary->id);
primary->private->cmds->assign(primary, prefer, stop_if_fail);
}
g_list_free(colocations);
// If any resources are colocated with this one, consider their preferences
colocations = pcmk__with_this_colocations(rsc);
g_list_foreach(colocations, pcmk__add_dependent_scores, rsc);
g_list_free(colocations);
pe__show_node_scores(!pcmk_is_set(rsc->private->scheduler->flags,
pcmk_sched_output_scores),
rsc, __func__, rsc->private->allowed_nodes,
rsc->private->scheduler);
rsc->private->children = g_list_sort(rsc->private->children,
pcmk__cmp_instance);
pcmk__assign_instances(rsc, rsc->private->children, pe__clone_max(rsc),
pe__clone_node_max(rsc));
if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) {
pcmk__set_instance_roles(rsc);
}
pcmk__clear_rsc_flags(rsc, pcmk__rsc_unassigned|pcmk__rsc_assigning);
pcmk__rsc_trace(rsc, "Assigned clone %s", rsc->id);
return NULL;
}
/*!
* \internal
* \brief Create all actions needed for a given clone resource
*
* \param[in,out] rsc Clone resource to create actions for
*/
void
pcmk__clone_create_actions(pcmk_resource_t *rsc)
{
CRM_ASSERT(pcmk__is_clone(rsc));
pcmk__rsc_trace(rsc, "Creating actions for clone %s", rsc->id);
pcmk__create_instance_actions(rsc, rsc->private->children);
if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) {
pcmk__create_promotable_actions(rsc);
}
}
/*!
* \internal
* \brief Create implicit constraints needed for a clone resource
*
* \param[in,out] rsc Clone resource to create implicit constraints for
*/
void
pcmk__clone_internal_constraints(pcmk_resource_t *rsc)
{
bool ordered = false;
CRM_ASSERT(pcmk__is_clone(rsc));
pcmk__rsc_trace(rsc, "Creating internal constraints for clone %s", rsc->id);
// Restart ordering: Stop -> stopped -> start -> started
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOPPED,
rsc, PCMK_ACTION_START,
pcmk__ar_ordered);
pcmk__order_resource_actions(rsc, PCMK_ACTION_START,
rsc, PCMK_ACTION_RUNNING,
pcmk__ar_unrunnable_first_blocks);
pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP,
rsc, PCMK_ACTION_STOPPED,
pcmk__ar_unrunnable_first_blocks);
// Demoted -> stop and started -> promote
if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) {
pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTED,
rsc, PCMK_ACTION_STOP,
pcmk__ar_ordered);
pcmk__order_resource_actions(rsc, PCMK_ACTION_RUNNING,
rsc, PCMK_ACTION_PROMOTE,
pcmk__ar_unrunnable_first_blocks);
}
ordered = pe__clone_is_ordered(rsc);
if (ordered) {
/* Ordered clone instances must start and stop by instance number. The
* instances might have been previously shuffled for assignment or
* promotion purposes, so re-sort them.
*/
rsc->private->children = g_list_sort(rsc->private->children,
pcmk__cmp_instance_number);
}
for (GList *iter = rsc->private->children;
iter != NULL; iter = iter->next) {
pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
instance->private->cmds->internal_constraints(instance);
// Start clone -> start instance -> clone started
pcmk__order_starts(rsc, instance, pcmk__ar_unrunnable_first_blocks
|pcmk__ar_then_implies_first_graphed);
pcmk__order_resource_actions(instance, PCMK_ACTION_START,
rsc, PCMK_ACTION_RUNNING,
pcmk__ar_first_implies_then_graphed);
// Stop clone -> stop instance -> clone stopped
pcmk__order_stops(rsc, instance, pcmk__ar_then_implies_first_graphed);
pcmk__order_resource_actions(instance, PCMK_ACTION_STOP,
rsc, PCMK_ACTION_STOPPED,
pcmk__ar_first_implies_then_graphed);
/* Instances of ordered clones must be started and stopped by instance
* number. Since only some instances may be starting or stopping, order
* each instance relative to every later instance.
*/
if (ordered) {
for (GList *later = iter->next;
later != NULL; later = later->next) {
pcmk__order_starts(instance, (pcmk_resource_t *) later->data,
pcmk__ar_ordered);
pcmk__order_stops((pcmk_resource_t *) later->data, instance,
pcmk__ar_ordered);
}
}
}
if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) {
pcmk__order_promotable_instances(rsc);
}
}
/*!
* \internal
* \brief Check whether colocated resources can be interleaved
*
* \param[in] colocation Colocation constraint with clone as primary
*
* \return true if colocated resources can be interleaved, otherwise false
*/
static bool
can_interleave(const pcmk__colocation_t *colocation)
{
const pcmk_resource_t *primary = colocation->primary;
const pcmk_resource_t *dependent = colocation->dependent;
// Only colocations between clone or bundle resources use interleaving
if (dependent->private->variant <= pcmk__rsc_variant_group) {
return false;
}
// Only the dependent needs to be marked for interleaving
if (!crm_is_true(g_hash_table_lookup(dependent->private->meta,
PCMK_META_INTERLEAVE))) {
return false;
}
/* @TODO Do we actually care about multiple primary instances sharing a
* dependent instance?
*/
if (dependent->private->fns->max_per_node(dependent)
!= primary->private->fns->max_per_node(primary)) {
pcmk__config_err("Cannot interleave %s and %s because they do not "
"support the same number of instances per node",
dependent->id, primary->id);
return false;
}
return true;
}
/*!
* \internal
* \brief Apply a colocation's score to node scores or resource priority
*
* Given a colocation constraint, apply its score to the dependent's
* allowed node scores (if we are still placing resources) or priority (if
* we are choosing promotable clone instance roles).
*
* \param[in,out] dependent Dependent resource in colocation
* \param[in] primary Primary resource in colocation
* \param[in] colocation Colocation constraint to apply
* \param[in] for_dependent true if called on behalf of dependent
*/
void
pcmk__clone_apply_coloc_score(pcmk_resource_t *dependent,
const pcmk_resource_t *primary,
const pcmk__colocation_t *colocation,
bool for_dependent)
{
const GList *iter = NULL;
/* This should never be called for the clone itself as a dependent. Instead,
* we add its colocation constraints to its instances and call the
* apply_coloc_score() method for the instances as dependents.
*/
CRM_ASSERT(!for_dependent);
CRM_ASSERT((colocation != NULL) && pcmk__is_clone(primary)
&& pcmk__is_primitive(dependent));
if (pcmk_is_set(primary->flags, pcmk__rsc_unassigned)) {
pcmk__rsc_trace(primary,
"Delaying processing colocation %s "
"because cloned primary %s is still provisional",
colocation->id, primary->id);
return;
}
pcmk__rsc_trace(primary, "Processing colocation %s (%s with clone %s @%s)",
colocation->id, dependent->id, primary->id,
pcmk_readable_score(colocation->score));
// Apply role-specific colocations
if (pcmk_is_set(primary->flags, pcmk__rsc_promotable)
&& (colocation->primary_role != pcmk_role_unknown)) {
if (pcmk_is_set(dependent->flags, pcmk__rsc_unassigned)) {
// We're assigning the dependent to a node
pcmk__update_dependent_with_promotable(primary, dependent,
colocation);
return;
}
if (colocation->dependent_role == pcmk_role_promoted) {
// We're choosing a role for the dependent
pcmk__update_promotable_dependent_priority(primary, dependent,
colocation);
return;
}
}
// Apply interleaved colocations
if (can_interleave(colocation)) {
const pcmk_resource_t *primary_instance = NULL;
primary_instance = pcmk__find_compatible_instance(dependent, primary,
pcmk_role_unknown,
false);
if (primary_instance != NULL) {
pcmk__rsc_debug(primary, "Interleaving %s with %s",
dependent->id, primary_instance->id);
dependent->private->cmds->apply_coloc_score(dependent,
primary_instance,
colocation, true);
} else if (colocation->score >= PCMK_SCORE_INFINITY) {
crm_notice("%s cannot run because it cannot interleave with "
"any instance of %s", dependent->id, primary->id);
pcmk__assign_resource(dependent, NULL, true, true);
} else {
pcmk__rsc_debug(primary,
"%s will not colocate with %s "
"because no instance can interleave with it",
dependent->id, primary->id);
}
return;
}
// Apply mandatory colocations
if (colocation->score >= PCMK_SCORE_INFINITY) {
GList *primary_nodes = NULL;
// Dependent can run only where primary will have unblocked instances
for (iter = primary->private->children;
iter != NULL; iter = iter->next) {
const pcmk_resource_t *instance = iter->data;
pcmk_node_t *chosen = NULL;
chosen = instance->private->fns->location(instance, NULL, 0);
if ((chosen != NULL)
&& !is_set_recursive(instance, pcmk__rsc_blocked, TRUE)) {
pcmk__rsc_trace(primary, "Allowing %s: %s %d",
colocation->id, pcmk__node_name(chosen),
chosen->assign->score);
primary_nodes = g_list_prepend(primary_nodes, chosen);
}
}
pcmk__colocation_intersect_nodes(dependent, primary, colocation,
primary_nodes, false);
g_list_free(primary_nodes);
return;
}
// Apply optional colocations
for (iter = primary->private->children; iter != NULL; iter = iter->next) {
const pcmk_resource_t *instance = iter->data;
instance->private->cmds->apply_coloc_score(dependent, instance,
colocation, false);
}
}
// Clone implementation of pcmk__assignment_methods_t:with_this_colocations()
void
pcmk__with_clone_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc, GList **list)
{
const pcmk_resource_t *parent = NULL;
CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return);
parent = rsc->private->parent;
pcmk__add_with_this_list(list, rsc->private->with_this_colocations,
orig_rsc);
if (parent != NULL) {
parent->private->cmds->with_this_colocations(parent, orig_rsc, list);
}
}
// Clone implementation of pcmk__assignment_methods_t:this_with_colocations()
void
pcmk__clone_with_colocations(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc, GList **list)
{
const pcmk_resource_t *parent = NULL;
CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return);
parent = rsc->private->parent;
pcmk__add_this_with_list(list, rsc->private->this_with_colocations,
orig_rsc);
if (parent != NULL) {
parent->private->cmds->this_with_colocations(parent, orig_rsc, list);
}
}
/*!
* \internal
* \brief Return action flags for a given clone resource action
*
* \param[in,out] action Action to get flags for
* \param[in] node If not NULL, limit effects to this node
*
* \return Flags appropriate to \p action on \p node
*/
uint32_t
pcmk__clone_action_flags(pcmk_action_t *action, const pcmk_node_t *node)
{
CRM_ASSERT((action != NULL) && pcmk__is_clone(action->rsc));
return pcmk__collective_action_flags(action, action->rsc->private->children,
node);
}
/*!
* \internal
* \brief Apply a location constraint to a clone resource's allowed node scores
*
* \param[in,out] rsc Clone resource to apply constraint to
* \param[in,out] location Location constraint to apply
*/
void
pcmk__clone_apply_location(pcmk_resource_t *rsc, pcmk__location_t *location)
{
CRM_CHECK((location != NULL) && pcmk__is_clone(rsc), return);
pcmk__apply_location(rsc, location);
for (GList *iter = rsc->private->children;
iter != NULL; iter = iter->next) {
pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
instance->private->cmds->apply_location(instance, location);
}
}
// GFunc wrapper for calling the action_flags() resource method
static void
call_action_flags(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = user_data;
rsc->private->cmds->action_flags((pcmk_action_t *) data, NULL);
}
/*!
* \internal
* \brief Add a clone resource's actions to the transition graph
*
* \param[in,out] rsc Resource whose actions should be added
*/
void
pcmk__clone_add_actions_to_graph(pcmk_resource_t *rsc)
{
CRM_ASSERT(pcmk__is_clone(rsc));
g_list_foreach(rsc->private->actions, call_action_flags, rsc);
pe__create_clone_notifications(rsc);
for (GList *iter = rsc->private->children;
iter != NULL; iter = iter->next) {
pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data;
child_rsc->private->cmds->add_actions_to_graph(child_rsc);
}
pcmk__add_rsc_actions_to_graph(rsc);
pe__free_clone_notification_data(rsc);
}
/*!
* \internal
* \brief Check whether a resource or any children have been probed on a node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p node is in the probed_nodes table of \p rsc or any of its
* children, otherwise false
*/
static bool
rsc_probed_on(const pcmk_resource_t *rsc, const pcmk_node_t *node)
{
if (rsc->private->children != NULL) {
for (GList *child_iter = rsc->private->children;
child_iter != NULL; child_iter = child_iter->next) {
pcmk_resource_t *child = (pcmk_resource_t *) child_iter->data;
if (rsc_probed_on(child, node)) {
return true;
}
}
return false;
}
if (rsc->private->probed_nodes != NULL) {
GHashTableIter iter;
pcmk_node_t *known_node = NULL;
g_hash_table_iter_init(&iter, rsc->private->probed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) {
if (pcmk__same_node(node, known_node)) {
return true;
}
}
}
return false;
}
/*!
* \internal
* \brief Find clone instance that has been probed on given node
*
* \param[in] clone Clone resource to check
* \param[in] node Node to check
*
* \return Instance of \p clone that has been probed on \p node if any,
* otherwise NULL
*/
static pcmk_resource_t *
find_probed_instance_on(const pcmk_resource_t *clone, const pcmk_node_t *node)
{
for (GList *iter = clone->private->children;
iter != NULL; iter = iter->next) {
pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
if (rsc_probed_on(instance, node)) {
return instance;
}
}
return NULL;
}
/*!
* \internal
* \brief Probe an anonymous clone on a node
*
* \param[in,out] clone Anonymous clone to probe
* \param[in,out] node Node to probe \p clone on
*
* \return true if any probe was created, otherwise false
*/
static bool
probe_anonymous_clone(pcmk_resource_t *clone, pcmk_node_t *node)
{
// Check whether we already probed an instance on this node
pcmk_resource_t *child = find_probed_instance_on(clone, node);
// Otherwise, check if we plan to start an instance on this node
for (GList *iter = clone->private->children;
(iter != NULL) && (child == NULL); iter = iter->next) {
pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
const pcmk_node_t *instance_node = NULL;
instance_node = instance->private->fns->location(instance, NULL, 0);
if (pcmk__same_node(instance_node, node)) {
child = instance;
}
}
// Otherwise, use the first clone instance
if (child == NULL) {
child = clone->private->children->data;
}
// Anonymous clones only need to probe a single instance
return child->private->cmds->create_probe(child, node);
}
/*!
* \internal
* \brief Schedule any probes needed for a resource on a node
*
* \param[in,out] rsc Resource to create probe for
* \param[in,out] node Node to create probe on
*
* \return true if any probe was created, otherwise false
*/
bool
pcmk__clone_create_probe(pcmk_resource_t *rsc, pcmk_node_t *node)
{
CRM_ASSERT((node != NULL) && pcmk__is_clone(rsc));
if (pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes)) {
/* The clone is configured to be probed only where a location constraint
* exists with PCMK_XA_RESOURCE_DISCOVERY set to exclusive.
*
* This check is not strictly necessary here since the instance's
* create_probe() method would also check, but doing it here is more
* efficient (especially for unique clones with a large number of
* instances), and affects the CRM_meta_notify_available_uname variable
* passed with notify actions.
*/
pcmk_node_t *allowed = g_hash_table_lookup(rsc->private->allowed_nodes,
node->private->id);
if ((allowed == NULL)
- || (allowed->assign->probe_mode != pcmk_probe_exclusive)) {
+ || (allowed->assign->probe_mode != pcmk__probe_exclusive)) {
/* This node is not marked for resource discovery. Remove it from
* allowed nodes so that notifications contain only nodes that the
* clone can possibly run on.
*/
pcmk__rsc_trace(rsc,
"Skipping probe for %s on %s because resource has "
"exclusive discovery but is not allowed on node",
rsc->id, pcmk__node_name(node));
g_hash_table_remove(rsc->private->allowed_nodes, node->private->id);
return false;
}
}
rsc->private->children = g_list_sort(rsc->private->children,
pcmk__cmp_instance_number);
if (pcmk_is_set(rsc->flags, pcmk__rsc_unique)) {
return pcmk__probe_resource_list(rsc->private->children, node);
} else {
return probe_anonymous_clone(rsc, node);
}
}
/*!
* \internal
* \brief Add meta-attributes relevant to transition graph actions to XML
*
* Add clone-specific meta-attributes needed for transition graph actions.
*
* \param[in] rsc Clone resource whose meta-attributes should be added
* \param[in,out] xml Transition graph action attributes XML to add to
*/
void
pcmk__clone_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml)
{
char *name = NULL;
CRM_ASSERT(pcmk__is_clone(rsc) && (xml != NULL));
name = crm_meta_name(PCMK_META_GLOBALLY_UNIQUE);
crm_xml_add(xml, name, pcmk__flag_text(rsc->flags, pcmk__rsc_unique));
free(name);
name = crm_meta_name(PCMK_META_NOTIFY);
crm_xml_add(xml, name, pcmk__flag_text(rsc->flags, pcmk__rsc_notify));
free(name);
name = crm_meta_name(PCMK_META_CLONE_MAX);
crm_xml_add_int(xml, name, pe__clone_max(rsc));
free(name);
name = crm_meta_name(PCMK_META_CLONE_NODE_MAX);
crm_xml_add_int(xml, name, pe__clone_node_max(rsc));
free(name);
if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) {
int promoted_max = pe__clone_promoted_max(rsc);
int promoted_node_max = pe__clone_promoted_node_max(rsc);
name = crm_meta_name(PCMK_META_PROMOTED_MAX);
crm_xml_add_int(xml, name, promoted_max);
free(name);
name = crm_meta_name(PCMK_META_PROMOTED_NODE_MAX);
crm_xml_add_int(xml, name, promoted_node_max);
free(name);
/* @COMPAT Maintain backward compatibility with resource agents that
* expect the old names (deprecated since 2.0.0).
*/
name = crm_meta_name(PCMK__META_PROMOTED_MAX_LEGACY);
crm_xml_add_int(xml, name, promoted_max);
free(name);
name = crm_meta_name(PCMK__META_PROMOTED_NODE_MAX_LEGACY);
crm_xml_add_int(xml, name, promoted_node_max);
free(name);
}
}
// Clone implementation of pcmk__assignment_methods_t:add_utilization()
void
pcmk__clone_add_utilization(const pcmk_resource_t *rsc,
const pcmk_resource_t *orig_rsc, GList *all_rscs,
GHashTable *utilization)
{
bool existing = false;
pcmk_resource_t *child = NULL;
CRM_ASSERT(pcmk__is_clone(rsc) && (orig_rsc != NULL)
&& (utilization != NULL));
if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) {
return;
}
// Look for any child already existing in the list
for (GList *iter = rsc->private->children;
iter != NULL; iter = iter->next) {
child = (pcmk_resource_t *) iter->data;
if (g_list_find(all_rscs, child)) {
existing = true; // Keep checking remaining children
} else {
// If this is a clone of a group, look for group's members
for (GList *member_iter = child->private->children;
member_iter != NULL; member_iter = member_iter->next) {
pcmk_resource_t *member = (pcmk_resource_t *) member_iter->data;
if (g_list_find(all_rscs, member) != NULL) {
// Add *child's* utilization, not group member's
child->private->cmds->add_utilization(child, orig_rsc,
all_rscs,
utilization);
existing = true;
break;
}
}
}
}
if (!existing && (rsc->private->children != NULL)) {
// If nothing was found, still add first child's utilization
child = (pcmk_resource_t *) rsc->private->children->data;
child->private->cmds->add_utilization(child, orig_rsc, all_rscs,
utilization);
}
}
// Clone implementation of pcmk__assignment_methods_t:shutdown_lock()
void
pcmk__clone_shutdown_lock(pcmk_resource_t *rsc)
{
CRM_ASSERT(pcmk__is_clone(rsc));
return; // Clones currently don't support shutdown locks
}
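
The exclusive-discovery check in pcmk__clone_create_probe() above reduces to a small predicate. The following sketch restates that gate in isolation — node_allows_exclusive_probe() is hypothetical; only the enum and struct come from the patch:

#include <stdbool.h>
#include <crm/common/nodes_internal.h> // enum pcmk__probe_mode

/* Hypothetical predicate: when a resource is limited to exclusive
 * discovery, only nodes explicitly marked pcmk__probe_exclusive qualify
 * for probes; otherwise normal probe rules apply.
 */
static bool
node_allows_exclusive_probe(const struct pcmk__node_assignment *assign,
                            bool rsc_has_exclusive_probes)
{
    if (!rsc_has_exclusive_probes) {
        return true;
    }
    return (assign != NULL)
           && (assign->probe_mode == pcmk__probe_exclusive);
}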
diff --git a/lib/pacemaker/pcmk_sched_location.c b/lib/pacemaker/pcmk_sched_location.c
index de47be50a9..c37aecc1c3 100644
--- a/lib/pacemaker/pcmk_sched_location.c
+++ b/lib/pacemaker/pcmk_sched_location.c
@@ -1,732 +1,732 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/common/rules_internal.h>
#include <crm/pengine/status.h>
#include <crm/pengine/rules.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
static int
get_node_score(const char *rule, const char *score, bool raw,
pcmk_node_t *node, pcmk_resource_t *rsc)
{
int score_f = 0;
if (score == NULL) {
pcmk__config_warn("Rule %s: no score specified (assuming 0)", rule);
} else if (raw) {
score_f = char2score(score);
} else {
const char *target = NULL;
const char *attr_score = NULL;
target = g_hash_table_lookup(rsc->private->meta,
PCMK_META_CONTAINER_ATTRIBUTE_TARGET);
attr_score = pcmk__node_attr(node, score, target,
pcmk__rsc_node_current);
if (attr_score == NULL) {
crm_debug("Rule %s: %s did not have a value for %s",
rule, pcmk__node_name(node), score);
score_f = -PCMK_SCORE_INFINITY;
} else {
crm_debug("Rule %s: %s had value %s for %s",
rule, pcmk__node_name(node), attr_score, score);
score_f = char2score(attr_score);
}
}
return score_f;
}
/*!
* \internal
* \brief Parse a role configuration for a location constraint
*
* \param[in] role_spec Role specification
* \param[out] role Where to store parsed role
*
* \return true if role specification is valid, otherwise false
*/
static bool
parse_location_role(const char *role_spec, enum rsc_role_e *role)
{
if (role_spec == NULL) {
*role = pcmk_role_unknown;
return true;
}
*role = pcmk_parse_role(role_spec);
switch (*role) {
case pcmk_role_unknown:
return false;
case pcmk_role_started:
case pcmk_role_unpromoted:
/* Any promotable clone instance cannot be promoted without being in
* the unpromoted role first. Therefore, any constraint for the
* started or unpromoted role applies to every role.
*/
*role = pcmk_role_unknown;
break;
default:
break;
}
return true;
}
/*!
* \internal
* \brief Generate a location constraint from a rule
*
* \param[in,out] rsc Resource that constraint is for
* \param[in] rule_xml Rule XML (sub-element of location constraint)
* \param[in] discovery Value of \c PCMK_XA_RESOURCE_DISCOVERY for
* constraint
* \param[out] next_change Where to set when rule evaluation will change
* \param[in,out] rule_input Values used to evaluate rule criteria
* (node-specific values will be overwritten by
* this function)
*
* \return true if rule is valid, otherwise false
*/
static bool
generate_location_rule(pcmk_resource_t *rsc, xmlNode *rule_xml,
const char *discovery, crm_time_t *next_change,
pcmk_rule_input_t *rule_input)
{
const char *rule_id = NULL;
const char *score = NULL;
const char *boolean = NULL;
const char *role_spec = NULL;
GList *iter = NULL;
bool raw_score = true;
bool score_allocated = false;
pcmk__location_t *location_rule = NULL;
enum rsc_role_e role = pcmk_role_unknown;
enum pcmk__combine combine = pcmk__combine_unknown;
rule_xml = pcmk__xe_resolve_idref(rule_xml, rsc->private->scheduler->input);
if (rule_xml == NULL) {
return false; // Error already logged
}
rule_id = crm_element_value(rule_xml, PCMK_XA_ID);
if (rule_id == NULL) {
pcmk__config_err("Ignoring " PCMK_XE_RULE " without " PCMK_XA_ID
" in location constraint");
return false;
}
boolean = crm_element_value(rule_xml, PCMK_XA_BOOLEAN_OP);
role_spec = crm_element_value(rule_xml, PCMK_XA_ROLE);
if (parse_location_role(role_spec, &role)) {
crm_trace("Setting rule %s role filter to %s", rule_id, role_spec);
} else {
pcmk__config_err("Ignoring rule %s: Invalid " PCMK_XA_ROLE " '%s'",
rule_id, role_spec);
return false;
}
crm_trace("Processing location constraint rule %s", rule_id);
score = crm_element_value(rule_xml, PCMK_XA_SCORE);
if (score == NULL) {
score = crm_element_value(rule_xml, PCMK_XA_SCORE_ATTRIBUTE);
if (score != NULL) {
raw_score = false;
}
}
combine = pcmk__parse_combine(boolean);
switch (combine) {
case pcmk__combine_and:
case pcmk__combine_or:
break;
default:
/* @COMPAT When we can break behavioral backward compatibility,
* return false
*/
pcmk__config_warn("Location constraint rule %s has invalid "
PCMK_XA_BOOLEAN_OP " value '%s', using default "
"'" PCMK_VALUE_AND "'",
rule_id, boolean);
combine = pcmk__combine_and;
break;
}
location_rule = pcmk__new_location(rule_id, rsc, 0, discovery, NULL);
CRM_CHECK(location_rule != NULL, return false);
location_rule->role_filter = role;
if ((rule_input->rsc_id != NULL) && (rule_input->rsc_id_nmatches > 0)
&& !raw_score) {
char *result = pcmk__replace_submatches(score, rule_input->rsc_id,
rule_input->rsc_id_submatches,
rule_input->rsc_id_nmatches);
if (result != NULL) {
score = result;
score_allocated = true;
}
}
for (iter = rsc->private->scheduler->nodes;
iter != NULL; iter = iter->next) {
pcmk_node_t *node = iter->data;
rule_input->node_attrs = node->private->attrs;
rule_input->rsc_params = pe_rsc_params(rsc, node,
rsc->private->scheduler);
if (pcmk_evaluate_rule(rule_xml, rule_input,
next_change) == pcmk_rc_ok) {
pcmk_node_t *local = pe__copy_node(node);
location_rule->nodes = g_list_prepend(location_rule->nodes, local);
local->assign->score = get_node_score(rule_id, score, raw_score,
node, rsc);
crm_trace("%s has score %s after %s", pcmk__node_name(node),
pcmk_readable_score(local->assign->score), rule_id);
}
}
if (score_allocated) {
free((char *)score);
}
if (location_rule->nodes == NULL) {
crm_trace("No matching nodes for location constraint rule %s", rule_id);
} else {
crm_trace("Location constraint rule %s matched %d nodes",
rule_id, g_list_length(location_rule->nodes));
}
return true;
}
static void
unpack_rsc_location(xmlNode *xml_obj, pcmk_resource_t *rsc,
const char *role_spec, const char *score,
char *rsc_id_match, int rsc_id_nmatches,
regmatch_t *rsc_id_submatches)
{
const char *rsc_id = crm_element_value(xml_obj, PCMK_XA_RSC);
const char *id = crm_element_value(xml_obj, PCMK_XA_ID);
const char *node = crm_element_value(xml_obj, PCMK_XA_NODE);
const char *discovery = crm_element_value(xml_obj,
PCMK_XA_RESOURCE_DISCOVERY);
if (rsc == NULL) {
pcmk__config_warn("Ignoring constraint '%s' because resource '%s' "
"does not exist", id, rsc_id);
return;
}
if (score == NULL) {
score = crm_element_value(xml_obj, PCMK_XA_SCORE);
}
if ((node != NULL) && (score != NULL)) {
int score_i = char2score(score);
pcmk_node_t *match = pcmk_find_node(rsc->private->scheduler, node);
enum rsc_role_e role = pcmk_role_unknown;
pcmk__location_t *location = NULL;
if (match == NULL) {
crm_info("Ignoring location constraint %s "
"because '%s' is not a known node",
pcmk__s(id, "without ID"), node);
return;
}
if (role_spec == NULL) {
role_spec = crm_element_value(xml_obj, PCMK_XA_ROLE);
}
if (parse_location_role(role_spec, &role)) {
crm_trace("Setting location constraint %s role filter: %s",
id, role_spec);
} else {
/* @COMPAT The previous behavior of creating the constraint ignoring
* the role is retained for now, but we should ignore the entire
* constraint when we can break backward compatibility.
*/
pcmk__config_err("Ignoring role in constraint %s: "
"Invalid value '%s'", id, role_spec);
}
location = pcmk__new_location(id, rsc, score_i, discovery, match);
if (location == NULL) {
return; // Error already logged
}
location->role_filter = role;
} else {
bool empty = true;
crm_time_t *next_change = crm_time_new_undefined();
pcmk_rule_input_t rule_input = {
.now = rsc->private->scheduler->now,
.rsc_meta = rsc->private->meta,
.rsc_id = rsc_id_match,
.rsc_id_submatches = rsc_id_submatches,
.rsc_id_nmatches = rsc_id_nmatches,
};
/* This loop is logically parallel to pcmk__evaluate_rules(), except
* instead of checking whether any rule is active, we set up location
* constraints for each active rule.
*
* @COMPAT When we can break backward compatibility, limit location
* constraints to a single rule, for consistency with other contexts.
* Since a rule may contain other rules, this does not prohibit any
* existing use cases.
*/
for (xmlNode *rule_xml = pcmk__xe_first_child(xml_obj, PCMK_XE_RULE,
NULL, NULL);
rule_xml != NULL; rule_xml = pcmk__xe_next_same(rule_xml)) {
if (generate_location_rule(rsc, rule_xml, discovery, next_change,
&rule_input)) {
if (empty) {
empty = false;
continue;
}
pcmk__warn_once(pcmk__wo_location_rules,
"Support for multiple " PCMK_XE_RULE
" elements in a location constraint is "
"deprecated and will be removed in a future "
"release (use a single new rule combining the "
"previous rules with " PCMK_XA_BOOLEAN_OP
" set to '" PCMK_VALUE_OR "' instead)");
}
}
if (empty) {
pcmk__config_err("Ignoring constraint '%s' because it contains "
"no valid rules", id);
}
/* If there is a point in the future when the evaluation of a rule will
* change, make sure the scheduler is re-run by that time.
*/
if (crm_time_is_defined(next_change)) {
time_t t = (time_t) crm_time_get_seconds_since_epoch(next_change);
pe__update_recheck_time(t, rsc->private->scheduler,
"location rule evaluation");
}
crm_time_free(next_change);
}
}
static void
unpack_simple_location(xmlNode *xml_obj, pcmk_scheduler_t *scheduler)
{
const char *id = crm_element_value(xml_obj, PCMK_XA_ID);
const char *value = crm_element_value(xml_obj, PCMK_XA_RSC);
if (value) {
pcmk_resource_t *rsc;
rsc = pcmk__find_constraint_resource(scheduler->resources, value);
unpack_rsc_location(xml_obj, rsc, NULL, NULL, NULL, 0, NULL);
}
value = crm_element_value(xml_obj, PCMK_XA_RSC_PATTERN);
if (value) {
regex_t regex;
bool invert = false;
if (value[0] == '!') {
value++;
invert = true;
}
if (regcomp(&regex, value, REG_EXTENDED) != 0) {
pcmk__config_err("Ignoring constraint '%s' because "
PCMK_XA_RSC_PATTERN
" has invalid value '%s'", id, value);
return;
}
for (GList *iter = scheduler->resources; iter != NULL;
iter = iter->next) {
pcmk_resource_t *r = iter->data;
int nregs = 0;
regmatch_t *pmatch = NULL;
int status;
if (regex.re_nsub > 0) {
nregs = regex.re_nsub + 1;
} else {
nregs = 1;
}
pmatch = pcmk__assert_alloc(nregs, sizeof(regmatch_t));
status = regexec(&regex, r->id, nregs, pmatch, 0);
if (!invert && (status == 0)) {
crm_debug("'%s' matched '%s' for %s", r->id, value, id);
unpack_rsc_location(xml_obj, r, NULL, NULL, r->id, nregs,
pmatch);
} else if (invert && (status != 0)) {
crm_debug("'%s' is an inverted match of '%s' for %s",
r->id, value, id);
unpack_rsc_location(xml_obj, r, NULL, NULL, NULL, 0, NULL);
} else {
crm_trace("'%s' does not match '%s' for %s", r->id, value, id);
}
free(pmatch);
}
regfree(&regex);
}
}
// \return Standard Pacemaker return code
static int
unpack_location_tags(xmlNode *xml_obj, xmlNode **expanded_xml,
pcmk_scheduler_t *scheduler)
{
const char *id = NULL;
const char *rsc_id = NULL;
const char *state = NULL;
pcmk_resource_t *rsc = NULL;
pcmk_tag_t *tag = NULL;
xmlNode *rsc_set = NULL;
*expanded_xml = NULL;
CRM_CHECK(xml_obj != NULL, return EINVAL);
id = pcmk__xe_id(xml_obj);
if (id == NULL) {
pcmk__config_err("Ignoring <%s> constraint without " PCMK_XA_ID,
xml_obj->name);
return pcmk_rc_unpack_error;
}
// Check whether there are any resource sets with template or tag references
*expanded_xml = pcmk__expand_tags_in_sets(xml_obj, scheduler);
if (*expanded_xml != NULL) {
crm_log_xml_trace(*expanded_xml, "Expanded " PCMK_XE_RSC_LOCATION);
return pcmk_rc_ok;
}
rsc_id = crm_element_value(xml_obj, PCMK_XA_RSC);
if (rsc_id == NULL) {
return pcmk_rc_ok;
}
if (!pcmk__valid_resource_or_tag(scheduler, rsc_id, &rsc, &tag)) {
pcmk__config_err("Ignoring constraint '%s' because '%s' is not a "
"valid resource or tag", id, rsc_id);
return pcmk_rc_unpack_error;
} else if (rsc != NULL) {
// No template is referenced
return pcmk_rc_ok;
}
state = crm_element_value(xml_obj, PCMK_XA_ROLE);
*expanded_xml = pcmk__xml_copy(NULL, xml_obj);
/* Convert any template or tag reference into constraint
* PCMK_XE_RESOURCE_SET
*/
if (!pcmk__tag_to_set(*expanded_xml, &rsc_set, PCMK_XA_RSC,
false, scheduler)) {
pcmk__xml_free(*expanded_xml);
*expanded_xml = NULL;
return pcmk_rc_unpack_error;
}
if (rsc_set != NULL) {
if (state != NULL) {
/* Move PCMK_XA_RSC_ROLE into converted PCMK_XE_RESOURCE_SET as
* PCMK_XA_ROLE attribute
*/
crm_xml_add(rsc_set, PCMK_XA_ROLE, state);
pcmk__xe_remove_attr(*expanded_xml, PCMK_XA_ROLE);
}
crm_log_xml_trace(*expanded_xml, "Expanded " PCMK_XE_RSC_LOCATION);
} else {
// No sets
pcmk__xml_free(*expanded_xml);
*expanded_xml = NULL;
}
return pcmk_rc_ok;
}
// \return Standard Pacemaker return code
static int
unpack_location_set(xmlNode *location, xmlNode *set,
pcmk_scheduler_t *scheduler)
{
xmlNode *xml_rsc = NULL;
pcmk_resource_t *resource = NULL;
const char *set_id;
const char *role;
const char *local_score;
CRM_CHECK(set != NULL, return EINVAL);
set_id = pcmk__xe_id(set);
if (set_id == NULL) {
pcmk__config_err("Ignoring " PCMK_XE_RESOURCE_SET " without "
PCMK_XA_ID " in constraint '%s'",
pcmk__s(pcmk__xe_id(location), "(missing ID)"));
return pcmk_rc_unpack_error;
}
role = crm_element_value(set, PCMK_XA_ROLE);
local_score = crm_element_value(set, PCMK_XA_SCORE);
for (xml_rsc = pcmk__xe_first_child(set, PCMK_XE_RESOURCE_REF, NULL, NULL);
xml_rsc != NULL; xml_rsc = pcmk__xe_next_same(xml_rsc)) {
resource = pcmk__find_constraint_resource(scheduler->resources,
pcmk__xe_id(xml_rsc));
if (resource == NULL) {
pcmk__config_err("%s: No resource found for %s",
set_id, pcmk__xe_id(xml_rsc));
return pcmk_rc_unpack_error;
}
unpack_rsc_location(location, resource, role, local_score, NULL, 0,
NULL);
}
return pcmk_rc_ok;
}
void
pcmk__unpack_location(xmlNode *xml_obj, pcmk_scheduler_t *scheduler)
{
xmlNode *set = NULL;
bool any_sets = false;
xmlNode *orig_xml = NULL;
xmlNode *expanded_xml = NULL;
if (unpack_location_tags(xml_obj, &expanded_xml, scheduler) != pcmk_rc_ok) {
return;
}
if (expanded_xml) {
orig_xml = xml_obj;
xml_obj = expanded_xml;
}
for (set = pcmk__xe_first_child(xml_obj, PCMK_XE_RESOURCE_SET, NULL, NULL);
set != NULL; set = pcmk__xe_next_same(set)) {
any_sets = true;
set = pcmk__xe_resolve_idref(set, scheduler->input);
if ((set == NULL) // Configuration error, message already logged
|| (unpack_location_set(xml_obj, set, scheduler) != pcmk_rc_ok)) {
if (expanded_xml) {
pcmk__xml_free(expanded_xml);
}
return;
}
}
if (expanded_xml) {
pcmk__xml_free(expanded_xml);
xml_obj = orig_xml;
}
if (!any_sets) {
unpack_simple_location(xml_obj, scheduler);
}
}
/*!
* \internal
* \brief Add a new location constraint to scheduler data
*
* \param[in] id XML ID of location constraint
* \param[in,out] rsc Resource in location constraint
* \param[in] node_score Constraint score
* \param[in] probe_mode When resource should be probed on node
* \param[in] node Node in constraint (or NULL if rule-based)
*
* \return Newly allocated location constraint on success, otherwise NULL
* \note The result will be added to the cluster (via \p rsc) and should not be
* freed separately.
*/
pcmk__location_t *
pcmk__new_location(const char *id, pcmk_resource_t *rsc,
int node_score, const char *probe_mode, pcmk_node_t *node)
{
pcmk__location_t *new_con = NULL;
CRM_CHECK((node != NULL) || (node_score == 0), return NULL);
if (id == NULL) {
pcmk__config_err("Invalid constraint: no ID specified");
return NULL;
}
if (rsc == NULL) {
pcmk__config_err("Invalid constraint %s: no resource specified", id);
return NULL;
}
new_con = pcmk__assert_alloc(1, sizeof(pcmk__location_t));
new_con->id = pcmk__str_copy(id);
new_con->rsc = rsc;
new_con->nodes = NULL;
new_con->role_filter = pcmk_role_unknown;
if (pcmk__str_eq(probe_mode, PCMK_VALUE_ALWAYS,
pcmk__str_null_matches|pcmk__str_casei)) {
- new_con->probe_mode = pcmk_probe_always;
+ new_con->probe_mode = pcmk__probe_always;
} else if (pcmk__str_eq(probe_mode, PCMK_VALUE_NEVER, pcmk__str_casei)) {
- new_con->probe_mode = pcmk_probe_never;
+ new_con->probe_mode = pcmk__probe_never;
} else if (pcmk__str_eq(probe_mode, PCMK_VALUE_EXCLUSIVE,
pcmk__str_casei)) {
- new_con->probe_mode = pcmk_probe_exclusive;
+ new_con->probe_mode = pcmk__probe_exclusive;
pcmk__set_rsc_flags(rsc, pcmk__rsc_exclusive_probes);
} else {
pcmk__config_err("Invalid " PCMK_XA_RESOURCE_DISCOVERY " value %s "
"in location constraint", probe_mode);
}
if (node != NULL) {
pcmk_node_t *copy = pe__copy_node(node);
copy->assign->score = node_score;
new_con->nodes = g_list_prepend(NULL, copy);
}
rsc->private->scheduler->placement_constraints =
g_list_prepend(rsc->private->scheduler->placement_constraints, new_con);
rsc->private->location_constraints =
g_list_prepend(rsc->private->location_constraints, new_con);
return new_con;
}
/*!
* \internal
* \brief Apply all location constraints
*
* \param[in,out] scheduler Scheduler data
*/
void
pcmk__apply_locations(pcmk_scheduler_t *scheduler)
{
for (GList *iter = scheduler->placement_constraints;
iter != NULL; iter = iter->next) {
pcmk__location_t *location = iter->data;
location->rsc->private->cmds->apply_location(location->rsc, location);
}
}
/*!
* \internal
* \brief Apply a location constraint to a resource's allowed node scores
*
* \param[in,out] rsc Resource to apply constraint to
* \param[in,out] location Location constraint to apply
*
* \note This does not consider the resource's children, so the resource's
* apply_location() method should be used instead in most cases.
*/
void
pcmk__apply_location(pcmk_resource_t *rsc, pcmk__location_t *location)
{
bool need_role = false;
CRM_ASSERT((rsc != NULL) && (location != NULL));
// If a role was specified, ensure constraint is applicable
need_role = (location->role_filter > pcmk_role_unknown);
if (need_role && (location->role_filter != rsc->private->next_role)) {
pcmk__rsc_trace(rsc,
"Not applying %s to %s because role will be %s not %s",
location->id, rsc->id,
pcmk_role_text(rsc->private->next_role),
pcmk_role_text(location->role_filter));
return;
}
if (location->nodes == NULL) {
pcmk__rsc_trace(rsc, "Not applying %s to %s because no nodes match",
location->id, rsc->id);
return;
}
pcmk__rsc_trace(rsc, "Applying %s%s%s to %s", location->id,
(need_role? " for role " : ""),
(need_role? pcmk_role_text(location->role_filter) : ""),
rsc->id);
for (GList *iter = location->nodes; iter != NULL; iter = iter->next) {
pcmk_node_t *node = iter->data;
pcmk_node_t *allowed_node = NULL;
allowed_node = g_hash_table_lookup(rsc->private->allowed_nodes,
node->private->id);
if (allowed_node == NULL) {
pcmk__rsc_trace(rsc, "* = %d on %s",
node->assign->score, pcmk__node_name(node));
allowed_node = pe__copy_node(node);
g_hash_table_insert(rsc->private->allowed_nodes,
(gpointer) allowed_node->private->id,
allowed_node);
} else {
pcmk__rsc_trace(rsc, "* + %d on %s",
node->assign->score, pcmk__node_name(node));
allowed_node->assign->score =
pcmk__add_scores(allowed_node->assign->score,
node->assign->score);
}
if (allowed_node->assign->probe_mode < location->probe_mode) {
- if (location->probe_mode == pcmk_probe_exclusive) {
+ if (location->probe_mode == pcmk__probe_exclusive) {
pcmk__set_rsc_flags(rsc, pcmk__rsc_exclusive_probes);
}
/* exclusive > never > always... always is default */
allowed_node->assign->probe_mode = location->probe_mode;
}
}
}
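
The probe-mode merge at the end of pcmk__apply_location() relies on the enum's numeric ordering: always (0) < never (1) < exclusive (2), so the "exclusive > never > always" precedence falls out of a plain comparison. A hedged restatement, with merge_probe_mode() as a hypothetical name:

#include <crm/common/nodes_internal.h> // enum pcmk__probe_mode

// Hypothetical helper: keep the more restrictive of two probe modes,
// mirroring the in-place update in pcmk__apply_location()
static enum pcmk__probe_mode
merge_probe_mode(enum pcmk__probe_mode node_mode,
                 enum pcmk__probe_mode constraint_mode)
{
    return (constraint_mode > node_mode)? constraint_mode : node_mode;
}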
diff --git a/lib/pacemaker/pcmk_sched_probes.c b/lib/pacemaker/pcmk_sched_probes.c
index 2065d7b24e..600ea03bea 100644
--- a/lib/pacemaker/pcmk_sched_probes.c
+++ b/lib/pacemaker/pcmk_sched_probes.c
@@ -1,905 +1,905 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Add the expected result to a newly created probe
*
* \param[in,out] probe Probe action to add expected result to
* \param[in] rsc Resource that probe is for
* \param[in] node Node that probe will run on
*/
static void
add_expected_result(pcmk_action_t *probe, const pcmk_resource_t *rsc,
const pcmk_node_t *node)
{
// Check whether resource is currently active on node
pcmk_node_t *running = pe_find_node_id(rsc->private->active_nodes,
node->private->id);
// The expected result is what we think the resource's current state is
if (running == NULL) {
pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING);
} else if (rsc->private->orig_role == pcmk_role_promoted) {
pe__add_action_expected_result(probe, CRM_EX_PROMOTED);
}
}
/*!
* \internal
* \brief Create any needed probes on a node for a list of resources
*
* \param[in,out] rscs List of resources to create probes for
* \param[in,out] node Node to create probes on
*
* \return true if any probe was created, otherwise false
*/
bool
pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node)
{
bool any_created = false;
for (GList *iter = rscs; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
if (rsc->private->cmds->create_probe(rsc, node)) {
any_created = true;
}
}
return any_created;
}
/*!
* \internal
* \brief Order one resource's start after another's start-up probe
*
* \param[in,out] rsc1 Resource that might get start-up probe
* \param[in] rsc2 Resource that might be started
*/
static void
probe_then_start(pcmk_resource_t *rsc1, pcmk_resource_t *rsc2)
{
const pcmk_node_t *rsc1_node = rsc1->private->assigned_node;
if ((rsc1_node != NULL)
&& (g_hash_table_lookup(rsc1->private->probed_nodes,
rsc1_node->private->id) == NULL)) {
pcmk__new_ordering(rsc1,
pcmk__op_key(rsc1->id, PCMK_ACTION_MONITOR, 0),
NULL,
rsc2, pcmk__op_key(rsc2->id, PCMK_ACTION_START, 0),
NULL,
pcmk__ar_ordered, rsc1->private->scheduler);
}
}
/*!
* \internal
* \brief Check whether a guest resource will stop
*
* \param[in] node Guest node to check
*
* \return true if guest resource will likely stop, otherwise false
*/
static bool
guest_resource_will_stop(const pcmk_node_t *node)
{
const pcmk_resource_t *guest_rsc = NULL;
const pcmk_node_t *guest_node = NULL;
guest_rsc = node->private->remote->private->launcher;
guest_node = guest_rsc->private->assigned_node;
/* Ideally, we'd check whether the guest has a required stop, but that
* information doesn't exist yet, so approximate it ...
*/
return pcmk_is_set(node->private->flags, pcmk__node_remote_reset)
|| node->details->unclean
|| pcmk_is_set(guest_rsc->flags, pcmk__rsc_failed)
|| (guest_rsc->private->next_role == pcmk_role_stopped)
// Guest is moving
|| ((guest_rsc->private->orig_role > pcmk_role_stopped)
&& (guest_node != NULL)
&& pcmk__find_node_in_list(guest_rsc->private->active_nodes,
guest_node->private->name) == NULL);
}
/*!
* \internal
* \brief Create a probe action for a resource on a node
*
* \param[in,out] rsc Resource to create probe for
* \param[in,out] node Node to create probe on
*
* \return Newly created probe action
*/
static pcmk_action_t *
probe_action(pcmk_resource_t *rsc, pcmk_node_t *node)
{
pcmk_action_t *probe = NULL;
char *key = pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0);
crm_debug("Scheduling probe of %s %s on %s",
pcmk_role_text(rsc->private->orig_role), rsc->id,
pcmk__node_name(node));
probe = custom_action(rsc, key, PCMK_ACTION_MONITOR, node, FALSE,
rsc->private->scheduler);
pcmk__clear_action_flags(probe, pcmk_action_optional);
pcmk__order_vs_unfence(rsc, node, probe, pcmk__ar_ordered);
add_expected_result(probe, rsc, node);
return probe;
}
/*!
* \internal
 * \brief Schedule any probes needed for a resource on a node
*
* \param[in,out] rsc Resource to create probe for
* \param[in,out] node Node to create probe on
*
* \return true if any probe was created, otherwise false
*/
bool
pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node)
{
uint32_t flags = pcmk__ar_ordered;
pcmk_action_t *probe = NULL;
pcmk_node_t *allowed = NULL;
pcmk_resource_t *top = uber_parent(rsc);
const char *reason = NULL;
CRM_ASSERT((rsc != NULL) && (node != NULL));
if (!pcmk_is_set(rsc->private->scheduler->flags,
pcmk_sched_probe_resources)) {
reason = "start-up probes are disabled";
goto no_probe;
}
if (pcmk__is_pacemaker_remote_node(node)) {
const char *class = crm_element_value(rsc->private->xml, PCMK_XA_CLASS);
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
reason = "Pacemaker Remote nodes cannot run stonith agents";
goto no_probe;
} else if (pcmk__is_guest_or_bundle_node(node)
&& pe__resource_contains_guest_node(rsc->private->scheduler,
rsc)) {
reason = "guest nodes cannot run resources containing guest nodes";
goto no_probe;
} else if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
reason = "Pacemaker Remote nodes cannot host remote connections";
goto no_probe;
}
}
// If this is a collective resource, probes are created for its children
if (rsc->private->children != NULL) {
return pcmk__probe_resource_list(rsc->private->children, node);
}
if ((rsc->private->launcher != NULL)
&& !pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
reason = "resource is inside a container";
goto no_probe;
} else if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
reason = "resource is orphaned";
goto no_probe;
} else if (g_hash_table_lookup(rsc->private->probed_nodes,
node->private->id) != NULL) {
reason = "resource state is already known";
goto no_probe;
}
allowed = g_hash_table_lookup(rsc->private->allowed_nodes,
node->private->id);
if (pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes)
|| pcmk_is_set(top->flags, pcmk__rsc_exclusive_probes)) {
// Exclusive discovery is enabled ...
if (allowed == NULL) {
// ... but this node is not allowed to run the resource
reason = "resource has exclusive discovery but is not allowed "
"on node";
goto no_probe;
- } else if (allowed->assign->probe_mode != pcmk_probe_exclusive) {
+ } else if (allowed->assign->probe_mode != pcmk__probe_exclusive) {
// ... but no constraint marks this node for discovery of resource
reason = "resource has exclusive discovery but is not enabled "
"on node";
goto no_probe;
}
}
if (allowed == NULL) {
allowed = node;
}
- if (allowed->assign->probe_mode == pcmk_probe_never) {
+ if (allowed->assign->probe_mode == pcmk__probe_never) {
reason = "node has discovery disabled";
goto no_probe;
}
if (pcmk__is_guest_or_bundle_node(node)) {
pcmk_resource_t *guest = node->private->remote->private->launcher;
if (guest->private->orig_role == pcmk_role_stopped) {
// The guest is stopped, so we know no resource is active there
reason = "node's guest is stopped";
probe_then_start(guest, top);
goto no_probe;
} else if (guest_resource_will_stop(node)) {
reason = "node's guest will stop";
// Order resource start after guest stop (in case it's restarting)
pcmk__new_ordering(guest,
pcmk__op_key(guest->id, PCMK_ACTION_STOP, 0),
NULL, top,
pcmk__op_key(top->id, PCMK_ACTION_START, 0),
NULL, pcmk__ar_ordered, rsc->private->scheduler);
goto no_probe;
}
}
// We've eliminated all cases where a probe is not needed, so now it is
probe = probe_action(rsc, node);
/* Below, we will order the probe relative to start or reload. If this is a
* clone instance, the start or reload is for the entire clone rather than
* just the instance. Otherwise, the start or reload is for the resource
* itself.
*/
if (!pcmk__is_clone(top)) {
top = rsc;
}
/* Prevent a start if the resource can't be probed, but don't cause the
* resource or entire clone to stop if already active.
*/
if (!pcmk_is_set(probe->flags, pcmk_action_runnable)
&& (top->private->active_nodes == NULL)) {
pcmk__set_relation_flags(flags, pcmk__ar_unrunnable_first_blocks);
}
// Start or reload after probing the resource
pcmk__new_ordering(rsc, NULL, probe,
top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL,
flags, rsc->private->scheduler);
pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
pcmk__ar_ordered, rsc->private->scheduler);
return true;
no_probe:
pcmk__rsc_trace(rsc,
"Skipping probe for %s on %s because %s",
rsc->id, node->private->id, reason);
return false;
}
/*!
* \internal
* \brief Check whether a probe should be ordered before another action
*
* \param[in] probe Probe action to check
* \param[in] then Other action to check
*
* \return true if \p probe should be ordered before \p then, otherwise false
*/
static bool
probe_needed_before_action(const pcmk_action_t *probe,
const pcmk_action_t *then)
{
// Probes on a node are performed after unfencing it, not before
if (pcmk__str_eq(then->task, PCMK_ACTION_STONITH, pcmk__str_none)
&& pcmk__same_node(probe->node, then->node)) {
const char *op = g_hash_table_lookup(then->meta,
PCMK__META_STONITH_ACTION);
if (pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) {
return false;
}
}
// Probes should be done on a node before shutting it down
if (pcmk__str_eq(then->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)
&& (probe->node != NULL) && (then->node != NULL)
&& !pcmk__same_node(probe->node, then->node)) {
return false;
}
// Otherwise probes should always be done before any other action
return true;
}
/*!
* \internal
* \brief Add implicit "probe then X" orderings for "stop then X" orderings
*
* If the state of a resource is not known yet, a probe will be scheduled,
* expecting a "not running" result. If the probe fails, a stop will not be
* scheduled until the next transition. Thus, if there are ordering constraints
* like "stop this resource then do something else that's not for the same
* resource", add implicit "probe this resource then do something" equivalents
* so the relation is upheld until we know whether a stop is needed.
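 *
 * For example, given "stop A then start B" (for hypothetical resources A and
 * B) while A's state is not yet known, this adds "probe A then start B".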
*
* \param[in,out] scheduler Scheduler data
*/
static void
add_probe_orderings_for_stops(pcmk_scheduler_t *scheduler)
{
for (GList *iter = scheduler->ordering_constraints; iter != NULL;
iter = iter->next) {
pcmk__action_relation_t *order = iter->data;
uint32_t order_flags = pcmk__ar_ordered;
GList *probes = NULL;
GList *then_actions = NULL;
pcmk_action_t *first = NULL;
pcmk_action_t *then = NULL;
// Skip disabled orderings
if (order->flags == pcmk__ar_none) {
continue;
}
// Skip non-resource orderings, and orderings for the same resource
if ((order->rsc1 == NULL) || (order->rsc1 == order->rsc2)) {
continue;
}
// Skip invalid orderings (shouldn't be possible)
first = order->action1;
then = order->action2;
if (((first == NULL) && (order->task1 == NULL))
|| ((then == NULL) && (order->task2 == NULL))) {
continue;
}
// Skip orderings for first actions other than stop
if ((first != NULL) && !pcmk__str_eq(first->task, PCMK_ACTION_STOP,
pcmk__str_none)) {
continue;
} else if ((first == NULL)
&& !pcmk__ends_with(order->task1,
"_" PCMK_ACTION_STOP "_0")) {
continue;
}
/* Do not imply a probe ordering for a resource inside of a stopping
* launcher. Otherwise, it might introduce a transition loop, since a
* probe could be scheduled after the launcher starts again.
*/
if ((order->rsc2 != NULL)
&& (order->rsc1->private->launcher == order->rsc2)) {
if ((then != NULL) && pcmk__str_eq(then->task, PCMK_ACTION_STOP,
pcmk__str_none)) {
continue;
} else if ((then == NULL)
&& pcmk__ends_with(order->task2,
"_" PCMK_ACTION_STOP "_0")) {
continue;
}
}
// Preserve certain order options for future filtering
if (pcmk_is_set(order->flags, pcmk__ar_if_first_unmigratable)) {
pcmk__set_relation_flags(order_flags,
pcmk__ar_if_first_unmigratable);
}
if (pcmk_is_set(order->flags, pcmk__ar_if_on_same_node)) {
pcmk__set_relation_flags(order_flags, pcmk__ar_if_on_same_node);
}
// Preserve certain order types for future filtering
if ((order->flags == pcmk__ar_if_required_on_same_node)
|| (order->flags == pcmk__ar_if_on_same_node_or_target)) {
order_flags = order->flags;
}
// List all scheduled probes for the first resource
probes = pe__resource_actions(order->rsc1, NULL, PCMK_ACTION_MONITOR,
FALSE);
if (probes == NULL) { // There aren't any
continue;
}
// List all relevant "then" actions
if (then != NULL) {
then_actions = g_list_prepend(NULL, then);
} else if (order->rsc2 != NULL) {
then_actions = find_actions(order->rsc2->private->actions,
order->task2, NULL);
if (then_actions == NULL) { // There aren't any
g_list_free(probes);
continue;
}
}
crm_trace("Implying 'probe then' orderings for '%s then %s' "
"(id=%d, type=%.6x)",
((first == NULL)? order->task1 : first->uuid),
((then == NULL)? order->task2 : then->uuid),
order->id, order->flags);
for (GList *probe_iter = probes; probe_iter != NULL;
probe_iter = probe_iter->next) {
pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
for (GList *then_iter = then_actions; then_iter != NULL;
then_iter = then_iter->next) {
pcmk_action_t *then = (pcmk_action_t *) then_iter->data;
if (probe_needed_before_action(probe, then)) {
order_actions(probe, then, order_flags);
}
}
}
g_list_free(then_actions);
g_list_free(probes);
}
}
/*!
* \internal
 * \brief Add necessary orderings between a probe and starts of clone
 *        instances, in addition to the ordering with the parent resource
 *        added when the probe was created
*
* \param[in,out] probe Probe as 'first' action in an ordering
* \param[in,out] after 'then' action wrapper in the ordering
*/
static void
add_start_orderings_for_probe(pcmk_action_t *probe,
pcmk__related_action_t *after)
{
uint32_t flags = pcmk__ar_ordered|pcmk__ar_unrunnable_first_blocks;
/* Although the ordering between the probe of the clone instance and the
* start of its parent has been added in pcmk__probe_rsc_on_node(), we
* avoided enforcing `pcmk__ar_unrunnable_first_blocks` order type for that
* as long as any of the clone instances are running to prevent them from
* being unexpectedly stopped.
*
* On the other hand, we still need to prevent any inactive instances from
* starting unless the probe is runnable so that we don't risk starting too
* many instances before we know the state on all nodes.
*/
if ((after->action->rsc->private->variant <= pcmk__rsc_variant_group)
|| pcmk_is_set(probe->flags, pcmk_action_runnable)
// The order type is already enforced for its parent.
|| pcmk_is_set(after->type, pcmk__ar_unrunnable_first_blocks)
|| (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
|| !pcmk__str_eq(after->action->task, PCMK_ACTION_START,
pcmk__str_none)) {
return;
}
crm_trace("Adding probe start orderings for 'unrunnable %s@%s "
"then instances of %s@%s'",
probe->uuid, pcmk__node_name(probe->node),
after->action->uuid, pcmk__node_name(after->action->node));
for (GList *then_iter = after->action->actions_after; then_iter != NULL;
then_iter = then_iter->next) {
pcmk__related_action_t *then = then_iter->data;
if ((then->action->rsc->private->active_nodes != NULL)
|| (pe__const_top_resource(then->action->rsc, false)
!= after->action->rsc)
|| !pcmk__str_eq(then->action->task, PCMK_ACTION_START,
pcmk__str_none)) {
continue;
}
crm_trace("Adding probe start ordering for 'unrunnable %s@%s "
"then %s@%s' (type=%#.6x)",
probe->uuid, pcmk__node_name(probe->node),
then->action->uuid, pcmk__node_name(then->action->node),
flags);
        /* Prevent the instance from starting if its probe is unrunnable, but
         * don't cause any other instances to stop if already active.
*/
order_actions(probe, then->action, flags);
}
return;
}
/*!
* \internal
* \brief Order probes before restarts and re-promotes
*
* If a given ordering is a "probe then start" or "probe then promote" ordering,
* add an implicit "probe then stop/demote" ordering in case the action is part
* of a restart/re-promote, and do the same recursively for all actions ordered
* after the "then" action.
*
* \param[in,out] probe Probe as 'first' action in an ordering
* \param[in,out] after 'then' action in the ordering
*/
static void
add_restart_orderings_for_probe(pcmk_action_t *probe, pcmk_action_t *after)
{
GList *iter = NULL;
bool interleave = false;
pcmk_resource_t *compatible_rsc = NULL;
// Validate that this is a resource probe followed by some action
if ((after == NULL) || (probe == NULL) || !pcmk__is_primitive(probe->rsc)
|| !pcmk__str_eq(probe->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
return;
}
// Avoid running into any possible loop
if (pcmk_is_set(after->flags, pcmk_action_detect_loop)) {
return;
}
pcmk__set_action_flags(after, pcmk_action_detect_loop);
crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
probe->uuid, pcmk__node_name(probe->node),
after->uuid, pcmk__node_name(after->node));
/* Add restart orderings if "then" is for a different primitive.
* Orderings for collective resources will be added later.
*/
if (pcmk__is_primitive(after->rsc) && (probe->rsc != after->rsc)) {
GList *then_actions = NULL;
if (pcmk__str_eq(after->task, PCMK_ACTION_START, pcmk__str_none)) {
then_actions = pe__resource_actions(after->rsc, NULL,
PCMK_ACTION_STOP, FALSE);
} else if (pcmk__str_eq(after->task, PCMK_ACTION_PROMOTE,
pcmk__str_none)) {
then_actions = pe__resource_actions(after->rsc, NULL,
PCMK_ACTION_DEMOTE, FALSE);
}
for (iter = then_actions; iter != NULL; iter = iter->next) {
pcmk_action_t *then = (pcmk_action_t *) iter->data;
// Skip pseudo-actions (for example, those implied by fencing)
if (!pcmk_is_set(then->flags, pcmk_action_pseudo)) {
order_actions(probe, then, pcmk__ar_ordered);
}
}
g_list_free(then_actions);
}
/* Detect whether "then" is an interleaved clone action. For these, we want
* to add orderings only for the relevant instance.
*/
if ((after->rsc != NULL)
&& (after->rsc->private->variant > pcmk__rsc_variant_group)) {
interleave = crm_is_true(g_hash_table_lookup(after->rsc->private->meta,
PCMK_META_INTERLEAVE));
if (interleave) {
compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
after->rsc,
pcmk_role_unknown,
false);
}
}
/* Now recursively do the same for all actions ordered after "then". This
* also handles collective resources since the collective action will be
* ordered before its individual instances' actions.
*/
for (iter = after->actions_after; iter != NULL; iter = iter->next) {
pcmk__related_action_t *after_wrapper = iter->data;
const pcmk_resource_t *chained_rsc = NULL;
/* pcmk__ar_first_implies_then is the reason why a required A.start
* implies/enforces B.start to be required too, which is the cause of
* B.restart/re-promote.
*
         * We are not sure about pcmk__ar_first_implies_same_node_then, though.
         * It is currently used only for the unfencing case, which tends to
         * introduce transition loops...
*/
if (!pcmk_is_set(after_wrapper->type, pcmk__ar_first_implies_then)) {
/* The order type between a group/clone and its child such as
* B.start-> B_child.start is:
* pcmk__ar_then_implies_first_graphed
* |pcmk__ar_unrunnable_first_blocks
*
* Proceed through the ordering chain and build dependencies with
* its children.
*/
if ((after->rsc == NULL)
|| (after->rsc->private->variant < pcmk__rsc_variant_group)
|| (probe->rsc->private->parent == after->rsc)
|| (after_wrapper->action->rsc == NULL)) {
continue;
}
chained_rsc = after_wrapper->action->rsc;
if ((chained_rsc->private->variant > pcmk__rsc_variant_group)
|| (after->rsc != chained_rsc->private->parent)) {
continue;
}
/* Proceed to the children of a group or a non-interleaved clone.
* For an interleaved clone, proceed only to the relevant child.
*/
if ((after->rsc->private->variant > pcmk__rsc_variant_group)
&& interleave
&& ((compatible_rsc == NULL)
|| (compatible_rsc != chained_rsc))) {
continue;
}
}
crm_trace("Recursively adding probe restart orderings for "
"'%s@%s then %s@%s' (type=%#.6x)",
after->uuid, pcmk__node_name(after->node),
after_wrapper->action->uuid,
pcmk__node_name(after_wrapper->action->node),
after_wrapper->type);
add_restart_orderings_for_probe(probe, after_wrapper->action);
}
}
/*!
* \internal
* \brief Clear the tracking flag on all scheduled actions
*
* \param[in,out] scheduler Scheduler data
*/
static void
clear_actions_tracking_flag(pcmk_scheduler_t *scheduler)
{
for (GList *iter = scheduler->actions; iter != NULL; iter = iter->next) {
pcmk_action_t *action = iter->data;
pcmk__clear_action_flags(action, pcmk_action_detect_loop);
}
}
/*!
* \internal
* \brief Add start and restart orderings for probes scheduled for a resource
*
* \param[in,out] data Resource whose probes should be ordered
* \param[in] user_data Unused
*/
static void
add_start_restart_orderings_for_rsc(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = data;
GList *probes = NULL;
// For collective resources, order each instance recursively
if (!pcmk__is_primitive(rsc)) {
g_list_foreach(rsc->private->children,
add_start_restart_orderings_for_rsc, NULL);
return;
}
// Find all probes for given resource
probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
// Add probe restart orderings for each probe found
for (GList *iter = probes; iter != NULL; iter = iter->next) {
pcmk_action_t *probe = (pcmk_action_t *) iter->data;
for (GList *then_iter = probe->actions_after; then_iter != NULL;
then_iter = then_iter->next) {
pcmk__related_action_t *then = then_iter->data;
add_start_orderings_for_probe(probe, then);
add_restart_orderings_for_probe(probe, then->action);
clear_actions_tracking_flag(rsc->private->scheduler);
}
}
g_list_free(probes);
}
/*!
* \internal
* \brief Add "A then probe B" orderings for "A then B" orderings
*
* \param[in,out] scheduler Scheduler data
*
* \note This function is currently disabled (see next comment).
*/
static void
order_then_probes(pcmk_scheduler_t *scheduler)
{
#if 0
/* Given an ordering "A then B", we would prefer to wait for A to be started
* before probing B.
*
* For example, if A is a filesystem which B can't even run without, it
* would be helpful if the author of B's agent could assume that A is
* running before B.monitor will be called.
*
* However, we can't _only_ probe after A is running, otherwise we wouldn't
* detect the state of B if A could not be started. We can't even do an
* opportunistic version of this, because B may be moving:
*
* A.stop -> A.start -> B.probe -> B.stop -> B.start
*
* and if we add B.stop -> A.stop here, we get a loop:
*
* A.stop -> A.start -> B.probe -> B.stop -> A.stop
*
* We could kill the "B.probe -> B.stop" dependency, but that could mean
* stopping B "too" soon, because B.start must wait for the probe, and
* we don't want to stop B if we can't start it.
*
* We could add the ordering only if A is an anonymous clone with
* clone-max == node-max (since we'll never be moving it). However, we could
* still be stopping one instance at the same time as starting another.
*
* The complexity of checking for allowed conditions combined with the ever
* narrowing use case suggests that this code should remain disabled until
* someone gets smarter.
*/
for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
pcmk_action_t *start = NULL;
GList *actions = NULL;
GList *probes = NULL;
actions = pe__resource_actions(rsc, NULL, PCMK_ACTION_START, FALSE);
if (actions) {
start = actions->data;
g_list_free(actions);
}
if (start == NULL) {
crm_debug("No start action for %s", rsc->id);
continue;
}
probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
for (actions = start->actions_before; actions != NULL;
actions = actions->next) {
pcmk__related_action_t *before = actions->data;
pcmk_action_t *first = before->action;
pcmk_resource_t *first_rsc = first->rsc;
if (first->required_runnable_before) {
for (GList *clone_actions = first->actions_before;
clone_actions != NULL;
clone_actions = clone_actions->next) {
before = clone_actions->data;
crm_trace("Testing '%s then %s' for %s",
first->uuid, before->action->uuid, start->uuid);
CRM_ASSERT(before->action->rsc != NULL);
first_rsc = before->action->rsc;
break;
}
} else if (!pcmk__str_eq(first->task, PCMK_ACTION_START,
pcmk__str_none)) {
crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
}
if (first_rsc == NULL) {
continue;
} else if (pe__const_top_resource(first_rsc, false)
== pe__const_top_resource(start->rsc, false)) {
crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
continue;
} else if (!pcmk__is_clone(pe__const_top_resource(first_rsc,
false))) {
crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
continue;
}
crm_debug("Applying %s before %s", first->uuid, start->uuid);
for (GList *probe_iter = probes; probe_iter != NULL;
probe_iter = probe_iter->next) {
pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
crm_debug("Ordering %s before %s", first->uuid, probe->uuid);
order_actions(first, probe, pcmk__ar_ordered);
}
}
}
#endif
}
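/*!
 * \internal
 * \brief Add needed orderings for all scheduled probes
 *
 * \param[in,out] scheduler Scheduler data
 */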
void
pcmk__order_probes(pcmk_scheduler_t *scheduler)
{
// Add orderings for "probe then X"
g_list_foreach(scheduler->resources, add_start_restart_orderings_for_rsc,
NULL);
add_probe_orderings_for_stops(scheduler);
order_then_probes(scheduler);
}
/*!
* \internal
* \brief Schedule any probes needed
*
* \param[in,out] scheduler Scheduler data
*
* \note This may also schedule fencing of failed remote nodes.
*/
void
pcmk__schedule_probes(pcmk_scheduler_t *scheduler)
{
// Schedule probes on each node in the cluster as needed
for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
if (!node->details->online) { // Don't probe offline nodes
if (pcmk__is_failed_remote_node(node)) {
pe_fence_node(scheduler, node,
"the connection is unrecoverable", FALSE);
}
continue;
}
if (node->details->unclean) { // Don't probe nodes that need fencing
continue;
}
if (!pcmk_is_set(node->private->flags, pcmk__node_probes_allowed)) {
// The user requested that probes not be done on this node
continue;
}
// Probe each resource in the cluster on this node, as needed
pcmk__probe_resource_list(scheduler->resources, node);
}
}
diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c
index d2e4c74450..edea10542c 100644
--- a/lib/pacemaker/pcmk_scheduler.c
+++ b/lib/pacemaker/pcmk_scheduler.c
@@ -1,907 +1,907 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/cib/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/scheduler_internal.h>
#include <glib.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
CRM_TRACE_INIT_DATA(pacemaker);
/*!
* \internal
* \brief Do deferred action checks after assignment
*
* When unpacking the resource history, the scheduler checks for resource
* configurations that have changed since an action was run. However, at that
* time, bundles using the REMOTE_CONTAINER_HACK don't have their final
* parameter information, so instead they add a deferred check to a list. This
* function processes one entry in that list.
*
* \param[in,out] rsc Resource that action history is for
* \param[in,out] node Node that action history is for
* \param[in] rsc_op Action history entry
* \param[in] check Type of deferred check to do
*/
static void
check_params(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *rsc_op,
enum pcmk__check_parameters check)
{
const char *reason = NULL;
pcmk__op_digest_t *digest_data = NULL;
switch (check) {
case pcmk__check_active:
if (pcmk__check_action_config(rsc, node, rsc_op)
&& pe_get_failcount(node, rsc, NULL, pcmk__fc_effective,
NULL)) {
reason = "action definition changed";
}
break;
case pcmk__check_last_failure:
digest_data = rsc_action_digest_cmp(rsc, rsc_op, node,
rsc->private->scheduler);
switch (digest_data->rc) {
case pcmk__digest_unknown:
crm_trace("Resource %s history entry %s on %s has "
"no digest to compare",
rsc->id, pcmk__xe_id(rsc_op), node->private->id);
break;
case pcmk__digest_match:
break;
default:
reason = "resource parameters have changed";
break;
}
break;
}
if (reason != NULL) {
pe__clear_failcount(rsc, node, reason, rsc->private->scheduler);
}
}
/*!
* \internal
* \brief Check whether a resource has failcount clearing scheduled on a node
*
* \param[in] node Node to check
* \param[in] rsc Resource to check
*
* \return true if \p rsc has failcount clearing scheduled on \p node,
* otherwise false
*/
static bool
failcount_clear_action_exists(const pcmk_node_t *node,
const pcmk_resource_t *rsc)
{
GList *list = pe__resource_actions(rsc, node, PCMK_ACTION_CLEAR_FAILCOUNT,
TRUE);
if (list != NULL) {
g_list_free(list);
return true;
}
return false;
}
/*!
* \internal
* \brief Ban a resource from a node if it reached its failure threshold there
*
* \param[in,out] data Resource to check failure threshold for
* \param[in] user_data Node to check resource on
*/
static void
check_failure_threshold(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = data;
const pcmk_node_t *node = user_data;
// If this is a collective resource, apply recursively to children instead
if (rsc->private->children != NULL) {
g_list_foreach(rsc->private->children, check_failure_threshold,
user_data);
return;
}
if (!failcount_clear_action_exists(node, rsc)) {
/* Don't force the resource away from this node due to a failcount
* that's going to be cleared.
*
* @TODO Failcount clearing can be scheduled in
* pcmk__handle_rsc_config_changes() via process_rsc_history(), or in
* schedule_resource_actions() via check_params(). This runs well before
* then, so it cannot detect those, meaning we might check the migration
* threshold when we shouldn't. Worst case, we stop or move the
* resource, then move it back in the next transition.
*/
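        /* Note: for an anonymous clone instance, pcmk__threshold_reached()
         * may set "failed" to the whole clone rather than the instance, in
         * which case the ban below applies to the clone on this node
         */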
pcmk_resource_t *failed = NULL;
if (pcmk__threshold_reached(rsc, node, &failed)) {
resource_location(failed, node, -PCMK_SCORE_INFINITY,
"__fail_limit__", rsc->private->scheduler);
}
}
}
/*!
* \internal
* \brief If resource has exclusive discovery, ban node if not allowed
*
* Location constraints have a PCMK_XA_RESOURCE_DISCOVERY option that allows
* users to specify where probes are done for the affected resource. If this is
* set to \c exclusive, probes will only be done on nodes listed in exclusive
* constraints. This function bans the resource from the node if the node is not
* listed.
*
* \param[in,out] data Resource to check
* \param[in] user_data Node to check resource on
*/
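/* A hedged illustration of such a constraint (IDs and names hypothetical):
 *
 *   <rsc_location id="loc-db-node1" rsc="db" node="node1" score="INFINITY"
 *                 resource-discovery="exclusive"/>
 *
 * With this, probes for "db" are scheduled only on node1 (and any other nodes
 * named in exclusive-discovery constraints for it); everywhere else, this
 * function sets the node's assignment score to -INFINITY.
 */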
static void
apply_exclusive_discovery(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = data;
const pcmk_node_t *node = user_data;
if (pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes)
|| pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pcmk__rsc_exclusive_probes)) {
pcmk_node_t *match = NULL;
// If this is a collective resource, apply recursively to children
g_list_foreach(rsc->private->children, apply_exclusive_discovery,
user_data);
match = g_hash_table_lookup(rsc->private->allowed_nodes,
node->private->id);
if ((match != NULL)
- && (match->assign->probe_mode != pcmk_probe_exclusive)) {
+ && (match->assign->probe_mode != pcmk__probe_exclusive)) {
match->assign->score = -PCMK_SCORE_INFINITY;
}
}
}
/*!
* \internal
* \brief Apply stickiness to a resource if appropriate
*
* \param[in,out] data Resource to check for stickiness
* \param[in] user_data Ignored
*/
static void
apply_stickiness(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = data;
pcmk_node_t *node = NULL;
// If this is a collective resource, apply recursively to children instead
if (rsc->private->children != NULL) {
g_list_foreach(rsc->private->children, apply_stickiness, NULL);
return;
}
/* A resource is sticky if it is managed, has stickiness configured, and is
* active on a single node.
*/
if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)
|| (rsc->private->stickiness < 1)
|| !pcmk__list_of_1(rsc->private->active_nodes)) {
return;
}
node = rsc->private->active_nodes->data;
/* In a symmetric cluster, stickiness can always be used. In an
* asymmetric cluster, we have to check whether the resource is still
* allowed on the node, so we don't keep the resource somewhere it is no
* longer explicitly enabled.
*/
if (!pcmk_is_set(rsc->private->scheduler->flags,
pcmk_sched_symmetric_cluster)
&& (g_hash_table_lookup(rsc->private->allowed_nodes,
node->private->id) == NULL)) {
pcmk__rsc_debug(rsc,
"Ignoring %s stickiness because the cluster is "
"asymmetric and %s is not explicitly allowed",
rsc->id, pcmk__node_name(node));
return;
}
pcmk__rsc_debug(rsc, "Resource %s has %d stickiness on %s",
rsc->id, rsc->private->stickiness, pcmk__node_name(node));
resource_location(rsc, node, rsc->private->stickiness, "stickiness",
rsc->private->scheduler);
}
/*!
* \internal
* \brief Apply shutdown locks for all resources as appropriate
*
* \param[in,out] scheduler Scheduler data
*/
static void
apply_shutdown_locks(pcmk_scheduler_t *scheduler)
{
if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
return;
}
for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
rsc->private->cmds->shutdown_lock(rsc);
}
}
/*!
* \internal
* \brief Calculate the number of available nodes in the cluster
*
* \param[in,out] scheduler Scheduler data
*/
static void
count_available_nodes(pcmk_scheduler_t *scheduler)
{
if (pcmk_is_set(scheduler->flags, pcmk_sched_no_compat)) {
return;
}
// @COMPAT for API backward compatibility only (cluster does not use value)
for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
if ((node != NULL) && (node->assign->score >= 0)
&& node->details->online
&& (node->private->variant != pcmk__node_variant_ping)) {
scheduler->max_valid_nodes++;
}
}
crm_trace("Online node count: %d", scheduler->max_valid_nodes);
}
/*!
 * \internal
 * \brief Apply node-specific scheduling criteria
 *
 * After the CIB has been unpacked, process node-specific scheduling criteria
 * including shutdown locks, location constraints, resource stickiness,
 * migration thresholds, and exclusive resource discovery.
 *
 * \param[in,out] scheduler Scheduler data
 */
static void
apply_node_criteria(pcmk_scheduler_t *scheduler)
{
crm_trace("Applying node-specific scheduling criteria");
apply_shutdown_locks(scheduler);
count_available_nodes(scheduler);
pcmk__apply_locations(scheduler);
g_list_foreach(scheduler->resources, apply_stickiness, NULL);
for (GList *node_iter = scheduler->nodes; node_iter != NULL;
node_iter = node_iter->next) {
for (GList *rsc_iter = scheduler->resources; rsc_iter != NULL;
rsc_iter = rsc_iter->next) {
check_failure_threshold(rsc_iter->data, node_iter->data);
apply_exclusive_discovery(rsc_iter->data, node_iter->data);
}
}
}
/*!
* \internal
* \brief Assign resources to nodes
*
* \param[in,out] scheduler Scheduler data
*/
static void
assign_resources(pcmk_scheduler_t *scheduler)
{
GList *iter = NULL;
crm_trace("Assigning resources to nodes");
if (!pcmk__str_eq(scheduler->placement_strategy, PCMK_VALUE_DEFAULT,
pcmk__str_casei)) {
pcmk__sort_resources(scheduler);
}
pcmk__show_node_capacities("Original", scheduler);
if (pcmk_is_set(scheduler->flags, pcmk_sched_have_remote_nodes)) {
/* Assign remote connection resources first (which will also assign any
* colocation dependencies). If the connection is migrating, always
* prefer the partial migration target.
*/
for (iter = scheduler->resources; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
const pcmk_node_t *target = rsc->private->partial_migration_target;
if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
pcmk__rsc_trace(rsc, "Assigning remote connection resource '%s'",
rsc->id);
rsc->private->cmds->assign(rsc, target, true);
}
}
}
    /* Now assign the remaining (non-remote-connection) resources */
for (iter = scheduler->resources; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
if (!pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
pcmk__rsc_trace(rsc, "Assigning %s resource '%s'",
rsc->private->xml->name, rsc->id);
rsc->private->cmds->assign(rsc, NULL, true);
}
}
pcmk__show_node_capacities("Remaining", scheduler);
}
/*!
* \internal
* \brief Schedule fail count clearing on online nodes if resource is orphaned
*
* \param[in,out] data Resource to check
* \param[in] user_data Ignored
*/
static void
clear_failcounts_if_orphaned(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = data;
if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
return;
}
crm_trace("Clear fail counts for orphaned resource %s", rsc->id);
/* There's no need to recurse into rsc->private->children because those
* should just be unassigned clone instances.
*/
for (GList *iter = rsc->private->scheduler->nodes;
iter != NULL; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
pcmk_action_t *clear_op = NULL;
if (!node->details->online) {
continue;
}
if (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL) == 0) {
continue;
}
clear_op = pe__clear_failcount(rsc, node, "it is orphaned",
rsc->private->scheduler);
/* We can't use order_action_then_stop() here because its
* pcmk__ar_guest_allowed breaks things
*/
pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc),
NULL, pcmk__ar_ordered, rsc->private->scheduler);
}
}
/*!
* \internal
* \brief Schedule any resource actions needed
*
* \param[in,out] scheduler Scheduler data
*/
static void
schedule_resource_actions(pcmk_scheduler_t *scheduler)
{
// Process deferred action checks
pe__foreach_param_check(scheduler, check_params);
pe__free_param_checks(scheduler);
if (pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) {
crm_trace("Scheduling probes");
pcmk__schedule_probes(scheduler);
}
if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
g_list_foreach(scheduler->resources, clear_failcounts_if_orphaned,
NULL);
}
crm_trace("Scheduling resource actions");
for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
rsc->private->cmds->create_actions(rsc);
}
}
/*!
* \internal
* \brief Check whether a resource or any of its descendants are managed
*
* \param[in] rsc Resource to check
*
* \return true if resource or any descendant is managed, otherwise false
*/
static bool
is_managed(const pcmk_resource_t *rsc)
{
if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
return true;
}
for (GList *iter = rsc->private->children;
iter != NULL; iter = iter->next) {
if (is_managed((pcmk_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether any resources in the cluster are managed
*
* \param[in] scheduler Scheduler data
*
* \return true if any resource is managed, otherwise false
*/
static bool
any_managed_resources(const pcmk_scheduler_t *scheduler)
{
for (const GList *iter = scheduler->resources;
iter != NULL; iter = iter->next) {
if (is_managed((const pcmk_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether a node requires fencing
*
* \param[in] node Node to check
* \param[in] have_managed Whether any resource in cluster is managed
*
* \return true if \p node should be fenced, otherwise false
*/
static bool
needs_fencing(const pcmk_node_t *node, bool have_managed)
{
return have_managed && node->details->unclean
&& pe_can_fence(node->private->scheduler, node);
}
/*!
* \internal
* \brief Check whether a node requires shutdown
*
* \param[in] node Node to check
*
* \return true if \p node should be shut down, otherwise false
*/
static bool
needs_shutdown(const pcmk_node_t *node)
{
if (pcmk__is_pacemaker_remote_node(node)) {
/* Do not send shutdown actions for Pacemaker Remote nodes.
* @TODO We might come up with a good use for this in the future.
*/
return false;
}
return node->details->online && node->details->shutdown;
}
/*!
* \internal
* \brief Track and order non-DC fencing
*
* \param[in,out] list List of existing non-DC fencing actions
* \param[in,out] action Fencing action to prepend to \p list
* \param[in] scheduler Scheduler data
*
* \return (Possibly new) head of \p list
*/
static GList *
add_nondc_fencing(GList *list, pcmk_action_t *action,
const pcmk_scheduler_t *scheduler)
{
if (!pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)
&& (list != NULL)) {
        /* Concurrent fencing is disabled, so order each non-DC fencing action
         * in a chain. If there is any DC fencing or shutdown, it will later
         * be ordered after the last action in the chain.
*/
order_actions((pcmk_action_t *) list->data, action, pcmk__ar_ordered);
}
return g_list_prepend(list, action);
}
/*!
* \internal
* \brief Schedule a node for fencing
*
* \param[in,out] node Node that requires fencing
 *
 * \return Newly scheduled fencing action
 */
static pcmk_action_t *
schedule_fencing(pcmk_node_t *node)
{
pcmk_action_t *fencing = pe_fence_op(node, NULL, FALSE, "node is unclean",
FALSE, node->private->scheduler);
pcmk__sched_warn("Scheduling node %s for fencing", pcmk__node_name(node));
pcmk__order_vs_fence(fencing, node->private->scheduler);
return fencing;
}
/*!
* \internal
* \brief Create and order node fencing and shutdown actions
*
* \param[in,out] scheduler Scheduler data
*/
static void
schedule_fencing_and_shutdowns(pcmk_scheduler_t *scheduler)
{
pcmk_action_t *dc_down = NULL;
bool integrity_lost = false;
bool have_managed = any_managed_resources(scheduler);
GList *fencing_ops = NULL;
GList *shutdown_ops = NULL;
crm_trace("Scheduling fencing and shutdowns as needed");
if (!have_managed) {
crm_notice("No fencing will be done until there are resources "
"to manage");
}
// Check each node for whether it needs fencing or shutdown
for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
pcmk_action_t *fencing = NULL;
const bool is_dc = pcmk__same_node(node, scheduler->dc_node);
/* Guest nodes are "fenced" by recovering their container resource,
* so handle them separately.
*/
if (pcmk__is_guest_or_bundle_node(node)) {
if (pcmk_is_set(node->private->flags, pcmk__node_remote_reset)
&& have_managed && pe_can_fence(scheduler, node)) {
pcmk__fence_guest(node);
}
continue;
}
if (needs_fencing(node, have_managed)) {
fencing = schedule_fencing(node);
// Track DC and non-DC fence actions separately
if (is_dc) {
dc_down = fencing;
} else {
fencing_ops = add_nondc_fencing(fencing_ops, fencing,
scheduler);
}
} else if (needs_shutdown(node)) {
pcmk_action_t *down_op = pcmk__new_shutdown_action(node);
// Track DC and non-DC shutdown actions separately
if (is_dc) {
dc_down = down_op;
} else {
shutdown_ops = g_list_prepend(shutdown_ops, down_op);
}
}
if ((fencing == NULL) && node->details->unclean) {
integrity_lost = true;
pcmk__config_warn("Node %s is unclean but cannot be fenced",
pcmk__node_name(node));
}
}
if (integrity_lost) {
if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
pcmk__config_warn("Resource functionality and data integrity "
"cannot be guaranteed (configure, enable, "
"and test fencing to correct this)");
} else if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
crm_notice("Unclean nodes will not be fenced until quorum is "
"attained or " PCMK_OPT_NO_QUORUM_POLICY " is set to "
PCMK_VALUE_IGNORE);
}
}
if (dc_down != NULL) {
/* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
* DC elections. However, we don't want to order non-DC shutdowns before
* a DC *fencing*, because even though we don't want a node that's
* shutting down to become DC, the DC fencing could be ordered before a
* clone stop that's also ordered before the shutdowns, thus leading to
* a graph loop.
*/
if (pcmk__str_eq(dc_down->task, PCMK_ACTION_DO_SHUTDOWN,
pcmk__str_none)) {
pcmk__order_after_each(dc_down, shutdown_ops);
}
// Order any non-DC fencing before any DC fencing or shutdown
if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) {
/* With concurrent fencing, order each non-DC fencing action
* separately before any DC fencing or shutdown.
*/
pcmk__order_after_each(dc_down, fencing_ops);
} else if (fencing_ops != NULL) {
/* Without concurrent fencing, the non-DC fencing actions are
* already ordered relative to each other, so we just need to order
* the DC fencing after the last action in the chain (which is the
* first item in the list).
*/
order_actions((pcmk_action_t *) fencing_ops->data, dc_down,
pcmk__ar_ordered);
}
}
g_list_free(fencing_ops);
g_list_free(shutdown_ops);
}
static void
log_resource_details(pcmk_scheduler_t *scheduler)
{
pcmk__output_t *out = scheduler->priv;
GList *all = NULL;
/* Due to the `crm_mon --node=` feature, out->message() for all the
* resource-related messages expects a list of nodes that we are allowed to
* output information for. Here, we create a wildcard to match all nodes.
*/
all = g_list_prepend(all, (gpointer) "*");
for (GList *item = scheduler->resources; item != NULL; item = item->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) item->data;
// Log all resources except inactive orphans
if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed)
|| (rsc->private->orig_role != pcmk_role_stopped)) {
out->message(out, pcmk__map_element_name(rsc->private->xml), 0UL,
rsc, all, all);
}
}
g_list_free(all);
}
static void
log_all_actions(pcmk_scheduler_t *scheduler)
{
/* This only ever outputs to the log, so ignore whatever output object was
* previously set and just log instead.
*/
pcmk__output_t *prev_out = scheduler->priv;
pcmk__output_t *out = NULL;
if (pcmk__log_output_new(&out) != pcmk_rc_ok) {
return;
}
pe__register_messages(out);
pcmk__register_lib_messages(out);
pcmk__output_set_log_level(out, LOG_NOTICE);
scheduler->priv = out;
out->begin_list(out, NULL, NULL, "Actions");
pcmk__output_actions(scheduler);
out->end_list(out);
out->finish(out, CRM_EX_OK, true, NULL);
pcmk__output_free(out);
scheduler->priv = prev_out;
}
/*!
* \internal
* \brief Log all required but unrunnable actions at trace level
*
* \param[in] scheduler Scheduler data
*/
static void
log_unrunnable_actions(const pcmk_scheduler_t *scheduler)
{
const uint64_t flags = pcmk_action_optional
|pcmk_action_runnable
|pcmk_action_pseudo;
crm_trace("Required but unrunnable actions:");
for (const GList *iter = scheduler->actions;
iter != NULL; iter = iter->next) {
const pcmk_action_t *action = (const pcmk_action_t *) iter->data;
if (!pcmk_any_flags_set(action->flags, flags)) {
pcmk__log_action("\t", action, true);
}
}
}
/*!
* \internal
* \brief Unpack the CIB for scheduling
*
* \param[in,out] cib CIB XML to unpack (may be NULL if already unpacked)
* \param[in] flags Scheduler flags to set in addition to defaults
* \param[in,out] scheduler Scheduler data
*/
static void
unpack_cib(xmlNode *cib, unsigned long long flags, pcmk_scheduler_t *scheduler)
{
    const char *localhost_save = NULL;
if (pcmk_is_set(scheduler->flags, pcmk_sched_have_status)) {
crm_trace("Reusing previously calculated cluster status");
pcmk__set_scheduler_flags(scheduler, flags);
return;
}
if (scheduler->localhost) {
localhost_save = scheduler->localhost;
}
CRM_ASSERT(cib != NULL);
crm_trace("Calculating cluster status");
/* This will zero the entire struct without freeing anything first, so
* callers should never call pcmk__schedule_actions() with a populated data
* set unless pcmk_sched_have_status is set (i.e. cluster_status() was
* previously called, whether directly or via pcmk__schedule_actions()).
*/
set_working_set_defaults(scheduler);
if (localhost_save) {
scheduler->localhost = localhost_save;
}
pcmk__set_scheduler_flags(scheduler, flags);
scheduler->input = cib;
cluster_status(scheduler); // Sets pcmk_sched_have_status
}
/*!
* \internal
* \brief Run the scheduler for a given CIB
*
* \param[in,out] cib CIB XML to use as scheduler input
* \param[in] flags Scheduler flags to set in addition to defaults
* \param[in,out] scheduler Scheduler data
*/
void
pcmk__schedule_actions(xmlNode *cib, unsigned long long flags,
pcmk_scheduler_t *scheduler)
{
unpack_cib(cib, flags, scheduler);
pcmk__set_assignment_methods(scheduler);
pcmk__apply_node_health(scheduler);
pcmk__unpack_constraints(scheduler);
if (pcmk_is_set(scheduler->flags, pcmk_sched_validate_only)) {
return;
}
if (!pcmk_is_set(scheduler->flags, pcmk_sched_location_only)
&& pcmk__is_daemon) {
log_resource_details(scheduler);
}
apply_node_criteria(scheduler);
if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
return;
}
pcmk__create_internal_constraints(scheduler);
pcmk__handle_rsc_config_changes(scheduler);
assign_resources(scheduler);
schedule_resource_actions(scheduler);
/* Remote ordering constraints need to happen prior to calculating fencing
* because it is one more place we can mark nodes as needing fencing.
*/
pcmk__order_remote_connection_actions(scheduler);
schedule_fencing_and_shutdowns(scheduler);
pcmk__apply_orderings(scheduler);
log_all_actions(scheduler);
pcmk__create_graph(scheduler);
if (get_crm_log_level() == LOG_TRACE) {
log_unrunnable_actions(scheduler);
}
}
/*!
* \internal
* \brief Initialize scheduler data
*
* Make our own copies of the CIB XML and date/time object, if they're not
* \c NULL. This way we don't have to take ownership of the objects passed via
* the API.
*
* This function is most useful for public API functions that want the caller
 * to retain ownership of the CIB object.
*
* \param[in,out] out Output object
* \param[in] input The CIB XML to check (if \c NULL, use current CIB)
* \param[in] date Date and time to use in the scheduler (if \c NULL,
* use current date and time). This can be used for
 * checking whether a rule is in effect at a certain
* date and time.
* \param[out] scheduler Where to store initialized scheduler data
*
* \return Standard Pacemaker return code
*/
int
pcmk__init_scheduler(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date,
pcmk_scheduler_t **scheduler)
{
// Allows for cleaner syntax than dereferencing the scheduler argument
pcmk_scheduler_t *new_scheduler = NULL;
new_scheduler = pe_new_working_set();
if (new_scheduler == NULL) {
return ENOMEM;
}
pcmk__set_scheduler_flags(new_scheduler,
pcmk_sched_no_counts|pcmk_sched_no_compat);
// Populate the scheduler data
// Make our own copy of the given input or fetch the CIB and use that
if (input != NULL) {
new_scheduler->input = pcmk__xml_copy(NULL, input);
if (new_scheduler->input == NULL) {
out->err(out, "Failed to copy input XML");
pe_free_working_set(new_scheduler);
return ENOMEM;
}
} else {
int rc = cib__signon_query(out, NULL, &(new_scheduler->input));
if (rc != pcmk_rc_ok) {
pe_free_working_set(new_scheduler);
return rc;
}
}
// Make our own copy of the given crm_time_t object; otherwise
    // cluster_status() populates it with the current time
if (date != NULL) {
// pcmk_copy_time() guarantees non-NULL
new_scheduler->now = pcmk_copy_time(date);
}
// Unpack everything
cluster_status(new_scheduler);
*scheduler = new_scheduler;
return pcmk_rc_ok;
}
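/* Example caller (a sketch; "out" and "input_xml" are hypothetical, and error
 * handling is abbreviated):
 *
 *   pcmk_scheduler_t *scheduler = NULL;
 *   int rc = pcmk__init_scheduler(out, input_xml, NULL, &scheduler);
 *
 *   if (rc != pcmk_rc_ok) {
 *       return rc;
 *   }
 *   // ... inspect scheduler->nodes, scheduler->resources, etc. ...
 *   pe_free_working_set(scheduler);
 */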
diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c
index 66c63ab845..7eff081eec 100644
--- a/lib/pengine/bundle.c
+++ b/lib/pengine/bundle.c
@@ -1,2149 +1,2149 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <ctype.h>
#include <stdint.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/status.h>
#include <crm/pengine/internal.h>
#include <crm/common/xml.h>
#include <crm/common/output.h>
#include <crm/common/xml_internal.h>
#include <pe_status_private.h>
enum pe__bundle_mount_flags {
pe__bundle_mount_none = 0x00,
// mount instance-specific subdirectory rather than source directly
pe__bundle_mount_subdir = 0x01
};
typedef struct {
char *source;
char *target;
char *options;
uint32_t flags; // bitmask of pe__bundle_mount_flags
} pe__bundle_mount_t;
typedef struct {
char *source;
char *target;
} pe__bundle_port_t;
enum pe__container_agent {
PE__CONTAINER_AGENT_UNKNOWN,
PE__CONTAINER_AGENT_DOCKER,
PE__CONTAINER_AGENT_RKT,
PE__CONTAINER_AGENT_PODMAN,
};
#define PE__CONTAINER_AGENT_UNKNOWN_S "unknown"
#define PE__CONTAINER_AGENT_DOCKER_S "docker"
#define PE__CONTAINER_AGENT_RKT_S "rkt"
#define PE__CONTAINER_AGENT_PODMAN_S "podman"
typedef struct pe__bundle_variant_data_s {
int promoted_max;
int nreplicas;
int nreplicas_per_host;
char *prefix;
char *image;
const char *ip_last;
char *host_network;
char *host_netmask;
char *control_port;
char *container_network;
char *ip_range_start;
gboolean add_host;
gchar *container_host_options;
char *container_command;
char *launcher_options;
const char *attribute_target;
pcmk_resource_t *child;
GList *replicas; // pcmk__bundle_replica_t *
GList *ports; // pe__bundle_port_t *
GList *mounts; // pe__bundle_mount_t *
enum pe__container_agent agent_type;
} pe__bundle_variant_data_t;
#define get_bundle_variant_data(data, rsc) do { \
CRM_ASSERT(pcmk__is_bundle(rsc)); \
data = rsc->private->variant_opaque; \
} while (0)
/*!
* \internal
* \brief Get maximum number of bundle replicas allowed to run
*
* \param[in] rsc Bundle or bundled resource to check
*
* \return Maximum replicas for bundle corresponding to \p rsc
*/
int
pe__bundle_max(const pcmk_resource_t *rsc)
{
const pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, pe__const_top_resource(rsc, true));
return bundle_data->nreplicas;
}
/*!
* \internal
* \brief Get the resource inside a bundle
*
* \param[in] bundle Bundle to check
*
* \return Resource inside \p bundle if any, otherwise NULL
*/
pcmk_resource_t *
pe__bundled_resource(const pcmk_resource_t *rsc)
{
const pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, pe__const_top_resource(rsc, true));
return bundle_data->child;
}
/*!
* \internal
* \brief Get containerized resource corresponding to a given bundle container
*
* \param[in] instance Collective instance that might be a bundle container
*
* \return Bundled resource instance inside \p instance if it is a bundle
* container instance, otherwise NULL
*/
const pcmk_resource_t *
pe__get_rsc_in_container(const pcmk_resource_t *instance)
{
const pe__bundle_variant_data_t *data = NULL;
const pcmk_resource_t *top = pe__const_top_resource(instance, true);
if (!pcmk__is_bundle(top)) {
return NULL;
}
get_bundle_variant_data(data, top);
for (const GList *iter = data->replicas; iter != NULL; iter = iter->next) {
const pcmk__bundle_replica_t *replica = iter->data;
if (instance == replica->container) {
return replica->child;
}
}
return NULL;
}
/*!
* \internal
* \brief Check whether a given node is created by a bundle
*
* \param[in] bundle Bundle resource to check
* \param[in] node Node to check
*
* \return true if \p node is an instance of \p bundle, otherwise false
*/
bool
pe__node_is_bundle_instance(const pcmk_resource_t *bundle,
const pcmk_node_t *node)
{
pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, bundle);
for (GList *iter = bundle_data->replicas; iter != NULL; iter = iter->next) {
pcmk__bundle_replica_t *replica = iter->data;
if (pcmk__same_node(node, replica->node)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Get the container of a bundle's first replica
*
* \param[in] bundle Bundle resource to get container for
*
* \return Container resource from first replica of \p bundle if any,
* otherwise NULL
*/
pcmk_resource_t *
pe__first_container(const pcmk_resource_t *bundle)
{
const pe__bundle_variant_data_t *bundle_data = NULL;
const pcmk__bundle_replica_t *replica = NULL;
get_bundle_variant_data(bundle_data, bundle);
if (bundle_data->replicas == NULL) {
return NULL;
}
replica = bundle_data->replicas->data;
return replica->container;
}
/*!
* \internal
* \brief Iterate over bundle replicas
*
* \param[in,out] bundle Bundle to iterate over
* \param[in] fn Function to call for each replica (its return value
* indicates whether to continue iterating)
* \param[in,out] user_data Pointer to pass to \p fn
*/
void
pe__foreach_bundle_replica(pcmk_resource_t *bundle,
bool (*fn)(pcmk__bundle_replica_t *, void *),
void *user_data)
{
const pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, bundle);
for (GList *iter = bundle_data->replicas; iter != NULL; iter = iter->next) {
if (!fn((pcmk__bundle_replica_t *) iter->data, user_data)) {
break;
}
}
}
/*!
* \internal
* \brief Iterate over const bundle replicas
*
* \param[in] bundle Bundle to iterate over
* \param[in] fn Function to call for each replica (its return value
* indicates whether to continue iterating)
* \param[in,out] user_data Pointer to pass to \p fn
*/
void
pe__foreach_const_bundle_replica(const pcmk_resource_t *bundle,
bool (*fn)(const pcmk__bundle_replica_t *,
void *),
void *user_data)
{
const pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, bundle);
for (const GList *iter = bundle_data->replicas; iter != NULL;
iter = iter->next) {
if (!fn((const pcmk__bundle_replica_t *) iter->data, user_data)) {
break;
}
}
}
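/* Get the next IPv4 address after last_ip in a bundle's address range: the
 * last octet is incremented, carrying into the third octet after .254 (for
 * example, "10.0.0.254" is followed by "10.0.1.1"). Returns NULL if last_ip
 * is not a parseable IPv4 address (IPv6 is not yet supported) or the range
 * is exhausted.
 */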
static char *
next_ip(const char *last_ip)
{
unsigned int oct1 = 0;
unsigned int oct2 = 0;
unsigned int oct3 = 0;
unsigned int oct4 = 0;
int rc = sscanf(last_ip, "%u.%u.%u.%u", &oct1, &oct2, &oct3, &oct4);
if (rc != 4) {
        /* @TODO check for IPv6 */
return NULL;
} else if (oct3 > 253) {
return NULL;
} else if (oct4 > 253) {
++oct3;
oct4 = 1;
} else {
++oct4;
}
return crm_strdup_printf("%u.%u.%u.%u", oct1, oct2, oct3, oct4);
}
static void
allocate_ip(pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica,
GString *buffer)
{
if(data->ip_range_start == NULL) {
return;
} else if(data->ip_last) {
replica->ipaddr = next_ip(data->ip_last);
} else {
replica->ipaddr = strdup(data->ip_range_start);
}
data->ip_last = replica->ipaddr;
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
if (data->add_host) {
g_string_append_printf(buffer, " --add-host=%s-%d:%s",
data->prefix, replica->offset,
replica->ipaddr);
} else {
g_string_append_printf(buffer, " --hosts-entry=%s=%s-%d",
replica->ipaddr, data->prefix,
replica->offset);
}
break;
case PE__CONTAINER_AGENT_RKT:
g_string_append_printf(buffer, " --hosts-entry=%s=%s-%d",
replica->ipaddr, data->prefix,
replica->offset);
break;
default: // PE__CONTAINER_AGENT_UNKNOWN
break;
}
}
static xmlNode *
create_resource(const char *name, const char *provider, const char *kind)
{
xmlNode *rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
crm_xml_add(rsc, PCMK_XA_ID, name);
crm_xml_add(rsc, PCMK_XA_CLASS, PCMK_RESOURCE_CLASS_OCF);
crm_xml_add(rsc, PCMK_XA_PROVIDER, provider);
crm_xml_add(rsc, PCMK_XA_TYPE, kind);
return rsc;
}
/*!
* \internal
* \brief Check whether cluster can manage resource inside container
*
* \param[in,out] data Container variant data
*
* \return TRUE if networking configuration is acceptable, FALSE otherwise
*
* \note The resource is manageable if an IP range or control port has been
* specified. If a control port is used without an IP range, replicas per
* host must be 1.
*/
static bool
valid_network(pe__bundle_variant_data_t *data)
{
if(data->ip_range_start) {
return TRUE;
}
if(data->control_port) {
if(data->nreplicas_per_host > 1) {
pcmk__config_err("Specifying the '" PCMK_XA_CONTROL_PORT "' for %s "
"requires '" PCMK_XA_REPLICAS_PER_HOST "=1'",
data->prefix);
data->nreplicas_per_host = 1;
// @TODO to be sure:
// pcmk__clear_rsc_flags(rsc, pcmk__rsc_unique);
}
return TRUE;
}
return FALSE;
}
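/* Sketch of the IPaddr2 primitive generated below for one replica (IDs and
 * values hypothetical; nvpair IDs omitted for brevity):
 *
 *   <primitive id="httpd-bundle-ip-192.168.122.131" class="ocf"
 *              provider="heartbeat" type="IPaddr2">
 *     <instance_attributes id="httpd-bundle-attributes-0">
 *       <nvpair name="ip" value="192.168.122.131"/>
 *       <nvpair name="cidr_netmask" value="32"/>
 *     </instance_attributes>
 *     <operations>
 *       <op name="monitor" interval="60s"/>
 *     </operations>
 *   </primitive>
 */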
static int
create_ip_resource(pcmk_resource_t *parent, pe__bundle_variant_data_t *data,
pcmk__bundle_replica_t *replica)
{
if(data->ip_range_start) {
char *id = NULL;
xmlNode *xml_ip = NULL;
xmlNode *xml_obj = NULL;
id = crm_strdup_printf("%s-ip-%s", data->prefix, replica->ipaddr);
pcmk__xml_sanitize_id(id);
xml_ip = create_resource(id, "heartbeat", "IPaddr2");
free(id);
xml_obj = pcmk__xe_create(xml_ip, PCMK_XE_INSTANCE_ATTRIBUTES);
pcmk__xe_set_id(xml_obj, "%s-attributes-%d",
data->prefix, replica->offset);
crm_create_nvpair_xml(xml_obj, NULL, "ip", replica->ipaddr);
if(data->host_network) {
crm_create_nvpair_xml(xml_obj, NULL, "nic", data->host_network);
}
if(data->host_netmask) {
crm_create_nvpair_xml(xml_obj, NULL,
"cidr_netmask", data->host_netmask);
} else {
crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", "32");
}
xml_obj = pcmk__xe_create(xml_ip, PCMK_XE_OPERATIONS);
crm_create_op_xml(xml_obj, pcmk__xe_id(xml_ip), PCMK_ACTION_MONITOR,
"60s", NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
if (pe__unpack_resource(xml_ip, &replica->ip, parent,
parent->private->scheduler) != pcmk_rc_ok) {
return pcmk_rc_unpack_error;
}
parent->private->children = g_list_append(parent->private->children,
replica->ip);
}
return pcmk_rc_ok;
}
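/* The generated IP resource looks roughly like this (a sketch for a bundle
* named "httpd", replica 0, address 192.168.122.131, no host netmask given;
* generated nvpair and op IDs omitted):
*
* <primitive id="httpd-ip-192.168.122.131" class="ocf"
* provider="heartbeat" type="IPaddr2">
* <instance_attributes id="httpd-attributes-0">
* <nvpair name="ip" value="192.168.122.131"/>
* <nvpair name="cidr_netmask" value="32"/>
* </instance_attributes>
* <operations>
* <op name="monitor" interval="60s"/>
* </operations>
* </primitive>
*/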
static const char*
container_agent_str(enum pe__container_agent t)
{
switch (t) {
case PE__CONTAINER_AGENT_DOCKER: return PE__CONTAINER_AGENT_DOCKER_S;
case PE__CONTAINER_AGENT_RKT: return PE__CONTAINER_AGENT_RKT_S;
case PE__CONTAINER_AGENT_PODMAN: return PE__CONTAINER_AGENT_PODMAN_S;
default: // PE__CONTAINER_AGENT_UNKNOWN
break;
}
return PE__CONTAINER_AGENT_UNKNOWN_S;
}
static int
create_container_resource(pcmk_resource_t *parent,
const pe__bundle_variant_data_t *data,
pcmk__bundle_replica_t *replica)
{
char *id = NULL;
xmlNode *xml_container = NULL;
xmlNode *xml_obj = NULL;
// Agent-specific
const char *hostname_opt = NULL;
const char *env_opt = NULL;
const char *agent_str = NULL;
int volid = 0; // rkt-only
GString *buffer = NULL;
GString *dbuffer = NULL;
// Where syntax differences are drop-in replacements, set them now
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
hostname_opt = "-h ";
env_opt = "-e ";
break;
case PE__CONTAINER_AGENT_RKT:
hostname_opt = "--hostname=";
env_opt = "--environment=";
break;
default: // PE__CONTAINER_AGENT_UNKNOWN
return pcmk_rc_unpack_error;
}
agent_str = container_agent_str(data->agent_type);
buffer = g_string_sized_new(4096);
id = crm_strdup_printf("%s-%s-%d", data->prefix, agent_str,
replica->offset);
pcmk__xml_sanitize_id(id);
xml_container = create_resource(id, "heartbeat", agent_str);
free(id);
xml_obj = pcmk__xe_create(xml_container, PCMK_XE_INSTANCE_ATTRIBUTES);
pcmk__xe_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset);
crm_create_nvpair_xml(xml_obj, NULL, "image", data->image);
crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", PCMK_VALUE_TRUE);
crm_create_nvpair_xml(xml_obj, NULL, "force_kill", PCMK_VALUE_FALSE);
crm_create_nvpair_xml(xml_obj, NULL, "reuse", PCMK_VALUE_FALSE);
if (data->agent_type == PE__CONTAINER_AGENT_DOCKER) {
g_string_append(buffer, " --restart=no");
}
/* Set a container hostname only if we have an IP to map it to. The user can
* set -h or --uts=host themselves if they want a nicer name for logs, but
* this keeps applications happy that need their hostname to match the IP
* they bind to.
*/
if (data->ip_range_start != NULL) {
g_string_append_printf(buffer, " %s%s-%d", hostname_opt, data->prefix,
replica->offset);
}
pcmk__g_strcat(buffer, " ", env_opt, "PCMK_stderr=1", NULL);
if (data->container_network != NULL) {
pcmk__g_strcat(buffer, " --net=", data->container_network, NULL);
}
if (data->control_port != NULL) {
pcmk__g_strcat(buffer, " ", env_opt, "PCMK_" PCMK__ENV_REMOTE_PORT "=",
data->control_port, NULL);
} else {
g_string_append_printf(buffer, " %sPCMK_" PCMK__ENV_REMOTE_PORT "=%d",
env_opt, DEFAULT_REMOTE_PORT);
}
for (GList *iter = data->mounts; iter != NULL; iter = iter->next) {
pe__bundle_mount_t *mount = (pe__bundle_mount_t *) iter->data;
char *source = NULL;
if (pcmk_is_set(mount->flags, pe__bundle_mount_subdir)) {
source = crm_strdup_printf("%s/%s-%d", mount->source, data->prefix,
replica->offset);
pcmk__add_separated_word(&dbuffer, 1024, source, ",");
}
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
pcmk__g_strcat(buffer,
" -v ", pcmk__s(source, mount->source),
":", mount->target, NULL);
if (mount->options != NULL) {
pcmk__g_strcat(buffer, ":", mount->options, NULL);
}
break;
case PE__CONTAINER_AGENT_RKT:
g_string_append_printf(buffer,
" --volume vol%d,kind=host,"
"source=%s%s%s "
"--mount volume=vol%d,target=%s",
volid, pcmk__s(source, mount->source),
(mount->options != NULL)? "," : "",
pcmk__s(mount->options, ""),
volid, mount->target);
volid++;
break;
default:
break;
}
free(source);
}
for (GList *iter = data->ports; iter != NULL; iter = iter->next) {
pe__bundle_port_t *port = (pe__bundle_port_t *) iter->data;
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
if (replica->ipaddr != NULL) {
pcmk__g_strcat(buffer,
" -p ", replica->ipaddr, ":", port->source,
":", port->target, NULL);
} else if (!pcmk__str_eq(data->container_network,
PCMK_VALUE_HOST, pcmk__str_none)) {
// No need to do port mapping if net == host
pcmk__g_strcat(buffer,
" -p ", port->source, ":", port->target,
NULL);
}
break;
case PE__CONTAINER_AGENT_RKT:
if (replica->ipaddr != NULL) {
pcmk__g_strcat(buffer,
" --port=", port->target,
":", replica->ipaddr, ":", port->source,
NULL);
} else {
pcmk__g_strcat(buffer,
" --port=", port->target, ":", port->source,
NULL);
}
break;
default:
break;
}
}
/* @COMPAT: We should use pcmk__add_word() here, but we can't yet, because
* it would cause restarts during rolling upgrades.
*
* In previous versions of the container resource creation logic, if
* data->launcher_options was not NULL, we appended
* (" %s", data->launcher_options) even when data->launcher_options was an
* empty string, and likewise for data->container_host_options. Using
*
* pcmk__add_word(buffer, 0, data->launcher_options)
*
* would remove that extra trailing space, causing a resource definition
* change.
*/
if (data->launcher_options != NULL) {
pcmk__g_strcat(buffer, " ", data->launcher_options, NULL);
}
if (data->container_host_options != NULL) {
pcmk__g_strcat(buffer, " ", data->container_host_options, NULL);
}
crm_create_nvpair_xml(xml_obj, NULL, "run_opts",
(const char *) buffer->str);
g_string_free(buffer, TRUE);
crm_create_nvpair_xml(xml_obj, NULL, "mount_points",
(dbuffer != NULL)? (const char *) dbuffer->str : "");
if (dbuffer != NULL) {
g_string_free(dbuffer, TRUE);
}
if (replica->child != NULL) {
if (data->container_command != NULL) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
data->container_command);
} else {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
SBIN_DIR "/pacemaker-remoted");
}
/* TODO: Allow users to specify their own?
*
* We just want to know if the container is alive; we'll monitor the
* child independently.
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
#if 0
/* @TODO Consider supporting the use case where we can start and stop
* resources, but not proxy local commands (such as setting node
* attributes), by running the local executor in stand-alone mode.
* However, this would probably be better done via ACLs as with other
* Pacemaker Remote nodes.
*/
} else if ((replica->child != NULL) && data->untrusted) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
CRM_DAEMON_DIR "/pacemaker-execd");
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd",
CRM_DAEMON_DIR "/pacemaker/cts-exec-helper -c poke");
#endif
} else {
if (data->container_command != NULL) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
data->container_command);
}
/* TODO: Allow users to specify their own?
*
* We don't know what's in the container, so we just want to know if it
* is alive.
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
}
xml_obj = pcmk__xe_create(xml_container, PCMK_XE_OPERATIONS);
crm_create_op_xml(xml_obj, pcmk__xe_id(xml_container), PCMK_ACTION_MONITOR,
"60s", NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
if (pe__unpack_resource(xml_container, &replica->container, parent,
parent->private->scheduler) != pcmk_rc_ok) {
return pcmk_rc_unpack_error;
}
pcmk__set_rsc_flags(replica->container, pcmk__rsc_replica_container);
parent->private->children = g_list_append(parent->private->children,
replica->container);
return pcmk_rc_ok;
}
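/* Putting it together, run_opts for a docker-based replica might end up as
* something like (a sketch; the exact string depends on the configuration):
*
* " --restart=no -h httpd-0 -e PCMK_stderr=1 --net=br0"
* " -e PCMK_remote_port=3121 -v /srv/html:/var/www/html:rw"
* " -p 192.168.122.131:80:80"
*
* followed by any launcher options and the generated host options.
*/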
/*!
* \brief Ban a node from a resource's (and its children's) allowed nodes list
*
* \param[in,out] rsc Resource to modify
* \param[in] uname Name of node to ban
*/
static void
disallow_node(pcmk_resource_t *rsc, const char *uname)
{
gpointer match = g_hash_table_lookup(rsc->private->allowed_nodes, uname);
if (match) {
((pcmk_node_t *) match)->assign->score = -PCMK_SCORE_INFINITY;
- ((pcmk_node_t *) match)->assign->probe_mode = pcmk_probe_never;
+ ((pcmk_node_t *) match)->assign->probe_mode = pcmk__probe_never;
}
g_list_foreach(rsc->private->children, (GFunc) disallow_node,
(gpointer) uname);
}
static int
create_remote_resource(pcmk_resource_t *parent, pe__bundle_variant_data_t *data,
pcmk__bundle_replica_t *replica)
{
if (replica->child && valid_network(data)) {
GHashTableIter gIter;
pcmk_node_t *node = NULL;
xmlNode *xml_remote = NULL;
char *id = crm_strdup_printf("%s-%d", data->prefix, replica->offset);
char *port_s = NULL;
const char *uname = NULL;
const char *connect_name = NULL;
if (pe_find_resource(parent->private->scheduler->resources,
id) != NULL) {
free(id);
// The biggest hammer we have
id = crm_strdup_printf("pcmk-internal-%s-remote-%d",
replica->child->id, replica->offset);
// @TODO return error instead of asserting?
CRM_ASSERT(pe_find_resource(parent->private->scheduler->resources,
id) == NULL);
}
/* REMOTE_CONTAINER_HACK: Using "#uname" as the server name when the
* connection does not have its own IP is a magic string that we use to
* support nested remotes (i.e. a bundle running on a remote node).
*/
connect_name = (replica->ipaddr? replica->ipaddr : "#uname");
if (data->control_port == NULL) {
port_s = pcmk__itoa(DEFAULT_REMOTE_PORT);
}
/* This sets replica->container as replica->remote's container, which is
* similar to what happens with guest nodes. This is how the scheduler
* knows that the bundle node is fenced by recovering the container, and
* that the remote connection should be ordered relative to the container.
*/
xml_remote = pe_create_remote_xml(NULL, id, replica->container->id,
NULL, NULL, NULL,
connect_name, (data->control_port?
data->control_port : port_s));
free(port_s);
/* Abandon our created ID, and pull the copy from the XML, because we
* need something that will get freed during scheduler data cleanup to
* use as the node ID and uname.
*/
free(id);
id = NULL;
uname = pcmk__xe_id(xml_remote);
/* Ensure a node has been created for the guest (it may have already
* been, if it has a permanent node attribute), and ensure its weight is
* -INFINITY so no other resources can run on it.
*/
node = pcmk_find_node(parent->private->scheduler, uname);
if (node == NULL) {
node = pe_create_node(uname, uname, PCMK_VALUE_REMOTE,
PCMK_VALUE_MINUS_INFINITY,
parent->private->scheduler);
} else {
node->assign->score = -PCMK_SCORE_INFINITY;
}
- node->assign->probe_mode = pcmk_probe_never;
+ node->assign->probe_mode = pcmk__probe_never;
/* unpack_remote_nodes() ensures that each remote node and guest node
* has a pcmk_node_t entry. Ideally, it would do the same for bundle
* nodes. Unfortunately, a bundle has to be mostly unpacked before it's
* obvious what nodes will be needed, so we do it just above.
*
* Worse, that means that the node may have been utilized while
* unpacking other resources, without our weight correction. The most
* likely place for this to happen is when pe__unpack_resource() calls
* resource_location() to set a default score in symmetric clusters.
* This adds a node *copy* to each resource's allowed nodes, and these
* copies will have the wrong weight.
*
* As a hacky workaround, fix those copies here.
*
* @TODO Possible alternative: ensure bundles are unpacked before other
* resources, so the weight is correct before any copies are made.
*/
g_list_foreach(parent->private->scheduler->resources,
(GFunc) disallow_node, (gpointer) uname);
replica->node = pe__copy_node(node);
replica->node->assign->score = 500;
- replica->node->assign->probe_mode = pcmk_probe_exclusive;
+ replica->node->assign->probe_mode = pcmk__probe_exclusive;
/* Ensure the node shows up as allowed and with the correct discovery set */
if (replica->child->private->allowed_nodes != NULL) {
g_hash_table_destroy(replica->child->private->allowed_nodes);
}
replica->child->private->allowed_nodes = pcmk__strkey_table(NULL, free);
g_hash_table_insert(replica->child->private->allowed_nodes,
(gpointer) replica->node->private->id,
pe__copy_node(replica->node));
{
const pcmk_resource_t *parent = replica->child->private->parent;
pcmk_node_t *copy = pe__copy_node(replica->node);
copy->assign->score = -PCMK_SCORE_INFINITY;
g_hash_table_insert(parent->private->allowed_nodes,
(gpointer) replica->node->private->id, copy);
}
if (pe__unpack_resource(xml_remote, &replica->remote, parent,
parent->private->scheduler) != pcmk_rc_ok) {
return pcmk_rc_unpack_error;
}
g_hash_table_iter_init(&gIter, replica->remote->private->allowed_nodes);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&node)) {
if (pcmk__is_pacemaker_remote_node(node)) {
/* Remote resources can run only on 'normal' cluster nodes */
node->assign->score = -PCMK_SCORE_INFINITY;
}
}
replica->node->private->remote = replica->remote;
// Ensure pcmk__is_guest_or_bundle_node() functions correctly
replica->remote->private->launcher = replica->container;
/* A bundle's #kind is closer to "container" (guest node) than the
* "remote" set by pe_create_node().
*/
pcmk__insert_dup(replica->node->private->attrs,
CRM_ATTR_KIND, "container");
/* One effect of this is that unpack_launcher() will add
* replica->remote to replica->container's launched resources, which
* will make pe__resource_contains_guest_node() true for
* replica->container.
*
* replica->child does NOT get added to replica->container's launched
* resources. The only noticeable effect if it did would be for its
* fail count to be taken into account when checking
* replica->container's migration threshold.
*/
parent->private->children = g_list_append(parent->private->children,
replica->remote);
}
return pcmk_rc_ok;
}
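/* To summarize the score juggling above (a recap, not normative): the guest
* node ends up at -INFINITY in every other resource's allowed nodes via
* disallow_node(), replica->child is allowed only on the guest node (score
* 500, exclusive probing), and the remote connection resource itself may run
* only on full cluster nodes.
*/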
static int
create_replica_resources(pcmk_resource_t *parent,
pe__bundle_variant_data_t *data,
pcmk__bundle_replica_t *replica)
{
int rc = pcmk_rc_ok;
rc = create_container_resource(parent, data, replica);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = create_ip_resource(parent, data, replica);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = create_remote_resource(parent, data, replica);
if (rc != pcmk_rc_ok) {
return rc;
}
if ((replica->child != NULL) && (replica->ipaddr != NULL)) {
pcmk__insert_meta(replica->child->private, "external-ip",
replica->ipaddr);
}
if (replica->remote != NULL) {
/*
* Allow the remote connection resource to be allocated to a
* different node than the one on which the container is active.
*
* This makes it possible to have Pacemaker Remote nodes running
* containers with pacemaker-remoted inside in order to start
* services inside those containers.
*/
pcmk__set_rsc_flags(replica->remote, pcmk__rsc_remote_nesting_allowed);
}
return rc;
}
static void
mount_add(pe__bundle_variant_data_t *bundle_data, const char *source,
const char *target, const char *options, uint32_t flags)
{
pe__bundle_mount_t *mount = pcmk__assert_alloc(1,
sizeof(pe__bundle_mount_t));
mount->source = pcmk__str_copy(source);
mount->target = pcmk__str_copy(target);
mount->options = pcmk__str_copy(options);
mount->flags = flags;
bundle_data->mounts = g_list_append(bundle_data->mounts, mount);
}
static void
mount_free(pe__bundle_mount_t *mount)
{
free(mount->source);
free(mount->target);
free(mount->options);
free(mount);
}
static void
port_free(pe__bundle_port_t *port)
{
free(port->source);
free(port->target);
free(port);
}
static pcmk__bundle_replica_t *
replica_for_remote(pcmk_resource_t *remote)
{
pcmk_resource_t *top = remote;
pe__bundle_variant_data_t *bundle_data = NULL;
if (top == NULL) {
return NULL;
}
while (top->private->parent != NULL) {
top = top->private->parent;
}
get_bundle_variant_data(bundle_data, top);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pcmk__bundle_replica_t *replica = gIter->data;
if (replica->remote == remote) {
return replica;
}
}
CRM_LOG_ASSERT(FALSE);
return NULL;
}
bool
pe__bundle_needs_remote_name(pcmk_resource_t *rsc)
{
const char *value;
GHashTable *params = NULL;
if (rsc == NULL) {
return false;
}
// Use NULL node since pcmk__bundle_expand() uses that to set value
params = pe_rsc_params(rsc, NULL, rsc->private->scheduler);
value = g_hash_table_lookup(params, PCMK_REMOTE_RA_ADDR);
return pcmk__str_eq(value, "#uname", pcmk__str_casei)
&& xml_contains_remote_node(rsc->private->xml);
}
const char *
pe__add_bundle_remote_name(pcmk_resource_t *rsc, xmlNode *xml,
const char *field)
{
/* REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with
* Pacemaker Remote inside
*/
pcmk_node_t *node = NULL;
pcmk__bundle_replica_t *replica = NULL;
if (!pe__bundle_needs_remote_name(rsc)) {
return NULL;
}
replica = replica_for_remote(rsc);
if (replica == NULL) {
return NULL;
}
node = replica->container->private->assigned_node;
if (node == NULL) {
/* If it won't be running anywhere after the
* transition, go with where it's running now.
*/
node = pcmk__current_node(replica->container);
}
if(node == NULL) {
crm_trace("Cannot determine address for bundle connection %s", rsc->id);
return NULL;
}
crm_trace("Setting address for bundle connection %s to bundle host %s",
rsc->id, pcmk__node_name(node));
if(xml != NULL && field != NULL) {
crm_xml_add(xml, field, node->private->name);
}
return node->private->name;
}
#define pe__set_bundle_mount_flags(mount_xml, flags, flags_to_set) do { \
flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Bundle mount", pcmk__xe_id(mount_xml), \
flags, (flags_to_set), #flags_to_set); \
} while (0)
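/* Typical use (a sketch; see the storage unpacking below):
*
* int flags = pe__bundle_mount_none;
* pe__set_bundle_mount_flags(xml_child, flags, pe__bundle_mount_subdir);
*
* This logs the change at trace level and updates "flags" in place.
*/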
gboolean
pe__unpack_bundle(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
{
const char *value = NULL;
xmlNode *xml_obj = NULL;
const xmlNode *xml_child = NULL;
xmlNode *xml_resource = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
bool need_log_mount = TRUE;
CRM_ASSERT(rsc != NULL);
pcmk__rsc_trace(rsc, "Processing resource %s...", rsc->id);
bundle_data = pcmk__assert_alloc(1, sizeof(pe__bundle_variant_data_t));
rsc->private->variant_opaque = bundle_data;
bundle_data->prefix = strdup(rsc->id);
xml_obj = pcmk__xe_first_child(rsc->private->xml, PCMK_XE_DOCKER, NULL,
NULL);
if (xml_obj != NULL) {
bundle_data->agent_type = PE__CONTAINER_AGENT_DOCKER;
} else {
xml_obj = pcmk__xe_first_child(rsc->private->xml, PCMK__XE_RKT, NULL,
NULL);
if (xml_obj != NULL) {
pcmk__warn_once(pcmk__wo_rkt,
"Support for " PCMK__XE_RKT " in bundles "
"(such as %s) is deprecated and will be "
"removed in a future release", rsc->id);
bundle_data->agent_type = PE__CONTAINER_AGENT_RKT;
} else {
xml_obj = pcmk__xe_first_child(rsc->private->xml, PCMK_XE_PODMAN,
NULL, NULL);
if (xml_obj != NULL) {
bundle_data->agent_type = PE__CONTAINER_AGENT_PODMAN;
} else {
return FALSE;
}
}
}
// Use 0 for default, minimum, and invalid PCMK_XA_PROMOTED_MAX
value = crm_element_value(xml_obj, PCMK_XA_PROMOTED_MAX);
if (value == NULL) {
// @COMPAT deprecated since 2.0.0
value = crm_element_value(xml_obj, PCMK__XA_PROMOTED_MAX_LEGACY);
if (value != NULL) {
pcmk__warn_once(pcmk__wo_bundle_master,
"Support for the " PCMK__XA_PROMOTED_MAX_LEGACY
" attribute (such as in %s) is deprecated and "
"will be removed in a future release. Use "
PCMK_XA_PROMOTED_MAX " instead.",
rsc->id);
}
}
pcmk__scan_min_int(value, &bundle_data->promoted_max, 0);
/* Default replicas to PCMK_XA_PROMOTED_MAX if it was specified, and to 1
* otherwise
*/
value = crm_element_value(xml_obj, PCMK_XA_REPLICAS);
if ((value == NULL) && (bundle_data->promoted_max > 0)) {
bundle_data->nreplicas = bundle_data->promoted_max;
} else {
pcmk__scan_min_int(value, &bundle_data->nreplicas, 1);
}
/*
* Communication between containers on the same host via the
* floating IPs only works if the container is started with:
* --userland-proxy=false --ip-masq=false
*/
value = crm_element_value(xml_obj, PCMK_XA_REPLICAS_PER_HOST);
pcmk__scan_min_int(value, &bundle_data->nreplicas_per_host, 1);
if (bundle_data->nreplicas_per_host == 1) {
pcmk__clear_rsc_flags(rsc, pcmk__rsc_unique);
}
bundle_data->container_command =
crm_element_value_copy(xml_obj, PCMK_XA_RUN_COMMAND);
bundle_data->launcher_options = crm_element_value_copy(xml_obj,
PCMK_XA_OPTIONS);
bundle_data->image = crm_element_value_copy(xml_obj, PCMK_XA_IMAGE);
bundle_data->container_network = crm_element_value_copy(xml_obj,
PCMK_XA_NETWORK);
xml_obj = pcmk__xe_first_child(rsc->private->xml, PCMK_XE_NETWORK, NULL,
NULL);
if(xml_obj) {
bundle_data->ip_range_start =
crm_element_value_copy(xml_obj, PCMK_XA_IP_RANGE_START);
bundle_data->host_netmask =
crm_element_value_copy(xml_obj, PCMK_XA_HOST_NETMASK);
bundle_data->host_network =
crm_element_value_copy(xml_obj, PCMK_XA_HOST_INTERFACE);
bundle_data->control_port =
crm_element_value_copy(xml_obj, PCMK_XA_CONTROL_PORT);
value = crm_element_value(xml_obj, PCMK_XA_ADD_HOST);
if (crm_str_to_boolean(value, &bundle_data->add_host) != 1) {
bundle_data->add_host = TRUE;
}
for (xml_child = pcmk__xe_first_child(xml_obj, PCMK_XE_PORT_MAPPING,
NULL, NULL);
xml_child != NULL; xml_child = pcmk__xe_next_same(xml_child)) {
pe__bundle_port_t *port =
pcmk__assert_alloc(1, sizeof(pe__bundle_port_t));
port->source = crm_element_value_copy(xml_child, PCMK_XA_PORT);
if(port->source == NULL) {
port->source = crm_element_value_copy(xml_child, PCMK_XA_RANGE);
} else {
port->target = crm_element_value_copy(xml_child,
PCMK_XA_INTERNAL_PORT);
}
if(port->source != NULL && strlen(port->source) > 0) {
if(port->target == NULL) {
port->target = strdup(port->source);
}
bundle_data->ports = g_list_append(bundle_data->ports, port);
} else {
pcmk__config_err("Invalid " PCMK_XA_PORT " directive %s",
pcmk__xe_id(xml_child));
port_free(port);
}
}
}
xml_obj = pcmk__xe_first_child(rsc->private->xml, PCMK_XE_STORAGE, NULL,
NULL);
for (xml_child = pcmk__xe_first_child(xml_obj, PCMK_XE_STORAGE_MAPPING,
NULL, NULL);
xml_child != NULL; xml_child = pcmk__xe_next_same(xml_child)) {
const char *source = crm_element_value(xml_child, PCMK_XA_SOURCE_DIR);
const char *target = crm_element_value(xml_child, PCMK_XA_TARGET_DIR);
const char *options = crm_element_value(xml_child, PCMK_XA_OPTIONS);
int flags = pe__bundle_mount_none;
if (source == NULL) {
source = crm_element_value(xml_child, PCMK_XA_SOURCE_DIR_ROOT);
pe__set_bundle_mount_flags(xml_child, flags,
pe__bundle_mount_subdir);
}
if (source && target) {
mount_add(bundle_data, source, target, options, flags);
if (strcmp(target, "/var/log") == 0) {
need_log_mount = FALSE;
}
} else {
pcmk__config_err("Invalid mount directive %s",
pcmk__xe_id(xml_child));
}
}
xml_obj = pcmk__xe_first_child(rsc->private->xml, PCMK_XE_PRIMITIVE, NULL,
NULL);
if (xml_obj && valid_network(bundle_data)) {
const char *suffix = NULL;
char *value = NULL;
xmlNode *xml_set = NULL;
xml_resource = pcmk__xe_create(NULL, PCMK_XE_CLONE);
/* @COMPAT We no longer use the <master> tag, but we need to keep it as
* part of the resource name, so that bundles don't restart in a rolling
* upgrade. (It also avoids needing to change regression tests.)
*/
suffix = (const char *) xml_resource->name;
if (bundle_data->promoted_max > 0) {
suffix = "master";
}
pcmk__xe_set_id(xml_resource, "%s-%s", bundle_data->prefix, suffix);
xml_set = pcmk__xe_create(xml_resource, PCMK_XE_META_ATTRIBUTES);
pcmk__xe_set_id(xml_set, "%s-%s-meta",
bundle_data->prefix, xml_resource->name);
crm_create_nvpair_xml(xml_set, NULL,
PCMK_META_ORDERED, PCMK_VALUE_TRUE);
value = pcmk__itoa(bundle_data->nreplicas);
crm_create_nvpair_xml(xml_set, NULL, PCMK_META_CLONE_MAX, value);
free(value);
value = pcmk__itoa(bundle_data->nreplicas_per_host);
crm_create_nvpair_xml(xml_set, NULL, PCMK_META_CLONE_NODE_MAX, value);
free(value);
crm_create_nvpair_xml(xml_set, NULL, PCMK_META_GLOBALLY_UNIQUE,
pcmk__btoa(bundle_data->nreplicas_per_host > 1));
if (bundle_data->promoted_max) {
crm_create_nvpair_xml(xml_set, NULL,
PCMK_META_PROMOTABLE, PCMK_VALUE_TRUE);
value = pcmk__itoa(bundle_data->promoted_max);
crm_create_nvpair_xml(xml_set, NULL, PCMK_META_PROMOTED_MAX, value);
free(value);
}
//crm_xml_add(xml_obj, PCMK_XA_ID, bundle_data->prefix);
pcmk__xml_copy(xml_resource, xml_obj);
} else if(xml_obj) {
pcmk__config_err("Cannot control %s inside %s without either "
PCMK_XA_IP_RANGE_START " or " PCMK_XA_CONTROL_PORT,
rsc->id, pcmk__xe_id(xml_obj));
return FALSE;
}
if(xml_resource) {
int lpc = 0;
GList *childIter = NULL;
pe__bundle_port_t *port = NULL;
GString *buffer = NULL;
if (pe__unpack_resource(xml_resource, &(bundle_data->child), rsc,
scheduler) != pcmk_rc_ok) {
return FALSE;
}
/* Currently, we always map the default authentication key location
* into the same location inside the container.
*
* Ideally, we would respect the host's PCMK_authkey_location, but:
* - it may be different on different nodes;
* - the actual connection will do extra checking to make sure the key
* file exists and is readable, which we can't do here on the DC;
* - tools such as crm_resource and crm_simulate may not have the same
* environment variables as the cluster, causing operation digests to
* differ.
*
* Always using the default location inside the container is fine,
* because we control the pacemaker_remote environment, and it avoids
* having to pass another environment variable to the container.
*
* @TODO A better solution may be to have only pacemaker_remote use the
* environment variable, and have the cluster nodes use a new
* cluster option for key location. This would introduce the limitation
* of the location being the same on all cluster nodes, but that's
* reasonable.
*/
mount_add(bundle_data, DEFAULT_REMOTE_KEY_LOCATION,
DEFAULT_REMOTE_KEY_LOCATION, NULL, pe__bundle_mount_none);
if (need_log_mount) {
mount_add(bundle_data, CRM_BUNDLE_DIR, "/var/log", NULL,
pe__bundle_mount_subdir);
}
port = pcmk__assert_alloc(1, sizeof(pe__bundle_port_t));
if(bundle_data->control_port) {
port->source = strdup(bundle_data->control_port);
} else {
/* If we wanted to respect PCMK_remote_port, we could use
* crm_default_remote_port() here and elsewhere in this file instead
* of DEFAULT_REMOTE_PORT.
*
* However, it gains nothing, since we control both the container
* environment and the connection resource parameters, and the user
* can use a different port if desired by setting
* PCMK_XA_CONTROL_PORT.
*/
port->source = pcmk__itoa(DEFAULT_REMOTE_PORT);
}
port->target = strdup(port->source);
bundle_data->ports = g_list_append(bundle_data->ports, port);
buffer = g_string_sized_new(1024);
for (childIter = bundle_data->child->private->children;
childIter != NULL; childIter = childIter->next) {
pcmk__bundle_replica_t *replica = NULL;
replica = pcmk__assert_alloc(1, sizeof(pcmk__bundle_replica_t));
replica->child = childIter->data;
pcmk__set_rsc_flags(replica->child, pcmk__rsc_exclusive_probes);
replica->offset = lpc++;
// Ensure the child's notify gets set based on the underlying primitive's value
if (pcmk_is_set(replica->child->flags, pcmk__rsc_notify)) {
pcmk__set_rsc_flags(bundle_data->child, pcmk__rsc_notify);
}
allocate_ip(bundle_data, replica, buffer);
bundle_data->replicas = g_list_append(bundle_data->replicas,
replica);
bundle_data->attribute_target =
g_hash_table_lookup(replica->child->private->meta,
PCMK_META_CONTAINER_ATTRIBUTE_TARGET);
}
bundle_data->container_host_options = g_string_free(buffer, FALSE);
if (bundle_data->attribute_target) {
pcmk__insert_dup(rsc->private->meta,
PCMK_META_CONTAINER_ATTRIBUTE_TARGET,
bundle_data->attribute_target);
pcmk__insert_dup(bundle_data->child->private->meta,
PCMK_META_CONTAINER_ATTRIBUTE_TARGET,
bundle_data->attribute_target);
}
} else {
// Just a naked container, no pacemaker-remote
GString *buffer = g_string_sized_new(1024);
for (int lpc = 0; lpc < bundle_data->nreplicas; lpc++) {
pcmk__bundle_replica_t *replica = NULL;
replica = pcmk__assert_alloc(1, sizeof(pcmk__bundle_replica_t));
replica->offset = lpc;
allocate_ip(bundle_data, replica, buffer);
bundle_data->replicas = g_list_append(bundle_data->replicas,
replica);
}
bundle_data->container_host_options = g_string_free(buffer, FALSE);
}
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pcmk__bundle_replica_t *replica = gIter->data;
if (create_replica_resources(rsc, bundle_data, replica) != pcmk_rc_ok) {
pcmk__config_err("Failed unpacking resource %s", rsc->id);
rsc->private->fns->free(rsc);
return FALSE;
}
/* Utilization needs special handling for bundles. It makes no sense for
* the inner primitive to have utilization, because it is tied
* one-to-one to the guest node created by the container resource -- and
* there's no way to set capacities for that guest node anyway.
*
* What the user really wants is to configure utilization for the
* container. However, the schema only allows utilization for
* primitives, and the container resource is implicit anyway, so the
* user can *only* configure utilization for the inner primitive. If
* they do, move the primitive's utilization values to the container.
*
* @TODO This means that bundles without an inner primitive can't have
* utilization. An alternative might be to allow utilization values in
* the top-level bundle XML in the schema, and copy those to each
* container.
*/
if (replica->child != NULL) {
GHashTable *empty = replica->container->private->utilization;
replica->container->private->utilization =
replica->child->private->utilization;
replica->child->private->utilization = empty;
}
}
if (bundle_data->child) {
rsc->private->children = g_list_append(rsc->private->children,
bundle_data->child);
}
return TRUE;
}
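/* For reference, a minimal bundle configuration that this function unpacks
* might look like the following (a sketch only; see the Pacemaker schema for
* the authoritative syntax):
*
* <bundle id="httpd-bundle">
* <podman image="pcmk:httpd" replicas="3"/>
* <network ip-range-start="192.168.122.131" host-netmask="24">
* <port-mapping id="httpd-port" port="80"/>
* </network>
* <storage>
* <storage-mapping id="httpd-root" source-dir="/srv/html"
* target-dir="/var/www/html" options="rw"/>
* </storage>
* <primitive id="httpd" class="ocf" provider="heartbeat" type="apache"/>
* </bundle>
*/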
static int
replica_resource_active(pcmk_resource_t *rsc, gboolean all)
{
if (rsc) {
gboolean child_active = rsc->private->fns->active(rsc, all);
if (child_active && !all) {
return TRUE;
} else if (!child_active && all) {
return FALSE;
}
}
return -1;
}
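/* Note that replica_resource_active() is effectively tri-state: TRUE or FALSE
* answer the "any active" (all == FALSE) or "all active" (all == TRUE)
* question immediately, while -1 means "no decision yet" and the caller moves
* on to the replica's next resource.
*/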
gboolean
pe__bundle_active(pcmk_resource_t *rsc, gboolean all)
{
pe__bundle_variant_data_t *bundle_data = NULL;
GList *iter = NULL;
get_bundle_variant_data(bundle_data, rsc);
for (iter = bundle_data->replicas; iter != NULL; iter = iter->next) {
pcmk__bundle_replica_t *replica = iter->data;
int rsc_active;
rsc_active = replica_resource_active(replica->ip, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = replica_resource_active(replica->child, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = replica_resource_active(replica->container, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = replica_resource_active(replica->remote, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
}
/* If "all" is TRUE, we've already checked that no resources were inactive,
* so return TRUE; if "all" is FALSE, we didn't find any active resources,
* so return FALSE.
*/
return all;
}
/*!
* \internal
* \brief Find the bundle replica corresponding to a given node
*
* \param[in] bundle Top-level bundle resource
* \param[in] node Node to search for
*
* \return Bundle replica if found, NULL otherwise
*/
pcmk_resource_t *
pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_ASSERT(bundle && node);
get_bundle_variant_data(bundle_data, bundle);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pcmk__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica && replica->node);
if (pcmk__same_node(replica->node, node)) {
return replica->child;
}
}
return NULL;
}
PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *",
"GList *")
int
pe__bundle_xml(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
pe__bundle_variant_data_t *bundle_data = NULL;
int rc = pcmk_rc_no_output;
gboolean printed_header = FALSE;
gboolean print_everything = TRUE;
const char *desc = NULL;
CRM_ASSERT(rsc != NULL);
get_bundle_variant_data(bundle_data, rsc);
if (rsc->private->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pcmk__bundle_replica_t *replica = gIter->data;
pcmk_resource_t *ip = replica->ip;
pcmk_resource_t *child = replica->child;
pcmk_resource_t *container = replica->container;
pcmk_resource_t *remote = replica->remote;
char *id = NULL;
gboolean print_ip, print_child, print_ctnr, print_remote;
CRM_ASSERT(replica);
if (pcmk__rsc_filtered_by_node(container, only_node)) {
continue;
}
print_ip = (ip != NULL)
&& !ip->private->fns->is_filtered(ip, only_rsc,
print_everything);
print_child = (child != NULL)
&& !child->private->fns->is_filtered(child, only_rsc,
print_everything);
print_ctnr = !container->private->fns->is_filtered(container, only_rsc,
print_everything);
print_remote = (remote != NULL)
&& !remote->private->fns->is_filtered(remote, only_rsc,
print_everything);
if (!print_everything && !print_ip && !print_child && !print_ctnr && !print_remote) {
continue;
}
if (!printed_header) {
const char *type = container_agent_str(bundle_data->agent_type);
const char *unique = pcmk__flag_text(rsc->flags, pcmk__rsc_unique);
const char *maintenance = pcmk__flag_text(rsc->flags,
pcmk__rsc_maintenance);
const char *managed = pcmk__flag_text(rsc->flags,
pcmk__rsc_managed);
const char *failed = pcmk__flag_text(rsc->flags, pcmk__rsc_failed);
printed_header = TRUE;
desc = pe__resource_description(rsc, show_opts);
rc = pe__name_and_nvpairs_xml(out, true, PCMK_XE_BUNDLE,
PCMK_XA_ID, rsc->id,
PCMK_XA_TYPE, type,
PCMK_XA_IMAGE, bundle_data->image,
PCMK_XA_UNIQUE, unique,
PCMK_XA_MAINTENANCE, maintenance,
PCMK_XA_MANAGED, managed,
PCMK_XA_FAILED, failed,
PCMK_XA_DESCRIPTION, desc,
NULL);
CRM_ASSERT(rc == pcmk_rc_ok);
}
id = pcmk__itoa(replica->offset);
rc = pe__name_and_nvpairs_xml(out, true, PCMK_XE_REPLICA,
PCMK_XA_ID, id,
NULL);
free(id);
CRM_ASSERT(rc == pcmk_rc_ok);
if (print_ip) {
out->message(out, (const char *) ip->private->xml->name, show_opts,
ip, only_node, only_rsc);
}
if (print_child) {
out->message(out, (const char *) child->private->xml->name,
show_opts, child, only_node, only_rsc);
}
if (print_ctnr) {
out->message(out, (const char *) container->private->xml->name,
show_opts, container, only_node, only_rsc);
}
if (print_remote) {
out->message(out, (const char *) remote->private->xml->name,
show_opts, remote, only_node, only_rsc);
}
pcmk__output_xml_pop_parent(out); // replica
}
if (printed_header) {
pcmk__output_xml_pop_parent(out); // bundle
}
return rc;
}
static void
pe__bundle_replica_output_html(pcmk__output_t *out,
pcmk__bundle_replica_t *replica,
pcmk_node_t *node, uint32_t show_opts)
{
pcmk_resource_t *rsc = replica->child;
int offset = 0;
char buffer[LINE_MAX];
if(rsc == NULL) {
rsc = replica->container;
}
if (replica->remote) {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s",
rsc_printable_id(replica->remote));
} else {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s",
rsc_printable_id(replica->container));
}
if (replica->ipaddr) {
offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)",
replica->ipaddr);
}
pe__common_output_html(out, rsc, buffer, node, show_opts);
}
/*!
* \internal
* \brief Get a string describing a resource's unmanaged state or lack thereof
*
* \param[in] rsc Resource to describe
*
* \return A string indicating that a resource is in maintenance mode or
* otherwise unmanaged, or an empty string otherwise
*/
static const char *
get_unmanaged_str(const pcmk_resource_t *rsc)
{
if (pcmk_is_set(rsc->flags, pcmk__rsc_maintenance)) {
return " (maintenance)";
}
if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
return " (unmanaged)";
}
return "";
}
PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *",
"GList *")
int
pe__bundle_html(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
const char *desc = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
int rc = pcmk_rc_no_output;
gboolean print_everything = TRUE;
CRM_ASSERT(rsc != NULL);
get_bundle_variant_data(bundle_data, rsc);
desc = pe__resource_description(rsc, show_opts);
if (rsc->private->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pcmk__bundle_replica_t *replica = gIter->data;
pcmk_resource_t *ip = replica->ip;
pcmk_resource_t *child = replica->child;
pcmk_resource_t *container = replica->container;
pcmk_resource_t *remote = replica->remote;
gboolean print_ip, print_child, print_ctnr, print_remote;
CRM_ASSERT(replica);
if (pcmk__rsc_filtered_by_node(container, only_node)) {
continue;
}
print_ip = (ip != NULL)
&& !ip->private->fns->is_filtered(ip, only_rsc,
print_everything);
print_child = (child != NULL)
&& !child->private->fns->is_filtered(child, only_rsc,
print_everything);
print_ctnr = !container->private->fns->is_filtered(container, only_rsc,
print_everything);
print_remote = (remote != NULL)
&& !remote->private->fns->is_filtered(remote, only_rsc,
print_everything);
if (pcmk_is_set(show_opts, pcmk_show_implicit_rscs) ||
(print_everything == FALSE && (print_ip || print_child || print_ctnr || print_remote))) {
/* The output messages used below require pcmk_show_implicit_rscs to
* be set to do anything.
*/
uint32_t new_show_opts = show_opts | pcmk_show_implicit_rscs;
PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s",
(bundle_data->nreplicas > 1)? " set" : "",
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "",
desc ? " (" : "", desc ? desc : "", desc ? ")" : "",
get_unmanaged_str(rsc));
if (pcmk__list_of_multiple(bundle_data->replicas)) {
out->begin_list(out, NULL, NULL, "Replica[%d]", replica->offset);
}
if (print_ip) {
out->message(out, (const char *) ip->private->xml->name,
new_show_opts, ip, only_node, only_rsc);
}
if (print_child) {
out->message(out, (const char *) child->private->xml->name,
new_show_opts, child, only_node, only_rsc);
}
if (print_ctnr) {
out->message(out, (const char *) container->private->xml->name,
new_show_opts, container, only_node, only_rsc);
}
if (print_remote) {
out->message(out, (const char *) remote->private->xml->name,
new_show_opts, remote, only_node, only_rsc);
}
if (pcmk__list_of_multiple(bundle_data->replicas)) {
out->end_list(out);
}
} else if (print_everything == FALSE && !(print_ip || print_child || print_ctnr || print_remote)) {
continue;
} else {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s",
(bundle_data->nreplicas > 1)? " set" : "",
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "",
desc ? " (" : "", desc ? desc : "", desc ? ")" : "",
get_unmanaged_str(rsc));
pe__bundle_replica_output_html(out, replica,
pcmk__current_node(container),
show_opts);
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
static void
pe__bundle_replica_output_text(pcmk__output_t *out,
pcmk__bundle_replica_t *replica,
pcmk_node_t *node, uint32_t show_opts)
{
const pcmk_resource_t *rsc = replica->child;
int offset = 0;
char buffer[LINE_MAX];
if(rsc == NULL) {
rsc = replica->container;
}
if (replica->remote) {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s",
rsc_printable_id(replica->remote));
} else {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s",
rsc_printable_id(replica->container));
}
if (replica->ipaddr) {
offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)",
replica->ipaddr);
}
pe__common_output_text(out, rsc, buffer, node, show_opts);
}
PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *",
"GList *")
int
pe__bundle_text(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
const char *desc = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
int rc = pcmk_rc_no_output;
gboolean print_everything = TRUE;
desc = pe__resource_description(rsc, show_opts);
CRM_ASSERT(rsc != NULL);
get_bundle_variant_data(bundle_data, rsc);
if (rsc->private->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pcmk__bundle_replica_t *replica = gIter->data;
pcmk_resource_t *ip = replica->ip;
pcmk_resource_t *child = replica->child;
pcmk_resource_t *container = replica->container;
pcmk_resource_t *remote = replica->remote;
gboolean print_ip, print_child, print_ctnr, print_remote;
CRM_ASSERT(replica);
if (pcmk__rsc_filtered_by_node(container, only_node)) {
continue;
}
print_ip = (ip != NULL)
&& !ip->private->fns->is_filtered(ip, only_rsc,
print_everything);
print_child = (child != NULL)
&& !child->private->fns->is_filtered(child, only_rsc,
print_everything);
print_ctnr = !container->private->fns->is_filtered(container, only_rsc,
print_everything);
print_remote = (remote != NULL)
&& !remote->private->fns->is_filtered(remote, only_rsc,
print_everything);
if (pcmk_is_set(show_opts, pcmk_show_implicit_rscs) ||
(print_everything == FALSE && (print_ip || print_child || print_ctnr || print_remote))) {
/* The output messages used below require pcmk_show_implicit_rscs to
* be set to do anything.
*/
uint32_t new_show_opts = show_opts | pcmk_show_implicit_rscs;
PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s",
(bundle_data->nreplicas > 1)? " set" : "",
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "",
desc ? " (" : "", desc ? desc : "", desc ? ")" : "",
get_unmanaged_str(rsc));
if (pcmk__list_of_multiple(bundle_data->replicas)) {
out->list_item(out, NULL, "Replica[%d]", replica->offset);
}
out->begin_list(out, NULL, NULL, NULL);
if (print_ip) {
out->message(out, (const char *) ip->private->xml->name,
new_show_opts, ip, only_node, only_rsc);
}
if (print_child) {
out->message(out, (const char *) child->private->xml->name,
new_show_opts, child, only_node, only_rsc);
}
if (print_ctnr) {
out->message(out, (const char *) container->private->xml->name,
new_show_opts, container, only_node, only_rsc);
}
if (print_remote) {
out->message(out, (const char *) remote->private->xml->name,
new_show_opts, remote, only_node, only_rsc);
}
out->end_list(out);
} else if (print_everything == FALSE && !(print_ip || print_child || print_ctnr || print_remote)) {
continue;
} else {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s",
(bundle_data->nreplicas > 1)? " set" : "",
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "",
desc ? " (" : "", desc ? desc : "", desc ? ")" : "",
get_unmanaged_str(rsc));
pe__bundle_replica_output_text(out, replica,
pcmk__current_node(container),
show_opts);
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
static void
free_bundle_replica(pcmk__bundle_replica_t *replica)
{
if (replica == NULL) {
return;
}
if (replica->node) {
free(replica->node);
replica->node = NULL;
}
if (replica->ip) {
pcmk__xml_free(replica->ip->private->xml);
replica->ip->private->xml = NULL;
replica->ip->private->fns->free(replica->ip);
}
if (replica->container) {
pcmk__xml_free(replica->container->private->xml);
replica->container->private->xml = NULL;
replica->container->private->fns->free(replica->container);
}
if (replica->remote) {
pcmk__xml_free(replica->remote->private->xml);
replica->remote->private->xml = NULL;
replica->remote->private->fns->free(replica->remote);
}
free(replica->ipaddr);
free(replica);
}
void
pe__free_bundle(pcmk_resource_t *rsc)
{
pe__bundle_variant_data_t *bundle_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_bundle_variant_data(bundle_data, rsc);
pcmk__rsc_trace(rsc, "Freeing %s", rsc->id);
free(bundle_data->prefix);
free(bundle_data->image);
free(bundle_data->control_port);
free(bundle_data->host_network);
free(bundle_data->host_netmask);
free(bundle_data->ip_range_start);
free(bundle_data->container_network);
free(bundle_data->launcher_options);
free(bundle_data->container_command);
g_free(bundle_data->container_host_options);
g_list_free_full(bundle_data->replicas,
(GDestroyNotify) free_bundle_replica);
g_list_free_full(bundle_data->mounts, (GDestroyNotify)mount_free);
g_list_free_full(bundle_data->ports, (GDestroyNotify)port_free);
g_list_free(rsc->private->children);
if(bundle_data->child) {
pcmk__xml_free(bundle_data->child->private->xml);
bundle_data->child->private->xml = NULL;
bundle_data->child->private->fns->free(bundle_data->child);
}
common_free(rsc);
}
enum rsc_role_e
pe__bundle_resource_state(const pcmk_resource_t *rsc, gboolean current)
{
enum rsc_role_e container_role = pcmk_role_unknown;
return container_role;
}
/*!
* \brief Get the number of configured replicas in a bundle
*
* \param[in] rsc Bundle resource
*
* \return Number of configured replicas, or 0 on error
*/
int
pe_bundle_replicas(const pcmk_resource_t *rsc)
{
if (pcmk__is_bundle(rsc)) {
pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, rsc);
return bundle_data->nreplicas;
}
return 0;
}
void
pe__count_bundle(pcmk_resource_t *rsc)
{
pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, rsc);
for (GList *item = bundle_data->replicas; item != NULL; item = item->next) {
pcmk__bundle_replica_t *replica = item->data;
if (replica->ip) {
replica->ip->private->fns->count(replica->ip);
}
if (replica->child) {
replica->child->private->fns->count(replica->child);
}
if (replica->container) {
replica->container->private->fns->count(replica->container);
}
if (replica->remote) {
replica->remote->private->fns->count(replica->remote);
}
}
}
gboolean
pe__bundle_is_filtered(const pcmk_resource_t *rsc, GList *only_rsc,
gboolean check_parent)
{
gboolean passes = FALSE;
pe__bundle_variant_data_t *bundle_data = NULL;
if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches)) {
passes = TRUE;
} else {
get_bundle_variant_data(bundle_data, rsc);
for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) {
pcmk__bundle_replica_t *replica = gIter->data;
pcmk_resource_t *ip = replica->ip;
pcmk_resource_t *child = replica->child;
pcmk_resource_t *container = replica->container;
pcmk_resource_t *remote = replica->remote;
if ((ip != NULL)
&& !ip->private->fns->is_filtered(ip, only_rsc, FALSE)) {
passes = TRUE;
break;
}
if ((child != NULL)
&& !child->private->fns->is_filtered(child, only_rsc, FALSE)) {
passes = TRUE;
break;
}
if (!container->private->fns->is_filtered(container, only_rsc,
FALSE)) {
passes = TRUE;
break;
}
if ((remote != NULL)
&& !remote->private->fns->is_filtered(remote, only_rsc,
FALSE)) {
passes = TRUE;
break;
}
}
}
return !passes;
}
/*!
* \internal
* \brief Get a list of a bundle's containers
*
* \param[in] bundle Bundle resource
*
* \return Newly created list of \p bundle's containers
* \note It is the caller's responsibility to free the result with
* g_list_free().
*/
GList *
pe__bundle_containers(const pcmk_resource_t *bundle)
{
GList *containers = NULL;
const pe__bundle_variant_data_t *data = NULL;
get_bundle_variant_data(data, bundle);
for (GList *iter = data->replicas; iter != NULL; iter = iter->next) {
pcmk__bundle_replica_t *replica = iter->data;
containers = g_list_append(containers, replica->container);
}
return containers;
}
// Bundle implementation of pcmk__rsc_methods_t:active_node()
pcmk_node_t *
pe__bundle_active_node(const pcmk_resource_t *rsc, unsigned int *count_all,
unsigned int *count_clean)
{
pcmk_node_t *active = NULL;
pcmk_node_t *node = NULL;
pcmk_resource_t *container = NULL;
GList *containers = NULL;
GList *iter = NULL;
GHashTable *nodes = NULL;
const pe__bundle_variant_data_t *data = NULL;
if (count_all != NULL) {
*count_all = 0;
}
if (count_clean != NULL) {
*count_clean = 0;
}
if (rsc == NULL) {
return NULL;
}
/* For the purposes of this method, we only care about where the bundle's
* containers are active, so build a list of active containers.
*/
get_bundle_variant_data(data, rsc);
for (iter = data->replicas; iter != NULL; iter = iter->next) {
pcmk__bundle_replica_t *replica = iter->data;
if (replica->container->private->active_nodes != NULL) {
containers = g_list_append(containers, replica->container);
}
}
if (containers == NULL) {
return NULL;
}
/* If the bundle has only a single active container, just use that
* container's method. If live migration is ever supported for bundle
* containers, this will allow us to prefer the migration source when there
* is only one container and it is migrating. For now, this just lets us
* avoid creating the nodes table.
*/
if (pcmk__list_of_1(containers)) {
container = containers->data;
node = container->private->fns->active_node(container, count_all,
count_clean);
g_list_free(containers);
return node;
}
// Add all containers' active nodes to a hash table (for uniqueness)
nodes = g_hash_table_new(NULL, NULL);
for (iter = containers; iter != NULL; iter = iter->next) {
container = iter->data;
for (GList *node_iter = container->private->active_nodes;
node_iter != NULL; node_iter = node_iter->next) {
node = node_iter->data;
// If insert returns true, we haven't counted this node yet
if (g_hash_table_insert(nodes, (gpointer) node->details,
(gpointer) node)
&& !pe__count_active_node(rsc, node, &active, count_all,
count_clean)) {
goto done;
}
}
}
done:
g_list_free(containers);
g_hash_table_destroy(nodes);
return active;
}
/*!
* \internal
* \brief Get maximum bundle resource instances per node
*
* \param[in] rsc Bundle resource to check
*
* \return Maximum number of \p rsc instances that can be active on one node
*/
unsigned int
pe__bundle_max_per_node(const pcmk_resource_t *rsc)
{
pe__bundle_variant_data_t *bundle_data = NULL;
get_bundle_variant_data(bundle_data, rsc);
CRM_ASSERT(bundle_data->nreplicas_per_host >= 0);
return (unsigned int) bundle_data->nreplicas_per_host;
}
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 182fcfab15..466572c831 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -1,944 +1,944 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <stdbool.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
#include "pe_status_private.h"
extern bool pcmk__is_daemon;
gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data);
/*!
* \internal
* \brief Check whether we can fence a particular node
*
* \param[in] scheduler Scheduler data
* \param[in] node Name of node to check
*
* \return true if node can be fenced, false otherwise
*/
bool
pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
{
if (pcmk__is_guest_or_bundle_node(node)) {
/* A guest or bundle node is fenced by stopping its launcher, which is
* possible if the launcher's host is either online or fenceable.
*/
pcmk_resource_t *rsc = node->private->remote->private->launcher;
for (GList *n = rsc->private->active_nodes; n != NULL; n = n->next) {
pcmk_node_t *launcher_node = n->data;
if (!launcher_node->details->online
&& !pe_can_fence(scheduler, launcher_node)) {
return false;
}
}
return true;
} else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
return false; /* Turned off */
} else if (!pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) {
return false; /* No devices */
} else if (pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
return true;
} else if (scheduler->no_quorum_policy == pcmk_no_quorum_ignore) {
return true;
} else if(node == NULL) {
return false;
} else if(node->details->online) {
crm_notice("We can fence %s without quorum because they're in our membership",
pcmk__node_name(node));
return true;
}
crm_trace("Cannot fence %s", pcmk__node_name(node));
return false;
}
/*!
* \internal
* \brief Copy a node object
*
* \param[in] this_node Node object to copy
*
* \return Newly allocated shallow copy of this_node
* \note This function asserts on errors and is guaranteed to return non-NULL.
*/
pcmk_node_t *
pe__copy_node(const pcmk_node_t *this_node)
{
pcmk_node_t *new_node = NULL;
CRM_ASSERT(this_node != NULL);
new_node = pcmk__assert_alloc(1, sizeof(pcmk_node_t));
new_node->assign = pcmk__assert_alloc(1,
sizeof(struct pcmk__node_assignment));
new_node->assign->probe_mode = this_node->assign->probe_mode;
new_node->assign->score = this_node->assign->score;
new_node->assign->count = this_node->assign->count;
new_node->details = this_node->details;
new_node->private = this_node->private;
return new_node;
}
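/* Note that the copy is shallow: "assign" is allocated fresh for each copy,
* while "details" and "private" still point at the original node's shared
* data. Score, count, and probe mode can therefore diverge per copy, but node
* identity cannot.
*/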
/*!
* \internal
* \brief Create a node hash table from a node list
*
* \param[in] list Node list
*
* \return Hash table equivalent of node list
*/
GHashTable *
pe__node_list2table(const GList *list)
{
GHashTable *result = NULL;
result = pcmk__strkey_table(NULL, free);
for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *new_node = NULL;
new_node = pe__copy_node((const pcmk_node_t *) gIter->data);
g_hash_table_insert(result, (gpointer) new_node->private->id, new_node);
}
return result;
}
/*!
* \internal
* \brief Compare two nodes by name, with numeric portions sorted numerically
*
* Sort two node names case-insensitively like strcasecmp(), but with any
* numeric portions of the name sorted numerically. For example, "node10" will
* sort higher than "node9" but lower than "remotenode9".
*
* \param[in] a First node to compare (can be \c NULL)
* \param[in] b Second node to compare (can be \c NULL)
*
* \retval -1 \c a comes before \c b (or \c a is \c NULL and \c b is not)
* \retval 0 \c a and \c b are equal (or both are \c NULL)
* \retval 1 \c a comes after \c b (or \c b is \c NULL and \c a is not)
*/
gint
pe__cmp_node_name(gconstpointer a, gconstpointer b)
{
const pcmk_node_t *node1 = (const pcmk_node_t *) a;
const pcmk_node_t *node2 = (const pcmk_node_t *) b;
if ((node1 == NULL) && (node2 == NULL)) {
return 0;
}
if (node1 == NULL) {
return -1;
}
if (node2 == NULL) {
return 1;
}
return pcmk__numeric_strcasecmp(node1->private->name, node2->private->name);
}
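/* Typical use (a sketch), as pe__output_node_weights() does below:
*
* GList *sorted = g_list_sort(g_hash_table_get_values(nodes),
* pe__cmp_node_name);
*/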
/*!
* \internal
* \brief Output node weights to stdout
*
* \param[in] rsc Use allowed nodes for this resource
* \param[in] comment Text description to prefix lines with
* \param[in] nodes If rsc is not specified, use these nodes
* \param[in,out] scheduler Scheduler data
*/
static void
pe__output_node_weights(const pcmk_resource_t *rsc, const char *comment,
GHashTable *nodes, pcmk_scheduler_t *scheduler)
{
pcmk__output_t *out = scheduler->priv;
// Sort the nodes so the output is consistent for regression tests
GList *list = g_list_sort(g_hash_table_get_values(nodes),
pe__cmp_node_name);
for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) {
const pcmk_node_t *node = (const pcmk_node_t *) gIter->data;
out->message(out, "node-weight", rsc, comment, node->private->name,
pcmk_readable_score(node->assign->score));
}
g_list_free(list);
}
/*!
* \internal
* \brief Log node weights at trace level
*
* \param[in] file Caller's filename
* \param[in] function Caller's function name
* \param[in] line Caller's line number
* \param[in] rsc If not NULL, include this resource's ID in logs
* \param[in] comment Text description to prefix lines with
* \param[in] nodes Nodes whose scores should be logged
*/
static void
pe__log_node_weights(const char *file, const char *function, int line,
const pcmk_resource_t *rsc, const char *comment,
GHashTable *nodes)
{
GHashTableIter iter;
pcmk_node_t *node = NULL;
// Don't waste time if we're not tracing at this point
pcmk__if_tracing({}, return);
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (rsc) {
qb_log_from_external_source(function, file,
"%s: %s allocation score on %s: %s",
LOG_TRACE, line, 0,
comment, rsc->id,
pcmk__node_name(node),
pcmk_readable_score(node->assign->score));
} else {
qb_log_from_external_source(function, file, "%s: %s = %s",
LOG_TRACE, line, 0,
comment, pcmk__node_name(node),
pcmk_readable_score(node->assign->score));
}
}
}
/*!
* \internal
* \brief Log or output node weights
*
* \param[in] file Caller's filename
* \param[in] function Caller's function name
* \param[in] line Caller's line number
* \param[in] to_log Log if true, otherwise output
* \param[in] rsc If not NULL, use this resource's ID in logs,
* and show scores recursively for any children
* \param[in] comment Text description to prefix lines with
* \param[in] nodes Nodes whose scores should be shown
* \param[in,out] scheduler Scheduler data
*/
void
pe__show_node_scores_as(const char *file, const char *function, int line,
bool to_log, const pcmk_resource_t *rsc,
const char *comment, GHashTable *nodes,
pcmk_scheduler_t *scheduler)
{
if ((rsc != NULL) && pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
// Don't show allocation scores for orphans
return;
}
if (nodes == NULL) {
// Nothing to show
return;
}
if (to_log) {
pe__log_node_weights(file, function, line, rsc, comment, nodes);
} else {
pe__output_node_weights(rsc, comment, nodes, scheduler);
}
if (rsc == NULL) {
return;
}
// If this resource has children, repeat recursively for each
for (GList *gIter = rsc->private->children;
gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *child = (pcmk_resource_t *) gIter->data;
pe__show_node_scores_as(file, function, line, to_log, child, comment,
child->private->allowed_nodes, scheduler);
}
}
/*!
* \internal
* \brief Compare two resources by priority
*
* \param[in] a First resource to compare (can be \c NULL)
* \param[in] b Second resource to compare (can be \c NULL)
*
* \retval -1 a's priority > b's priority (or \c b is \c NULL and \c a is not)
* \retval 0 a's priority == b's priority (or both \c a and \c b are \c NULL)
* \retval 1 a's priority < b's priority (or \c a is \c NULL and \c b is not)
*/
gint
pe__cmp_rsc_priority(gconstpointer a, gconstpointer b)
{
const pcmk_resource_t *resource1 = (const pcmk_resource_t *)a;
const pcmk_resource_t *resource2 = (const pcmk_resource_t *)b;
if (a == NULL && b == NULL) {
return 0;
}
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (resource1->private->priority > resource2->private->priority) {
return -1;
}
if (resource1->private->priority < resource2->private->priority) {
return 1;
}
return 0;
}
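/* Usage sketch (hypothetical helper): sort a shallow copy of the resource
 * list so the highest-priority resources come first, matching the inverted
 * return values above. Free the result with g_list_free().
 */
static GList *
example_resources_by_priority(pcmk_scheduler_t *scheduler)
{
    return g_list_sort(g_list_copy(scheduler->resources),
                       pe__cmp_rsc_priority);
}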
static void
resource_node_score(pcmk_resource_t *rsc, const pcmk_node_t *node, int score,
const char *tag)
{
pcmk_node_t *match = NULL;
if ((pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes)
- || (node->assign->probe_mode == pcmk_probe_never))
+ || (node->assign->probe_mode == pcmk__probe_never))
&& pcmk__str_eq(tag, "symmetric_default", pcmk__str_casei)) {
/* This string comparison may be fragile, but exclusive resources and
 * exclusive nodes should not have the symmetric_default constraint
 * applied to them.
 */
return;
} else {
for (GList *gIter = rsc->private->children;
gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *child_rsc = (pcmk_resource_t *) gIter->data;
resource_node_score(child_rsc, node, score, tag);
}
}
match = g_hash_table_lookup(rsc->private->allowed_nodes, node->private->id);
if (match == NULL) {
match = pe__copy_node(node);
g_hash_table_insert(rsc->private->allowed_nodes,
(gpointer) match->private->id, match);
}
match->assign->score = pcmk__add_scores(match->assign->score, score);
pcmk__rsc_trace(rsc,
"Enabling %s preference (%s) for %s on %s (now %s)",
tag, pcmk_readable_score(score), rsc->id,
pcmk__node_name(node),
pcmk_readable_score(match->assign->score));
}
void
resource_location(pcmk_resource_t *rsc, const pcmk_node_t *node, int score,
const char *tag, pcmk_scheduler_t *scheduler)
{
if (node != NULL) {
resource_node_score(rsc, node, score, tag);
} else if (scheduler != NULL) {
GList *gIter = scheduler->nodes;
for (; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *node_iter = (pcmk_node_t *) gIter->data;
resource_node_score(rsc, node_iter, score, tag);
}
} else {
GHashTableIter iter;
pcmk_node_t *node_iter = NULL;
g_hash_table_iter_init(&iter, rsc->private->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) {
resource_node_score(rsc, node_iter, score, tag);
}
}
if ((node == NULL) && (score == -PCMK_SCORE_INFINITY)
&& (rsc->private->assigned_node != NULL)) {
// @TODO Should this be more like pcmk__unassign_resource()?
crm_info("Unassigning %s from %s",
rsc->id, pcmk__node_name(rsc->private->assigned_node));
free(rsc->private->assigned_node);
rsc->private->assigned_node = NULL;
}
}
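/* Usage sketch (hypothetical helper and tag): ban a resource from one node
 * by merging a -INFINITY preference into its allowed-node scores. Passing a
 * NULL node instead applies the score across all of scheduler->nodes.
 */
static void
example_ban_from_node(pcmk_resource_t *rsc, const pcmk_node_t *node,
                      pcmk_scheduler_t *scheduler)
{
    // "example-ban" is an arbitrary tag used only in trace logs
    resource_location(rsc, node, -PCMK_SCORE_INFINITY, "example-ban",
                      scheduler);
}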
time_t
get_effective_time(pcmk_scheduler_t *scheduler)
{
if(scheduler) {
if (scheduler->now == NULL) {
crm_trace("Recording a new 'now'");
scheduler->now = crm_time_new(NULL);
}
return crm_time_get_seconds_since_epoch(scheduler->now);
}
crm_trace("Defaulting to 'now'");
return time(NULL);
}
gboolean
get_target_role(const pcmk_resource_t *rsc, enum rsc_role_e *role)
{
enum rsc_role_e local_role = pcmk_role_unknown;
const char *value = g_hash_table_lookup(rsc->private->meta,
PCMK_META_TARGET_ROLE);
CRM_CHECK(role != NULL, return FALSE);
if (pcmk__str_eq(value, PCMK_ROLE_STARTED,
pcmk__str_null_matches|pcmk__str_casei)) {
return FALSE;
}
if (pcmk__str_eq(PCMK_VALUE_DEFAULT, value, pcmk__str_casei)) {
// @COMPAT Deprecated since 2.1.8
pcmk__config_warn("Support for setting " PCMK_META_TARGET_ROLE
" to the explicit value '" PCMK_VALUE_DEFAULT
"' is deprecated and will be removed in a "
"future release (just leave it unset)");
return FALSE;
}
local_role = pcmk_parse_role(value);
if (local_role == pcmk_role_unknown) {
pcmk__config_err("Ignoring '" PCMK_META_TARGET_ROLE "' for %s "
"because '%s' is not valid", rsc->id, value);
return FALSE;
} else if (local_role > pcmk_role_started) {
if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pcmk__rsc_promotable)) {
if (local_role > pcmk_role_unpromoted) {
/* This is what we'd do anyway, so leave the role at its default
 * to avoid confusing the placement algorithm
 */
return FALSE;
}
} else {
pcmk__config_err("Ignoring '" PCMK_META_TARGET_ROLE "' for %s "
"because '%s' only makes sense for promotable "
"clones", rsc->id, value);
return FALSE;
}
}
*role = local_role;
return TRUE;
}
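/* Usage sketch (hypothetical helper): get_target_role() returns TRUE only
 * when a valid, non-default PCMK_META_TARGET_ROLE is configured, so callers
 * can branch on both the outcome and the parsed role.
 */
static bool
example_target_role_is_stopped(const pcmk_resource_t *rsc)
{
    enum rsc_role_e role = pcmk_role_unknown;

    return get_target_role(rsc, &role) && (role == pcmk_role_stopped);
}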
gboolean
order_actions(pcmk_action_t *lh_action, pcmk_action_t *rh_action,
uint32_t flags)
{
GList *gIter = NULL;
pcmk__related_action_t *wrapper = NULL;
GList *list = NULL;
if (flags == pcmk__ar_none) {
return FALSE;
}
if (lh_action == NULL || rh_action == NULL) {
return FALSE;
}
crm_trace("Creating action wrappers for ordering: %s then %s",
lh_action->uuid, rh_action->uuid);
/* Ensure we never create a dependency on ourselves... it's happened */
CRM_ASSERT(lh_action != rh_action);
/* Filter dups, otherwise update_action_states() has too much work to do */
gIter = lh_action->actions_after;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__related_action_t *after = gIter->data;
if (after->action == rh_action && (after->type & flags)) {
return FALSE;
}
}
wrapper = pcmk__assert_alloc(1, sizeof(pcmk__related_action_t));
wrapper->action = rh_action;
wrapper->type = flags;
list = lh_action->actions_after;
list = g_list_prepend(list, wrapper);
lh_action->actions_after = list;
wrapper = pcmk__assert_alloc(1, sizeof(pcmk__related_action_t));
wrapper->action = lh_action;
wrapper->type = flags;
list = rh_action->actions_before;
list = g_list_prepend(list, wrapper);
rh_action->actions_before = list;
return TRUE;
}
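/* Usage sketch (assumes "first" and "then" were created elsewhere for the
 * same transition): order_actions() links the two actions in both
 * directions, and returns FALSE when an equivalent ordering already exists.
 */
static void
example_order_actions(pcmk_action_t *first, pcmk_action_t *then)
{
    if (!order_actions(first, then, pcmk__ar_ordered)) {
        crm_trace("Ordering was a duplicate or not applicable");
    }
}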
void
destroy_ticket(gpointer data)
{
pcmk_ticket_t *ticket = data;
if (ticket->state) {
g_hash_table_destroy(ticket->state);
}
free(ticket->id);
free(ticket);
}
pcmk_ticket_t *
ticket_new(const char *ticket_id, pcmk_scheduler_t *scheduler)
{
pcmk_ticket_t *ticket = NULL;
if (pcmk__str_empty(ticket_id)) {
return NULL;
}
if (scheduler->tickets == NULL) {
scheduler->tickets = pcmk__strkey_table(free, destroy_ticket);
}
ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
if (ticket == NULL) {
ticket = calloc(1, sizeof(pcmk_ticket_t));
if (ticket == NULL) {
pcmk__sched_err("Cannot allocate ticket '%s'", ticket_id);
return NULL;
}
crm_trace("Creating ticket entry for %s", ticket_id);
ticket->id = strdup(ticket_id);
ticket->granted = FALSE;
ticket->last_granted = -1;
ticket->standby = FALSE;
ticket->state = pcmk__strkey_table(free, free);
g_hash_table_insert(scheduler->tickets, strdup(ticket->id), ticket);
}
return ticket;
}
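/* Usage sketch (hypothetical ticket ID): ticket_new() is a lookup-or-create
 * helper, so repeated calls with the same ID safely return the same entry.
 */
static void
example_grant_ticket(pcmk_scheduler_t *scheduler)
{
    pcmk_ticket_t *ticket = ticket_new("example-ticket", scheduler);

    if (ticket != NULL) {
        ticket->granted = TRUE;
    }
}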
const char *
rsc_printable_id(const pcmk_resource_t *rsc)
{
if (pcmk_is_set(rsc->flags, pcmk__rsc_unique)) {
return rsc->id;
}
return pcmk__xe_id(rsc->private->xml);
}
void
pe__clear_resource_flags_recursive(pcmk_resource_t *rsc, uint64_t flags)
{
pcmk__clear_rsc_flags(rsc, flags);
for (GList *gIter = rsc->private->children;
gIter != NULL; gIter = gIter->next) {
pe__clear_resource_flags_recursive((pcmk_resource_t *) gIter->data,
flags);
}
}
void
pe__clear_resource_flags_on_all(pcmk_scheduler_t *scheduler, uint64_t flag)
{
for (GList *lpc = scheduler->resources; lpc != NULL; lpc = lpc->next) {
pcmk_resource_t *r = (pcmk_resource_t *) lpc->data;
pe__clear_resource_flags_recursive(r, flag);
}
}
void
pe__set_resource_flags_recursive(pcmk_resource_t *rsc, uint64_t flags)
{
pcmk__set_rsc_flags(rsc, flags);
for (GList *gIter = rsc->private->children;
gIter != NULL; gIter = gIter->next) {
pe__set_resource_flags_recursive((pcmk_resource_t *) gIter->data,
flags);
}
}
void
trigger_unfencing(pcmk_resource_t *rsc, pcmk_node_t *node, const char *reason,
pcmk_action_t *dependency, pcmk_scheduler_t *scheduler)
{
if (!pcmk_is_set(scheduler->flags, pcmk_sched_enable_unfencing)) {
/* No resources require it */
return;
} else if ((rsc != NULL)
&& !pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)) {
/* Wasn't a stonith device */
return;
} else if(node
&& node->details->online
&& node->details->unclean == FALSE
&& node->details->shutdown == FALSE) {
pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, FALSE,
reason, FALSE, scheduler);
if(dependency) {
order_actions(unfence, dependency, pcmk__ar_ordered);
}
} else if(rsc) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->private->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) {
trigger_unfencing(rsc, node, reason, dependency, scheduler);
}
}
}
}
gboolean
add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref)
{
pcmk_tag_t *tag = NULL;
GList *gIter = NULL;
gboolean is_existing = FALSE;
CRM_CHECK(tags && tag_name && obj_ref, return FALSE);
tag = g_hash_table_lookup(tags, tag_name);
if (tag == NULL) {
tag = calloc(1, sizeof(pcmk_tag_t));
if (tag == NULL) {
pcmk__sched_err("Could not allocate memory for tag %s", tag_name);
return FALSE;
}
tag->id = strdup(tag_name);
tag->refs = NULL;
g_hash_table_insert(tags, strdup(tag_name), tag);
}
for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) {
const char *existing_ref = (const char *) gIter->data;
if (pcmk__str_eq(existing_ref, obj_ref, pcmk__str_none)){
is_existing = TRUE;
break;
}
}
if (is_existing == FALSE) {
tag->refs = g_list_append(tag->refs, strdup(obj_ref));
crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref);
}
return TRUE;
}
/*!
* \internal
* \brief Check whether shutdown has been requested for a node
*
* \param[in] node Node to check
*
* \return TRUE if node has shutdown attribute set and nonzero, FALSE otherwise
* \note This differs from simply using node->details->shutdown in that it can
* be used before that has been determined (and in fact to determine it),
* and it can also be used to distinguish requested shutdown from implicit
* shutdown of remote nodes by virtue of their connection stopping.
*/
bool
pe__shutdown_requested(const pcmk_node_t *node)
{
const char *shutdown = pcmk__node_attr(node, PCMK__NODE_ATTR_SHUTDOWN, NULL,
pcmk__rsc_node_current);
return !pcmk__str_eq(shutdown, "0", pcmk__str_null_matches);
}
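/* Usage sketch (hypothetical helper): because this reads the node attribute
 * directly, it is safe to call while node->details->shutdown is still being
 * determined.
 */
static void
example_log_shutdown_request(const pcmk_node_t *node)
{
    if (pe__shutdown_requested(node)) {
        crm_trace("Shutdown has been requested for %s",
                  pcmk__node_name(node));
    }
}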
/*!
* \internal
* \brief Update "recheck by" time in scheduler data
*
* \param[in] recheck Epoch time when recheck should happen
* \param[in,out] scheduler Scheduler data
* \param[in] reason What time is being updated for (for logs)
*/
void
pe__update_recheck_time(time_t recheck, pcmk_scheduler_t *scheduler,
const char *reason)
{
if ((recheck > get_effective_time(scheduler))
&& ((scheduler->recheck_by == 0)
|| (scheduler->recheck_by > recheck))) {
scheduler->recheck_by = recheck;
crm_debug("Updated next scheduler recheck to %s for %s",
pcmk__trim(ctime(&recheck)), reason);
}
}
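/* Usage sketch (hypothetical trigger and interval): request a scheduler
 * re-run five minutes after the effective time, e.g. because a time-based
 * rule is known to change value then. Earlier pending recheck times win.
 */
static void
example_recheck_in_five_minutes(pcmk_scheduler_t *scheduler)
{
    pe__update_recheck_time(get_effective_time(scheduler) + 300, scheduler,
                            "example time-based rule");
}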
/*!
* \internal
* \brief Extract nvpair blocks contained by a CIB XML element into a hash table
*
* \param[in] xml_obj XML element containing blocks of nvpair elements
* \param[in] set_name If not NULL, only use blocks of this element
* \param[in] rule_data Matching parameters to use when unpacking
* \param[out] hash Where to store extracted name/value pairs
* \param[in] always_first If not NULL, process block with this ID first
* \param[in] overwrite Whether to replace existing values with same name
* \param[in,out] scheduler Scheduler data containing \p xml_obj
*/
void
pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name,
const pe_rule_eval_data_t *rule_data,
GHashTable *hash, const char *always_first,
gboolean overwrite, pcmk_scheduler_t *scheduler)
{
crm_time_t *next_change = crm_time_new_undefined();
pe_eval_nvpairs(scheduler->input, xml_obj, set_name, rule_data, hash,
always_first, overwrite, next_change);
if (crm_time_is_defined(next_change)) {
time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change);
pe__update_recheck_time(recheck, scheduler, "rule evaluation");
}
crm_time_free(next_change);
}
bool
pe__resource_is_disabled(const pcmk_resource_t *rsc)
{
const char *target_role = NULL;
CRM_CHECK(rsc != NULL, return false);
target_role = g_hash_table_lookup(rsc->private->meta,
PCMK_META_TARGET_ROLE);
if (target_role) {
// If invalid, we've already logged an error when unpacking
enum rsc_role_e target_role_e = pcmk_parse_role(target_role);
if ((target_role_e == pcmk_role_stopped)
|| ((target_role_e == pcmk_role_unpromoted)
&& pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pcmk__rsc_promotable))) {
return true;
}
}
return false;
}
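/* Usage sketch (hypothetical helper): count administratively disabled
 * resources, as a status tool might.
 */
static unsigned int
example_count_disabled(pcmk_scheduler_t *scheduler)
{
    unsigned int count = 0;

    for (GList *iter = scheduler->resources; iter != NULL;
         iter = iter->next) {
        if (pe__resource_is_disabled((pcmk_resource_t *) iter->data)) {
            count++;
        }
    }
    return count;
}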
/*!
* \internal
* \brief Check whether a resource is running only on given node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p rsc is running only on \p node, otherwise false
*/
bool
pe__rsc_running_on_only(const pcmk_resource_t *rsc, const pcmk_node_t *node)
{
return (rsc != NULL) && pcmk__list_of_1(rsc->private->active_nodes)
&& pcmk__same_node((const pcmk_node_t *)
rsc->private->active_nodes->data, node);
}
bool
pe__rsc_running_on_any(pcmk_resource_t *rsc, GList *node_list)
{
if (rsc != NULL) {
for (GList *ele = rsc->private->active_nodes; ele; ele = ele->next) {
pcmk_node_t *node = (pcmk_node_t *) ele->data;
if (pcmk__str_in_list(node->private->name, node_list,
pcmk__str_star_matches|pcmk__str_casei)) {
return true;
}
}
}
return false;
}
bool
pcmk__rsc_filtered_by_node(pcmk_resource_t *rsc, GList *only_node)
{
return rsc->private->fns->active(rsc, FALSE)
&& !pe__rsc_running_on_any(rsc, only_node);
}
GList *
pe__filter_rsc_list(GList *rscs, GList *filter)
{
GList *retval = NULL;
for (GList *gIter = rscs; gIter; gIter = gIter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
/* I think the second condition is safe here for all callers of this
* function. If not, it needs to move into pe__node_text.
*/
if (pcmk__str_in_list(rsc_printable_id(rsc), filter, pcmk__str_star_matches) ||
((rsc->private->parent != NULL)
&& pcmk__str_in_list(rsc_printable_id(rsc->private->parent),
filter, pcmk__str_star_matches))) {
retval = g_list_prepend(retval, rsc);
}
}
return retval;
}
GList *
pe__build_node_name_list(pcmk_scheduler_t *scheduler, const char *s)
{
GList *nodes = NULL;
if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) {
/* Nothing was given so return a list of all node names. Or, '*' was
* given. This would normally fall into the pe__unames_with_tag branch
* where it will return an empty list. Catch it here instead.
*/
nodes = g_list_prepend(nodes, strdup("*"));
} else {
pcmk_node_t *node = pcmk_find_node(scheduler, s);
if (node) {
/* The given string was a valid uname for a node. Return a
* singleton list containing just that uname.
*/
nodes = g_list_prepend(nodes, strdup(s));
} else {
/* The given string was not a valid uname. It's either a tag or
* it's a typo or something. In the first case, we'll return a
* list of all the unames of the nodes with the given tag. In the
* second case, we'll return a NULL pointer and nothing will
* get displayed.
*/
nodes = pe__unames_with_tag(scheduler, s);
}
}
return nodes;
}
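/* Usage sketch (hypothetical argument "node1"): the returned list owns
 * newly allocated strings, so release it with g_list_free_full().
 */
static void
example_node_name_list(pcmk_scheduler_t *scheduler)
{
    GList *names = pe__build_node_name_list(scheduler, "node1");

    // ... filter display output using the list here ...
    g_list_free_full(names, free);
}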
GList *
pe__build_rsc_list(pcmk_scheduler_t *scheduler, const char *s)
{
GList *resources = NULL;
if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) {
resources = g_list_prepend(resources, strdup("*"));
} else {
const uint32_t flags = pcmk_rsc_match_history|pcmk_rsc_match_basename;
pcmk_resource_t *rsc = pe_find_resource_with_flags(scheduler->resources,
s, flags);
if (rsc) {
/* A colon in the name we were given means we're being asked to filter
* on a specific instance of a cloned resource. Put that exact string
* into the filter list. Otherwise, use the printable ID of whatever
* resource was found that matches what was asked for.
*/
if (strstr(s, ":") != NULL) {
resources = g_list_prepend(resources, strdup(rsc->id));
} else {
resources = g_list_prepend(resources, strdup(rsc_printable_id(rsc)));
}
} else {
/* The given string was not a valid resource name. It's a tag or a
* typo or something. See pe__build_node_name_list() for more
* detail.
*/
resources = pe__rscs_with_tag(scheduler, s);
}
}
return resources;
}
xmlNode *
pe__failed_probe_for_rsc(const pcmk_resource_t *rsc, const char *name)
{
const pcmk_resource_t *parent = pe__const_top_resource(rsc, false);
const char *rsc_id = rsc->id;
if (pcmk__is_clone(parent)) {
rsc_id = pe__clone_child_id(parent);
}
for (xmlNode *xml_op = pcmk__xe_first_child(rsc->private->scheduler->failed,
NULL, NULL, NULL);
xml_op != NULL; xml_op = pcmk__xe_next(xml_op)) {
const char *value = NULL;
char *op_id = NULL;
/* This resource operation is not a failed probe. */
if (!pcmk_xe_mask_probe_failure(xml_op)) {
continue;
}
/* This resource operation was not run on the given node. Note that if name is
* NULL, this will always succeed.
*/
value = crm_element_value(xml_op, PCMK__META_ON_NODE);
if (value == NULL || !pcmk__str_eq(value, name, pcmk__str_casei|pcmk__str_null_matches)) {
continue;
}
if (!parse_op_key(pcmk__xe_history_key(xml_op), &op_id, NULL, NULL)) {
continue; // This history entry is missing an operation key
}
/* This resource operation's ID does not match the rsc_id we are looking for. */
if (!pcmk__str_eq(op_id, rsc_id, pcmk__str_none)) {
free(op_id);
continue;
}
free(op_id);
return xml_op;
}
return NULL;
}
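/* Usage sketch (hypothetical helper): the XML returned by
 * pe__failed_probe_for_rsc() is owned by the scheduler's failed-operation
 * list and must not be freed by the caller.
 */
static void
example_check_failed_probe(const pcmk_resource_t *rsc, const char *node_name)
{
    xmlNode *xml_op = pe__failed_probe_for_rsc(rsc, node_name);

    if ((xml_op != NULL) && (node_name != NULL)) {
        crm_trace("%s has a failed probe on %s", rsc->id, node_name);
    }
}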