Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4624371
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
166 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/include/crm/common/actions.h b/include/crm/common/actions.h
index 32106d2d3b..f9f8932bd0 100644
--- a/include/crm/common/actions.h
+++ b/include/crm/common/actions.h
@@ -1,47 +1,48 @@
/*
* Copyright 2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_ACTIONS__H
#define PCMK__CRM_COMMON_ACTIONS__H
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \file
* \brief APIs related to actions
* \ingroup core
*/
// Action names as strings
#define PCMK_ACTION_CANCEL "cancel"
#define PCMK_ACTION_DELETE "delete"
#define PCMK_ACTION_DEMOTE "demote"
#define PCMK_ACTION_DEMOTED "demoted"
#define PCMK_ACTION_META_DATA "meta-data"
#define PCMK_ACTION_MIGRATE_FROM "migrate_from"
#define PCMK_ACTION_MIGRATE_TO "migrate_to"
#define PCMK_ACTION_MONITOR "monitor"
+#define PCMK_ACTION_NOTIFIED "notified"
#define PCMK_ACTION_NOTIFY "notify"
#define PCMK_ACTION_PROMOTE "promote"
#define PCMK_ACTION_PROMOTED "promoted"
#define PCMK_ACTION_RELOAD "reload"
#define PCMK_ACTION_RELOAD_AGENT "reload-agent"
#define PCMK_ACTION_RUNNING "running"
#define PCMK_ACTION_START "start"
#define PCMK_ACTION_STOP "stop"
#define PCMK_ACTION_STOPPED "stopped"
#define PCMK_ACTION_VALIDATE_ALL "validate-all"
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_COMMON_ACTIONS__H
diff --git a/include/crm/crm.h b/include/crm/crm.h
index 4f0d0cde41..7f5c34c0b3 100644
--- a/include/crm/crm.h
+++ b/include/crm/crm.h
@@ -1,196 +1,196 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CRM__H
# define PCMK__CRM_CRM__H
# include <crm_config.h>
# include <stdlib.h>
# include <glib.h>
# include <stdbool.h>
# include <string.h>
# include <libxml/tree.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief A dumping ground
* \ingroup core
*/
#ifndef PCMK_ALLOW_DEPRECATED
/*!
* \brief Allow use of deprecated Pacemaker APIs
*
* By default, external code using Pacemaker headers is allowed to use
* deprecated Pacemaker APIs. If PCMK_ALLOW_DEPRECATED is defined to 0 before
* including any Pacemaker headers, deprecated APIs will be unusable. It is
* strongly recommended to leave this unchanged for production and release
* builds, to avoid breakage when users upgrade to new Pacemaker releases that
* deprecate more APIs. This should be defined to 0 only for development and
* testing builds when desiring to check for usage of currently deprecated APIs.
*/
#define PCMK_ALLOW_DEPRECATED 1
#endif
/*!
* The CRM feature set assists with compatibility in mixed-version clusters.
* The major version number increases when nodes with different versions
* would not work (rolling upgrades are not allowed). The minor version
* number increases when mixed-version clusters are allowed only during
* rolling upgrades (a node with the oldest feature set will be elected DC). The
* minor-minor version number is ignored, but allows resource agents to detect
* cluster support for various features.
*
* The feature set also affects the processing of old saved CIBs (such as for
* many scheduler regression tests).
*
* Particular feature points currently tested by Pacemaker code:
*
* >2.1: Operation updates include timing data
* >=3.0.5: XML v2 digests are created
* >=3.0.8: Peers do not need acks for cancellations
* >=3.0.9: DC will send its own shutdown request to all peers
* XML v2 patchsets are created by default
* >=3.0.13: Fail counts include operation name and interval
* >=3.2.0: DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED
*/
# define CRM_FEATURE_SET "3.18.0"
/* Pacemaker's CPG protocols use fixed-width binary fields for the sender and
* recipient of a CPG message. This imposes an arbitrary limit on cluster node
* names.
*/
//! \brief Maximum length of a Corosync cluster node name (in bytes)
#define MAX_NAME 256
# define CRM_META "CRM_meta"
extern char *crm_system_name;
/* *INDENT-OFF* */
// How we represent "infinite" scores
# define CRM_SCORE_INFINITY 1000000
# define CRM_INFINITY_S "INFINITY"
# define CRM_PLUS_INFINITY_S "+" CRM_INFINITY_S
# define CRM_MINUS_INFINITY_S "-" CRM_INFINITY_S
/* @COMPAT API < 2.0.0 Deprecated "infinity" aliases
*
* INFINITY might be defined elsewhere (e.g. math.h), so undefine it first.
* This, of course, complicates any attempt to use the other definition in any
* code that includes this header.
*/
# undef INFINITY
# define INFINITY_S "INFINITY"
# define MINUS_INFINITY_S "-INFINITY"
# define INFINITY 1000000
/* Sub-systems */
# define CRM_SYSTEM_DC "dc"
#define CRM_SYSTEM_DCIB "dcib" // Primary instance of CIB manager
# define CRM_SYSTEM_CIB "cib"
# define CRM_SYSTEM_CRMD "crmd"
# define CRM_SYSTEM_LRMD "lrmd"
# define CRM_SYSTEM_PENGINE "pengine"
# define CRM_SYSTEM_TENGINE "tengine"
# define CRM_SYSTEM_STONITHD "stonithd"
# define CRM_SYSTEM_MCP "pacemakerd"
// Names of internally generated node attributes
# define CRM_ATTR_UNAME "#uname"
# define CRM_ATTR_ID "#id"
# define CRM_ATTR_KIND "#kind"
# define CRM_ATTR_ROLE "#role"
# define CRM_ATTR_IS_DC "#is_dc"
# define CRM_ATTR_CLUSTER_NAME "#cluster-name"
# define CRM_ATTR_SITE_NAME "#site-name"
# define CRM_ATTR_UNFENCED "#node-unfenced"
# define CRM_ATTR_DIGESTS_ALL "#digests-all"
# define CRM_ATTR_DIGESTS_SECURE "#digests-secure"
# define CRM_ATTR_PROTOCOL "#attrd-protocol"
# define CRM_ATTR_FEATURE_SET "#feature-set"
/* Valid operations */
# define CRM_OP_NOOP "noop"
# define CRM_OP_JOIN_ANNOUNCE "join_announce"
# define CRM_OP_JOIN_OFFER "join_offer"
# define CRM_OP_JOIN_REQUEST "join_request"
# define CRM_OP_JOIN_ACKNAK "join_ack_nack"
# define CRM_OP_JOIN_CONFIRM "join_confirm"
# define CRM_OP_PING "ping"
# define CRM_OP_NODE_INFO "node-info"
# define CRM_OP_THROTTLE "throttle"
# define CRM_OP_VOTE "vote"
# define CRM_OP_NOVOTE "no-vote"
# define CRM_OP_HELLO "hello"
# define CRM_OP_PECALC "pe_calc"
# define CRM_OP_QUIT "quit"
# define CRM_OP_LOCAL_SHUTDOWN "start_shutdown"
# define CRM_OP_SHUTDOWN_REQ "req_shutdown"
# define CRM_OP_SHUTDOWN "do_shutdown"
# define CRM_OP_FENCE "stonith"
# define CRM_OP_REGISTER "register"
# define CRM_OP_IPC_FWD "ipc_fwd"
# define CRM_OP_INVOKE_LRM "lrm_invoke"
# define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10
# define CRM_OP_LRM_DELETE "lrm_delete"
# define CRM_OP_LRM_FAIL "lrm_fail"
# define CRM_OP_PROBED "probe_complete"
# define CRM_OP_REPROBE "probe_again"
# define CRM_OP_CLEAR_FAILCOUNT "clear_failcount"
# define CRM_OP_REMOTE_STATE "remote_state"
# define CRM_OP_RELAXED_SET "one-or-more"
# define CRM_OP_RELAXED_CLONE "clone-one-or-more"
# define CRM_OP_RM_NODE_CACHE "rm_node_cache"
# define CRM_OP_MAINTENANCE_NODES "maintenance_nodes"
/* Possible cluster membership states */
# define CRMD_JOINSTATE_DOWN "down"
# define CRMD_JOINSTATE_PENDING "pending"
# define CRMD_JOINSTATE_MEMBER "member"
# define CRMD_JOINSTATE_NACK "banned"
-# define CRMD_ACTION_NOTIFIED "notified"
+# define CRMD_ACTION_NOTIFIED PCMK_ACTION_NOTIFIED
# define CRMD_METADATA_CALL_TIMEOUT 30000
/* short names */
-# define RSC_NOTIFIED CRMD_ACTION_NOTIFIED
+# define RSC_NOTIFIED PCMK_ACTION_NOTIFIED
/* *INDENT-ON* */
# include <crm/common/actions.h>
# include <crm/common/cib.h>
# include <crm/common/logging.h>
# include <crm/common/util.h>
static inline const char *
crm_action_str(const char *task, guint interval_ms) {
if ((task != NULL) && (interval_ms == 0)
&& (strcasecmp(task, PCMK_ACTION_MONITOR) == 0)) {
return "probe";
}
return task;
}
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/crm_compat.h>
#endif
#ifdef __cplusplus
}
#endif
#endif
diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c
index 3cf6b9fb11..19dbde9e6c 100644
--- a/lib/pacemaker/pcmk_sched_instances.c
+++ b/lib/pacemaker/pcmk_sched_instances.c
@@ -1,1668 +1,1668 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
/* This file is intended for code usable with both clone instances and bundle
* replica containers.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Check whether a node is allowed to run an instance
*
* \param[in] instance Clone instance or bundle container to check
* \param[in] node Node to check
* \param[in] max_per_node Maximum number of instances allowed to run on a node
*
* \return true if \p node is allowed to run \p instance, otherwise false
*/
static bool
can_run_instance(const pe_resource_t *instance, const pe_node_t *node,
int max_per_node)
{
pe_node_t *allowed_node = NULL;
if (pcmk_is_set(instance->flags, pe_rsc_orphan)) {
pe_rsc_trace(instance, "%s cannot run on %s: orphaned",
instance->id, pe__node_name(node));
return false;
}
if (!pcmk__node_available(node, false, false)) {
pe_rsc_trace(instance,
"%s cannot run on %s: node cannot run resources",
instance->id, pe__node_name(node));
return false;
}
allowed_node = pcmk__top_allowed_node(instance, node);
if (allowed_node == NULL) {
crm_warn("%s cannot run on %s: node not allowed",
instance->id, pe__node_name(node));
return false;
}
if (allowed_node->weight < 0) {
pe_rsc_trace(instance, "%s cannot run on %s: parent score is %s there",
instance->id, pe__node_name(node),
pcmk_readable_score(allowed_node->weight));
return false;
}
if (allowed_node->count >= max_per_node) {
pe_rsc_trace(instance,
"%s cannot run on %s: node already has %d instance%s",
instance->id, pe__node_name(node), max_per_node,
pcmk__plural_s(max_per_node));
return false;
}
pe_rsc_trace(instance, "%s can run on %s (%d already running)",
instance->id, pe__node_name(node), allowed_node->count);
return true;
}
/*!
* \internal
* \brief Ban a clone instance or bundle replica from unavailable allowed nodes
*
* \param[in,out] instance Clone instance or bundle replica to ban
* \param[in] max_per_node Maximum instances allowed to run on a node
*/
static void
ban_unavailable_allowed_nodes(pe_resource_t *instance, int max_per_node)
{
if (instance->allowed_nodes != NULL) {
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, instance->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (!can_run_instance(instance, node, max_per_node)) {
pe_rsc_trace(instance, "Banning %s from unavailable node %s",
instance->id, pe__node_name(node));
node->weight = -INFINITY;
for (GList *child_iter = instance->children;
child_iter != NULL; child_iter = child_iter->next) {
pe_resource_t *child = (pe_resource_t *) child_iter->data;
pe_node_t *child_node = NULL;
child_node = g_hash_table_lookup(child->allowed_nodes,
node->details->id);
if (child_node != NULL) {
pe_rsc_trace(instance,
"Banning %s child %s "
"from unavailable node %s",
instance->id, child->id,
pe__node_name(node));
child_node->weight = -INFINITY;
}
}
}
}
}
}
/*!
* \internal
* \brief Create a hash table with a single node in it
*
* \param[in] node Node to copy into new table
*
* \return Newly created hash table containing a copy of \p node
* \note The caller is responsible for freeing the result with
* g_hash_table_destroy().
*/
static GHashTable *
new_node_table(pe_node_t *node)
{
GHashTable *table = pcmk__strkey_table(NULL, free);
node = pe__copy_node(node);
g_hash_table_insert(table, (gpointer) node->details->id, node);
return table;
}
/*!
* \internal
* \brief Apply a resource's parent's colocation scores to a node table
*
* \param[in] rsc Resource whose colocations should be applied
* \param[in,out] nodes Node table to apply colocations to
*/
static void
apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes)
{
GList *colocations = pcmk__this_with_colocations(rsc);
for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
const pcmk__colocation_t *colocation = iter->data;
pe_resource_t *other = colocation->primary;
float factor = colocation->score / (float) INFINITY;
other->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes,
colocation, factor,
pcmk__coloc_select_default);
}
g_list_free(colocations);
colocations = pcmk__with_this_colocations(rsc);
for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
const pcmk__colocation_t *colocation = iter->data;
pe_resource_t *other = colocation->dependent;
float factor = colocation->score / (float) INFINITY;
if (!pcmk__colocation_has_influence(colocation, rsc)) {
continue;
}
other->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes,
colocation, factor,
pcmk__coloc_select_nonnegative);
}
g_list_free(colocations);
}
/*!
* \internal
* \brief Compare clone or bundle instances based on colocation scores
*
* Determine the relative order in which two clone or bundle instances should be
* assigned to nodes, considering the scores of colocation constraints directly
* or indirectly involving them.
*
* \param[in] instance1 First instance to compare
* \param[in] instance2 Second instance to compare
*
* \return A negative number if \p instance1 should be assigned first,
* a positive number if \p instance2 should be assigned first,
* or 0 if assignment order doesn't matter
*/
static int
cmp_instance_by_colocation(const pe_resource_t *instance1,
const pe_resource_t *instance2)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
pe_node_t *current_node1 = pe__current_node(instance1);
pe_node_t *current_node2 = pe__current_node(instance2);
GHashTable *colocated_scores1 = NULL;
GHashTable *colocated_scores2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL)
&& (instance2 != NULL) && (instance2->parent != NULL)
&& (current_node1 != NULL) && (current_node2 != NULL));
// Create node tables initialized with each node
colocated_scores1 = new_node_table(current_node1);
colocated_scores2 = new_node_table(current_node2);
// Apply parental colocations
apply_parent_colocations(instance1, &colocated_scores1);
apply_parent_colocations(instance2, &colocated_scores2);
// Find original nodes again, with scores updated for colocations
node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
// Compare nodes by updated scores
if (node1->weight < node2->weight) {
crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
instance1->id, node1->weight, pe__node_name(node1),
instance2->id, node2->weight, pe__node_name(node2));
rc = 1;
} else if (node1->weight > node2->weight) {
crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
instance1->id, node1->weight, pe__node_name(node1),
instance2->id, node2->weight, pe__node_name(node2));
rc = -1;
}
g_hash_table_destroy(colocated_scores1);
g_hash_table_destroy(colocated_scores2);
return rc;
}
/*!
* \internal
* \brief Check whether a resource or any of its children are failed
*
* \param[in] rsc Resource to check
*
* \return true if \p rsc or any of its children are failed, otherwise false
*/
static bool
did_fail(const pe_resource_t *rsc)
{
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
return true;
}
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
if (did_fail((const pe_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether a node is allowed to run a resource
*
* \param[in] rsc Resource to check
* \param[in,out] node Node to check (will be set NULL if not allowed)
*
* \return true if *node is either NULL or allowed for \p rsc, otherwise false
*/
static bool
node_is_allowed(const pe_resource_t *rsc, pe_node_t **node)
{
if (*node != NULL) {
pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes,
(*node)->details->id);
if ((allowed == NULL) || (allowed->weight < 0)) {
pe_rsc_trace(rsc, "%s: current location (%s) is unavailable",
rsc->id, pe__node_name(*node));
*node = NULL;
return false;
}
}
return true;
}
/*!
* \internal
* \brief Compare two clone or bundle instances' instance numbers
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a's instance number is lower,
* a positive number if \p b's instance number is lower,
* or 0 if their instance numbers are the same
*/
gint
pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
{
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
char *div1 = NULL;
char *div2 = NULL;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
// Clone numbers are after a colon, bundle numbers after a dash
div1 = strrchr(instance1->id, ':');
if (div1 == NULL) {
div1 = strrchr(instance1->id, '-');
}
div2 = strrchr(instance2->id, ':');
if (div2 == NULL) {
div2 = strrchr(instance2->id, '-');
}
CRM_ASSERT((div1 != NULL) && (div2 != NULL));
return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
}
/*!
* \internal
* \brief Compare clone or bundle instances according to assignment order
*
* Compare two clone or bundle instances according to the order they should be
* assigned to nodes, preferring (in order):
*
* - Active instance that is less multiply active
* - Instance that is not active on a disallowed node
* - Instance with higher configured priority
* - Active instance whose current node can run resources
* - Active instance whose parent is allowed on current node
* - Active instance whose current node has fewer other instances
* - Active instance
* - Instance that isn't failed
* - Instance whose colocations result in higher score on current node
* - Instance with lower ID in lexicographic order
*
* \param[in] a First instance to compare
* \param[in] b Second instance to compare
*
* \return A negative number if \p a should be assigned first,
* a positive number if \p b should be assigned first,
* or 0 if assignment order doesn't matter
*/
gint
pcmk__cmp_instance(gconstpointer a, gconstpointer b)
{
int rc = 0;
pe_node_t *node1 = NULL;
pe_node_t *node2 = NULL;
unsigned int nnodes1 = 0;
unsigned int nnodes2 = 0;
bool can1 = true;
bool can2 = true;
const pe_resource_t *instance1 = (const pe_resource_t *) a;
const pe_resource_t *instance2 = (const pe_resource_t *) b;
CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
node1 = instance1->fns->active_node(instance1, &nnodes1, NULL);
node2 = instance2->fns->active_node(instance2, &nnodes2, NULL);
/* If both instances are running and at least one is multiply
* active, prefer instance that's running on fewer nodes.
*/
if ((nnodes1 > 0) && (nnodes2 > 0)) {
if (nnodes1 < nnodes2) {
crm_trace("Assign %s (active on %d) before %s (active on %d): "
"less multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return -1;
} else if (nnodes1 > nnodes2) {
crm_trace("Assign %s (active on %d) after %s (active on %d): "
"more multiply active",
instance1->id, nnodes1, instance2->id, nnodes2);
return 1;
}
}
/* An instance that is either inactive or active on an allowed node is
* preferred over an instance that is active on a no-longer-allowed node.
*/
can1 = node_is_allowed(instance1, &node1);
can2 = node_is_allowed(instance2, &node2);
if (can1 && !can2) {
crm_trace("Assign %s before %s: not active on a disallowed node",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: active on a disallowed node",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher configured priority
if (instance1->priority > instance2->priority) {
crm_trace("Assign %s before %s: priority (%d > %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return -1;
} else if (instance1->priority < instance2->priority) {
crm_trace("Assign %s after %s: priority (%d < %d)",
instance1->id, instance2->id,
instance1->priority, instance2->priority);
return 1;
}
// Prefer active instance
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: inactive",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node can run resources
can1 = pcmk__node_available(node1, false, false);
can2 = pcmk__node_available(node2, false, false);
if (can1 && !can2) {
crm_trace("Assign %s before %s: current node can run resources",
instance1->id, instance2->id);
return -1;
} else if (!can1 && can2) {
crm_trace("Assign %s after %s: current node can't run resources",
instance1->id, instance2->id);
return 1;
}
// Prefer instance whose parent is allowed to run on instance's current node
node1 = pcmk__top_allowed_node(instance1, node1);
node2 = pcmk__top_allowed_node(instance2, node2);
if ((node1 == NULL) && (node2 == NULL)) {
crm_trace("No assignment preference for %s vs. %s: "
"parent not allowed on either instance's current node",
instance1->id, instance2->id);
return 0;
} else if (node1 == NULL) {
crm_trace("Assign %s after %s: parent not allowed on current node",
instance1->id, instance2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("Assign %s before %s: parent allowed on current node",
instance1->id, instance2->id);
return -1;
}
// Prefer instance whose current node is running fewer other instances
if (node1->count < node2->count) {
crm_trace("Assign %s before %s: fewer active instances on current node",
instance1->id, instance2->id);
return -1;
} else if (node1->count > node2->count) {
crm_trace("Assign %s after %s: more active instances on current node",
instance1->id, instance2->id);
return 1;
}
// Prefer instance that isn't failed
can1 = did_fail(instance1);
can2 = did_fail(instance2);
if (!can1 && can2) {
crm_trace("Assign %s before %s: not failed",
instance1->id, instance2->id);
return -1;
} else if (can1 && !can2) {
crm_trace("Assign %s after %s: failed",
instance1->id, instance2->id);
return 1;
}
// Prefer instance with higher cumulative colocation score on current node
rc = cmp_instance_by_colocation(instance1, instance2);
if (rc != 0) {
return rc;
}
// Prefer instance with lower instance number
rc = pcmk__cmp_instance_number(instance1, instance2);
if (rc < 0) {
crm_trace("Assign %s before %s: instance number",
instance1->id, instance2->id);
} else if (rc > 0) {
crm_trace("Assign %s after %s: instance number",
instance1->id, instance2->id);
} else {
crm_trace("No assignment preference for %s vs. %s",
instance1->id, instance2->id);
}
return rc;
}
/*!
* \internal
* \brief Increment the parent's instance count after assigning an instance
*
* An instance's parent tracks how many instances have been assigned to each
* node via its pe_node_t:count member. After assigning an instance to a node,
* find the corresponding node in the parent's allowed table and increment it.
*
* \param[in,out] instance Instance whose parent to update
* \param[in] assigned_to Node to which the instance was assigned
*/
static void
increment_parent_count(pe_resource_t *instance, const pe_node_t *assigned_to)
{
pe_node_t *allowed = NULL;
if (assigned_to == NULL) {
return;
}
allowed = pcmk__top_allowed_node(instance, assigned_to);
if (allowed == NULL) {
/* The instance is allowed on the node, but its parent isn't. This
* shouldn't be possible if the resource is managed, and we won't be
* able to limit the number of instances assigned to the node.
*/
CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pe_rsc_managed));
} else {
allowed->count++;
}
}
/*!
* \internal
* \brief Assign an instance to a node
*
* \param[in,out] instance Clone instance or bundle replica container
* \param[in] prefer If not NULL, attempt early assignment to this
* node, if still the best choice; otherwise,
* perform final assignment
* \param[in] max_per_node Assign at most this many instances to one node
*
* \return Node to which \p instance is assigned
*/
static const pe_node_t *
assign_instance(pe_resource_t *instance, const pe_node_t *prefer,
int max_per_node)
{
pe_node_t *chosen = NULL;
pe_rsc_trace(instance, "Assigning %s (preferring %s)", instance->id,
((prefer == NULL)? "no node" : prefer->details->uname));
if (pcmk_is_set(instance->flags, pe_rsc_allocating)) {
pe_rsc_debug(instance,
"Assignment loop detected involving %s colocations",
instance->id);
return NULL;
}
ban_unavailable_allowed_nodes(instance, max_per_node);
// Failed early assignments are reversible (stop_if_fail=false)
chosen = instance->cmds->assign(instance, prefer, (prefer == NULL));
increment_parent_count(instance, chosen);
return chosen;
}
/*!
* \internal
* \brief Try to assign an instance to its current node early
*
* \param[in] rsc Clone or bundle being assigned (for logs only)
* \param[in] instance Clone instance or bundle replica container
* \param[in] current Instance's current node
* \param[in] max_per_node Maximum number of instances per node
* \param[in] available Number of instances still available for assignment
*
* \return \c true if \p instance was successfully assigned to its current node,
* or \c false otherwise
*/
static bool
assign_instance_early(const pe_resource_t *rsc, pe_resource_t *instance,
const pe_node_t *current, int max_per_node, int available)
{
const pe_node_t *chosen = NULL;
int reserved = 0;
pe_resource_t *parent = instance->parent;
GHashTable *allowed_orig = NULL;
GHashTable *allowed_orig_parent = parent->allowed_nodes;
const pe_node_t *allowed_node = g_hash_table_lookup(instance->allowed_nodes,
current->details->id);
pe_rsc_trace(instance, "Trying to assign %s to its current node %s",
instance->id, pe__node_name(current));
if (!pcmk__node_available(allowed_node, true, false)) {
pe_rsc_info(instance,
"Not assigning %s to current node %s: unavailable",
instance->id, pe__node_name(current));
return false;
}
/* On each iteration, if instance gets assigned to a node other than its
* current one, we reserve one instance for the chosen node, unassign
* instance, restore instance's original node tables, and try again. This
* way, instances are proportionally assigned to nodes based on preferences,
* but shuffling of specific instances is minimized. If a node will be
* assigned instances at all, it preferentially receives instances that are
* currently active there.
*
* parent->allowed_nodes tracks the number of instances assigned to each
* node. If a node already has max_per_node instances assigned,
* ban_unavailable_allowed_nodes() marks it as unavailable.
*
* In the end, we restore the original parent->allowed_nodes to undo the
* changes to counts during tentative assignments. If we successfully
* assigned instance to its current node, we increment that node's counter.
*/
// Back up the allowed node tables of instance and its children recursively
pcmk__copy_node_tables(instance, &allowed_orig);
// Update instances-per-node counts in a scratch table
parent->allowed_nodes = pcmk__copy_node_table(parent->allowed_nodes);
while (reserved < available) {
chosen = assign_instance(instance, current, max_per_node);
if (pe__same_node(chosen, current)) {
// Successfully assigned to current node
break;
}
// Assignment updates scores, so restore to original state
pe_rsc_debug(instance, "Rolling back node scores for %s", instance->id);
pcmk__restore_node_tables(instance, allowed_orig);
if (chosen == NULL) {
// Assignment failed, so give up
pe_rsc_info(instance,
"Not assigning %s to current node %s: unavailable",
instance->id, pe__node_name(current));
pe__set_resource_flags(instance, pe_rsc_provisional);
break;
}
// We prefer more strongly to assign an instance to the chosen node
pe_rsc_debug(instance,
"Not assigning %s to current node %s: %s is better",
instance->id, pe__node_name(current),
pe__node_name(chosen));
// Reserve one instance for the chosen node and try again
if (++reserved >= available) {
pe_rsc_info(instance,
"Not assigning %s to current node %s: "
"other assignments are more important",
instance->id, pe__node_name(current));
} else {
pe_rsc_debug(instance,
"Reserved an instance of %s for %s. Retrying "
"assignment of %s to %s",
rsc->id, pe__node_name(chosen), instance->id,
pe__node_name(current));
}
// Clear this assignment (frees chosen); leave instance counts in parent
pcmk__unassign_resource(instance);
chosen = NULL;
}
g_hash_table_destroy(allowed_orig);
// Restore original instances-per-node counts
g_hash_table_destroy(parent->allowed_nodes);
parent->allowed_nodes = allowed_orig_parent;
if (chosen == NULL) {
// Couldn't assign instance to current node
return false;
}
pe_rsc_trace(instance, "Assigned %s to current node %s",
instance->id, pe__node_name(current));
increment_parent_count(instance, chosen);
return true;
}
/*!
* \internal
* \brief Reset the node counts of a resource's allowed nodes to zero
*
* \param[in,out] rsc Resource to reset
*
* \return Number of nodes that are available to run resources
*/
static unsigned int
reset_allowed_node_counts(pe_resource_t *rsc)
{
unsigned int available_nodes = 0;
pe_node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
node->count = 0;
if (pcmk__node_available(node, false, false)) {
available_nodes++;
}
}
return available_nodes;
}
/*!
* \internal
* \brief Check whether an instance has a preferred node
*
* \param[in] instance Clone instance or bundle replica container
* \param[in] optimal_per_node Optimal number of instances per node
*
* \return Instance's current node if still available, otherwise NULL
*/
static const pe_node_t *
preferred_node(const pe_resource_t *instance, int optimal_per_node)
{
const pe_node_t *node = NULL;
const pe_node_t *parent_node = NULL;
// Check whether instance is active, healthy, and not yet assigned
if ((instance->running_on == NULL)
|| !pcmk_is_set(instance->flags, pe_rsc_provisional)
|| pcmk_is_set(instance->flags, pe_rsc_failed)) {
return NULL;
}
// Check whether instance's current node can run resources
node = pe__current_node(instance);
if (!pcmk__node_available(node, true, false)) {
pe_rsc_trace(instance, "Not assigning %s to %s early (unavailable)",
instance->id, pe__node_name(node));
return NULL;
}
// Check whether node already has optimal number of instances assigned
parent_node = pcmk__top_allowed_node(instance, node);
if ((parent_node != NULL) && (parent_node->count >= optimal_per_node)) {
pe_rsc_trace(instance,
"Not assigning %s to %s early "
"(optimal instances already assigned)",
instance->id, pe__node_name(node));
return NULL;
}
return node;
}
/*!
* \internal
* \brief Assign collective instances to nodes
*
* \param[in,out] collective Clone or bundle resource being assigned
* \param[in,out] instances List of clone instances or bundle containers
* \param[in] max_total Maximum instances to assign in total
* \param[in] max_per_node Maximum instances to assign to any one node
*/
void
pcmk__assign_instances(pe_resource_t *collective, GList *instances,
int max_total, int max_per_node)
{
// Reuse node count to track number of assigned instances
unsigned int available_nodes = reset_allowed_node_counts(collective);
int optimal_per_node = 0;
int assigned = 0;
GList *iter = NULL;
pe_resource_t *instance = NULL;
const pe_node_t *current = NULL;
if (available_nodes > 0) {
optimal_per_node = max_total / available_nodes;
}
if (optimal_per_node < 1) {
optimal_per_node = 1;
}
pe_rsc_debug(collective,
"Assigning up to %d %s instance%s to up to %u node%s "
"(at most %d per host, %d optimal)",
max_total, collective->id, pcmk__plural_s(max_total),
available_nodes, pcmk__plural_s(available_nodes),
max_per_node, optimal_per_node);
// Assign as many instances as possible to their current location
for (iter = instances; (iter != NULL) && (assigned < max_total);
iter = iter->next) {
int available = max_total - assigned;
instance = iter->data;
if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) {
continue; // Already assigned
}
current = preferred_node(instance, optimal_per_node);
if ((current != NULL)
&& assign_instance_early(collective, instance, current,
max_per_node, available)) {
assigned++;
}
}
pe_rsc_trace(collective, "Assigned %d of %d instance%s to current node",
assigned, max_total, pcmk__plural_s(max_total));
for (iter = instances; iter != NULL; iter = iter->next) {
instance = (pe_resource_t *) iter->data;
if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) {
continue; // Already assigned
}
if (instance->running_on != NULL) {
current = pe__current_node(instance);
if (pcmk__top_allowed_node(instance, current) == NULL) {
const char *unmanaged = "";
if (!pcmk_is_set(instance->flags, pe_rsc_managed)) {
unmanaged = "Unmanaged resource ";
}
crm_notice("%s%s is running on %s which is no longer allowed",
unmanaged, instance->id, pe__node_name(current));
}
}
if (assigned >= max_total) {
pe_rsc_debug(collective,
"Not assigning %s because maximum %d instances "
"already assigned",
instance->id, max_total);
resource_location(instance, NULL, -INFINITY,
"collective_limit_reached", collective->cluster);
} else if (assign_instance(instance, NULL, max_per_node) != NULL) {
assigned++;
}
}
pe_rsc_debug(collective, "Assigned %d of %d possible instance%s of %s",
assigned, max_total, pcmk__plural_s(max_total),
collective->id);
}
enum instance_state {
instance_starting = (1 << 0),
instance_stopping = (1 << 1),
/* This indicates that some instance is restarting. It's not the same as
* instance_starting|instance_stopping, which would indicate that some
* instance is starting, and some instance (not necessarily the same one) is
* stopping.
*/
instance_restarting = (1 << 2),
instance_active = (1 << 3),
instance_all = instance_starting|instance_stopping
|instance_restarting|instance_active,
};
/*!
* \internal
* \brief Check whether an instance is active, starting, and/or stopping
*
* \param[in] instance Clone instance or bundle replica container
* \param[in,out] state Whether any instance is starting, stopping, etc.
*/
static void
check_instance_state(const pe_resource_t *instance, uint32_t *state)
{
const GList *iter = NULL;
uint32_t instance_state = 0; // State of just this instance
// No need to check further if all conditions have already been detected
if (pcmk_all_flags_set(*state, instance_all)) {
return;
}
// If instance is a collective (a cloned group), check its children instead
if (instance->variant > pe_native) {
for (iter = instance->children;
(iter != NULL) && !pcmk_all_flags_set(*state, instance_all);
iter = iter->next) {
check_instance_state((const pe_resource_t *) iter->data, state);
}
return;
}
// If we get here, instance is a primitive
if (instance->running_on != NULL) {
instance_state |= instance_active;
}
// Check each of the instance's actions for runnable start or stop
for (iter = instance->actions;
(iter != NULL) && !pcmk_all_flags_set(instance_state,
instance_starting
|instance_stopping);
iter = iter->next) {
const pe_action_t *action = (const pe_action_t *) iter->data;
const bool optional = pcmk_is_set(action->flags, pe_action_optional);
if (pcmk__str_eq(PCMK_ACTION_START, action->task, pcmk__str_none)) {
if (!optional && pcmk_is_set(action->flags, pe_action_runnable)) {
pe_rsc_trace(instance, "Instance is starting due to %s",
action->uuid);
instance_state |= instance_starting;
} else {
pe_rsc_trace(instance, "%s doesn't affect %s state (%s)",
action->uuid, instance->id,
(optional? "optional" : "unrunnable"));
}
} else if (pcmk__str_eq(PCMK_ACTION_STOP, action->task,
pcmk__str_none)) {
/* Only stop actions can be pseudo-actions for primitives. That
* indicates that the node they are on is being fenced, so the stop
* is implied rather than actually executed.
*/
if (!optional
&& pcmk_any_flags_set(action->flags,
pe_action_pseudo|pe_action_runnable)) {
pe_rsc_trace(instance, "Instance is stopping due to %s",
action->uuid);
instance_state |= instance_stopping;
} else {
pe_rsc_trace(instance, "%s doesn't affect %s state (%s)",
action->uuid, instance->id,
(optional? "optional" : "unrunnable"));
}
}
}
if (pcmk_all_flags_set(instance_state,
instance_starting|instance_stopping)) {
instance_state |= instance_restarting;
}
*state |= instance_state;
}
/*!
* \internal
* \brief Create actions for collective resource instances
*
* \param[in,out] collective Clone or bundle resource to create actions for
* \param[in,out] instances List of clone instances or bundle containers
*/
void
pcmk__create_instance_actions(pe_resource_t *collective, GList *instances)
{
uint32_t state = 0;
pe_action_t *stop = NULL;
pe_action_t *stopped = NULL;
pe_action_t *start = NULL;
pe_action_t *started = NULL;
pe_rsc_trace(collective, "Creating collective instance actions for %s",
collective->id);
// Create actions for each instance appropriate to its variant
for (GList *iter = instances; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
instance->cmds->create_actions(instance);
check_instance_state(instance, &state);
}
// Create pseudo-actions for rsc start and started
start = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_START,
!pcmk_is_set(state, instance_starting),
true);
started = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_RUNNING,
!pcmk_is_set(state, instance_starting),
false);
started->priority = INFINITY;
if (pcmk_any_flags_set(state, instance_active|instance_starting)) {
pe__set_action_flags(started, pe_action_runnable);
}
// Create pseudo-actions for rsc stop and stopped
stop = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOP,
!pcmk_is_set(state, instance_stopping),
true);
stopped = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOPPED,
!pcmk_is_set(state, instance_stopping),
true);
stopped->priority = INFINITY;
if (!pcmk_is_set(state, instance_restarting)) {
pe__set_action_flags(stop, pe_action_migrate_runnable);
}
if (collective->variant == pe_clone) {
pe__create_clone_notif_pseudo_ops(collective, start, started, stop,
stopped);
}
}
/*!
* \internal
* \brief Get a list of clone instances or bundle replica containers
*
* \param[in] rsc Clone or bundle resource
*
* \return Clone instances if \p rsc is a clone, or a newly created list of
* \p rsc's replica containers if \p rsc is a bundle
* \note The caller must call free_instance_list() on the result when the list
* is no longer needed.
*/
static inline GList *
get_instance_list(const pe_resource_t *rsc)
{
if (rsc->variant == pe_container) {
return pe__bundle_containers(rsc);
} else {
return rsc->children;
}
}
/*!
* \internal
* \brief Free any memory created by get_instance_list()
*
* \param[in] rsc Clone or bundle resource passed to get_instance_list()
* \param[in,out] list Return value of get_instance_list() for \p rsc
*/
static inline void
free_instance_list(const pe_resource_t *rsc, GList *list)
{
if (list != rsc->children) {
g_list_free(list);
}
}
/*!
* \internal
* \brief Check whether an instance is compatible with a role and node
*
* \param[in] instance Clone instance or bundle replica container
* \param[in] node Instance must match this node
* \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role
* \param[in] current If true, compare instance's original node and role,
* otherwise compare assigned next node and role
*
* \return true if \p instance is compatible with \p node and \p role,
* otherwise false
*/
bool
pcmk__instance_matches(const pe_resource_t *instance, const pe_node_t *node,
enum rsc_role_e role, bool current)
{
pe_node_t *instance_node = NULL;
CRM_CHECK((instance != NULL) && (node != NULL), return false);
if ((role != RSC_ROLE_UNKNOWN)
&& (role != instance->fns->state(instance, current))) {
pe_rsc_trace(instance,
"%s is not a compatible instance (role is not %s)",
instance->id, role2text(role));
return false;
}
if (!is_set_recursive(instance, pe_rsc_block, true)) {
// We only want instances that haven't failed
instance_node = instance->fns->location(instance, NULL, current);
}
if (instance_node == NULL) {
pe_rsc_trace(instance,
"%s is not a compatible instance (not assigned to a node)",
instance->id);
return false;
}
if (!pe__same_node(instance_node, node)) {
pe_rsc_trace(instance,
"%s is not a compatible instance (assigned to %s not %s)",
instance->id, pe__node_name(instance_node),
pe__node_name(node));
return false;
}
return true;
}
/*!
* \internal
* \brief Find an instance that matches a given resource by node and role
*
* \param[in] match_rsc Resource that instance must match (for logging only)
* \param[in] rsc Clone or bundle resource to check for matching instance
* \param[in] node Instance must match this node
* \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role
* \param[in] current If true, compare instance's original node and role,
* otherwise compare assigned next node and role
*
* \return \p rsc instance matching \p node and \p role if any, otherwise NULL
*/
static pe_resource_t *
find_compatible_instance_on_node(const pe_resource_t *match_rsc,
const pe_resource_t *rsc,
const pe_node_t *node, enum rsc_role_e role,
bool current)
{
GList *instances = NULL;
instances = get_instance_list(rsc);
for (GList *iter = instances; iter != NULL; iter = iter->next) {
pe_resource_t *instance = (pe_resource_t *) iter->data;
if (pcmk__instance_matches(instance, node, role, current)) {
pe_rsc_trace(match_rsc,
"Found %s %s instance %s compatible with %s on %s",
role == RSC_ROLE_UNKNOWN? "matching" : role2text(role),
rsc->id, instance->id, match_rsc->id,
pe__node_name(node));
free_instance_list(rsc, instances); // Only frees list, not contents
return instance;
}
}
free_instance_list(rsc, instances);
pe_rsc_trace(match_rsc, "No %s %s instance found compatible with %s on %s",
((role == RSC_ROLE_UNKNOWN)? "matching" : role2text(role)),
rsc->id, match_rsc->id, pe__node_name(node));
return NULL;
}
/*!
* \internal
* \brief Find a clone instance or bundle container compatible with a resource
*
* \param[in] match_rsc Resource that instance must match
* \param[in] rsc Clone or bundle resource to check for matching instance
* \param[in] role If not RSC_ROLE_UNKNOWN, instance must match this role
* \param[in] current If true, compare instance's original node and role,
* otherwise compare assigned next node and role
*
* \return Compatible (by \p role and \p match_rsc location) instance of \p rsc
* if any, otherwise NULL
*/
pe_resource_t *
pcmk__find_compatible_instance(const pe_resource_t *match_rsc,
const pe_resource_t *rsc, enum rsc_role_e role,
bool current)
{
pe_resource_t *instance = NULL;
GList *nodes = NULL;
const pe_node_t *node = match_rsc->fns->location(match_rsc, NULL, current);
// If match_rsc has a node, check only that node
if (node != NULL) {
return find_compatible_instance_on_node(match_rsc, rsc, node, role,
current);
}
// Otherwise check for an instance matching any of match_rsc's allowed nodes
nodes = pcmk__sort_nodes(g_hash_table_get_values(match_rsc->allowed_nodes),
NULL);
for (GList *iter = nodes; (iter != NULL) && (instance == NULL);
iter = iter->next) {
instance = find_compatible_instance_on_node(match_rsc, rsc,
(pe_node_t *) iter->data,
role, current);
}
if (instance == NULL) {
pe_rsc_debug(rsc, "No %s instance found compatible with %s",
rsc->id, match_rsc->id);
}
g_list_free(nodes);
return instance;
}
/*!
* \internal
* \brief Unassign an instance if mandatory ordering has no interleave match
*
* \param[in] first 'First' action in an ordering
* \param[in] then 'Then' action in an ordering
* \param[in,out] then_instance 'Then' instance that has no interleave match
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in] current If true, "then" action is stopped or demoted
*
* \return true if \p then_instance was unassigned, otherwise false
*/
static bool
unassign_if_mandatory(const pe_action_t *first, const pe_action_t *then,
pe_resource_t *then_instance, uint32_t type, bool current)
{
// Allow "then" instance to go down even without an interleave match
if (current) {
pe_rsc_trace(then->rsc,
"%s has no instance to order before stopping "
"or demoting %s",
first->rsc->id, then_instance->id);
/* If the "first" action must be runnable, but there is no "first"
* instance, the "then" instance must not be allowed to come up.
*/
} else if (pcmk_any_flags_set(type, pe_order_runnable_left
|pe_order_implies_then)) {
pe_rsc_info(then->rsc,
"Inhibiting %s from being active "
"because there is no %s instance to interleave",
then_instance->id, first->rsc->id);
return pcmk__assign_resource(then_instance, NULL, true, true);
}
return false;
}
/*!
* \internal
* \brief Find first matching action for a clone instance or bundle container
*
* \param[in] action Action in an interleaved ordering
* \param[in] instance Clone instance or bundle container being interleaved
* \param[in] action_name Action to look for
* \param[in] node If not NULL, require action to be on this node
* \param[in] for_first If true, \p instance is the 'first' resource in the
* ordering, otherwise it is the 'then' resource
*
* \return First action for \p instance (or in some cases if \p instance is a
* bundle container, its containerized resource) that matches
* \p action_name and \p node if any, otherwise NULL
*/
static pe_action_t *
find_instance_action(const pe_action_t *action, const pe_resource_t *instance,
const char *action_name, const pe_node_t *node,
bool for_first)
{
const pe_resource_t *rsc = NULL;
pe_action_t *matching_action = NULL;
/* If instance is a bundle container, sometimes we should interleave the
* action for the container itself, and sometimes for the containerized
* resource.
*
* For example, given "start bundle A then bundle B", B likely requires the
* service inside A's container to be active, rather than just the
* container, so we should interleave the action for A's containerized
* resource. On the other hand, it's possible B's container itself requires
* something from A, so we should interleave the action for B's container.
*
* Essentially, for 'first', we should use the containerized resource for
* everything except stop, and for 'then', we should use the container for
* everything except promote and demote (which can only be performed on the
* containerized resource).
*/
if ((for_first && !pcmk__str_any_of(action->task, PCMK_ACTION_STOP,
PCMK_ACTION_STOPPED, NULL))
|| (!for_first && pcmk__str_any_of(action->task, PCMK_ACTION_PROMOTE,
PCMK_ACTION_PROMOTED,
PCMK_ACTION_DEMOTE,
PCMK_ACTION_DEMOTED, NULL))) {
rsc = pe__get_rsc_in_container(instance);
}
if (rsc == NULL) {
rsc = instance; // No containerized resource, use instance itself
} else {
node = NULL; // Containerized actions are on bundle-created guest
}
matching_action = find_first_action(rsc->actions, NULL, action_name, node);
if (matching_action != NULL) {
return matching_action;
}
if (pcmk_is_set(instance->flags, pe_rsc_orphan)
|| pcmk__str_any_of(action_name, PCMK_ACTION_STOP, PCMK_ACTION_DEMOTE,
NULL)) {
crm_trace("No %s action found for %s%s",
action_name,
pcmk_is_set(instance->flags, pe_rsc_orphan)? "orphan " : "",
instance->id);
} else {
crm_err("No %s action found for %s to interleave (bug?)",
action_name, instance->id);
}
return NULL;
}
/*!
* \internal
* \brief Get the original action name of a bundle or clone action
*
* Given an action for a bundle or clone, get the original action name,
* mapping notify to the action being notified, and if the instances are
* primitives, mapping completion actions to the action that was completed
* (for example, stopped to stop).
*
* \param[in] action Clone or bundle action to check
*
* \return Original action name for \p action
*/
static const char *
orig_action_name(const pe_action_t *action)
{
const pe_resource_t *instance = action->rsc->children->data; // Any instance
char *action_type = NULL;
const char *action_name = action->task;
enum action_tasks orig_task = no_action;
if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY,
- CRMD_ACTION_NOTIFIED, NULL)) {
+ PCMK_ACTION_NOTIFIED, NULL)) {
// action->uuid is RSC_(confirmed-){pre,post}_notify_ACTION_INTERVAL
CRM_CHECK(parse_op_key(action->uuid, NULL, &action_type, NULL),
return task2text(no_action));
action_name = strstr(action_type, "_notify_");
CRM_CHECK(action_name != NULL, return task2text(no_action));
action_name += strlen("_notify_");
}
orig_task = get_complex_task(instance, action_name);
free(action_type);
return task2text(orig_task);
}
/*!
* \internal
* \brief Update two interleaved actions according to an ordering between them
*
* Given information about an ordering of two interleaved actions, update the
* actions' flags (and runnable_before members if appropriate) as appropriate
* for the ordering. Effects may cascade to other orderings involving the
* actions as well.
*
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
static uint32_t
update_interleaved_actions(pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t filter,
uint32_t type)
{
GList *instances = NULL;
uint32_t changed = pcmk__updated_none;
const char *orig_first_task = orig_action_name(first);
// Stops and demotes must be interleaved with instance on current node
bool current = pcmk__ends_with(first->uuid, "_" PCMK_ACTION_STOPPED "_0")
|| pcmk__ends_with(first->uuid,
"_" PCMK_ACTION_DEMOTED "_0");
// Update the specified actions for each "then" instance individually
instances = get_instance_list(then->rsc);
for (GList *iter = instances; iter != NULL; iter = iter->next) {
pe_resource_t *first_instance = NULL;
pe_resource_t *then_instance = iter->data;
pe_action_t *first_action = NULL;
pe_action_t *then_action = NULL;
// Find a "first" instance to interleave with this "then" instance
first_instance = pcmk__find_compatible_instance(then_instance,
first->rsc,
RSC_ROLE_UNKNOWN,
current);
if (first_instance == NULL) { // No instance can be interleaved
if (unassign_if_mandatory(first, then, then_instance, type,
current)) {
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
continue;
}
first_action = find_instance_action(first, first_instance,
orig_first_task, node, true);
if (first_action == NULL) {
continue;
}
then_action = find_instance_action(then, then_instance, then->task,
node, false);
if (then_action == NULL) {
continue;
}
if (order_actions(first_action, then_action, type)) {
pcmk__set_updated_flags(changed, first,
pcmk__updated_first|pcmk__updated_then);
}
changed |= then_instance->cmds->update_ordered_actions(
first_action, then_action, node,
first_instance->cmds->action_flags(first_action, node), filter,
type, then->rsc->cluster);
}
free_instance_list(then->rsc, instances);
return changed;
}
/*!
* \internal
* \brief Check whether two actions in an ordering can be interleaved
*
* \param[in] first 'First' action in the ordering
* \param[in] then 'Then' action in the ordering
*
* \return true if \p first and \p then can be interleaved, otherwise false
*/
static bool
can_interleave_actions(const pe_action_t *first, const pe_action_t *then)
{
bool interleave = false;
pe_resource_t *rsc = NULL;
if ((first->rsc == NULL) || (then->rsc == NULL)) {
crm_trace("Not interleaving %s with %s: not resource actions",
first->uuid, then->uuid);
return false;
}
if (first->rsc == then->rsc) {
crm_trace("Not interleaving %s with %s: same resource",
first->uuid, then->uuid);
return false;
}
if ((first->rsc->variant < pe_clone) || (then->rsc->variant < pe_clone)) {
crm_trace("Not interleaving %s with %s: not clones or bundles",
first->uuid, then->uuid);
return false;
}
if (pcmk__ends_with(then->uuid, "_stop_0")
|| pcmk__ends_with(then->uuid, "_demote_0")) {
rsc = first->rsc;
} else {
rsc = then->rsc;
}
interleave = crm_is_true(g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_INTERLEAVE));
pe_rsc_trace(rsc, "'%s then %s' will %sbe interleaved (based on %s)",
first->uuid, then->uuid, (interleave? "" : "not "), rsc->id);
return interleave;
}
/*!
* \internal
* \brief Update non-interleaved instance actions according to an ordering
*
* Given information about an ordering of two non-interleaved actions, update
* the actions' flags (and runnable_before members if appropriate) as
* appropriate for the ordering. Effects may cascade to other orderings
* involving the actions as well.
*
* \param[in,out] instance Clone instance or bundle container
* \param[in,out] first "First" action in ordering
* \param[in] then "Then" action in ordering (for \p instance's parent)
* \param[in] node If not NULL, limit scope of ordering to this node
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
static uint32_t
update_noninterleaved_actions(pe_resource_t *instance, pe_action_t *first,
const pe_action_t *then, const pe_node_t *node,
uint32_t flags, uint32_t filter, uint32_t type)
{
pe_action_t *instance_action = NULL;
uint32_t instance_flags = 0;
uint32_t changed = pcmk__updated_none;
// Check whether instance has an equivalent of "then" action
instance_action = find_first_action(instance->actions, NULL, then->task,
node);
if (instance_action == NULL) {
return changed;
}
// Check whether action is runnable
instance_flags = instance->cmds->action_flags(instance_action, node);
if (!pcmk_is_set(instance_flags, pe_action_runnable)) {
return changed;
}
// If so, update actions for the instance
changed = instance->cmds->update_ordered_actions(first, instance_action,
node, flags, filter, type,
instance->cluster);
// Propagate any changes to later actions
if (pcmk_is_set(changed, pcmk__updated_then)) {
for (GList *after_iter = instance_action->actions_after;
after_iter != NULL; after_iter = after_iter->next) {
pe_action_wrapper_t *after = after_iter->data;
pcmk__update_action_for_orderings(after->action, instance->cluster);
}
}
return changed;
}
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two clone or bundle actions, update
* the actions' flags (and runnable_before members if appropriate) as
* appropriate for the ordering. Effects may cascade to other orderings
* involving the actions as well.
*
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t
pcmk__instance_update_ordered_actions(pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set)
{
CRM_ASSERT((first != NULL) && (then != NULL) && (data_set != NULL));
if (then->rsc == NULL) {
return pcmk__updated_none;
} else if (can_interleave_actions(first, then)) {
return update_interleaved_actions(first, then, node, filter, type);
} else {
uint32_t changed = pcmk__updated_none;
GList *instances = get_instance_list(then->rsc);
// Update actions for the clone or bundle resource itself
changed |= pcmk__update_ordered_actions(first, then, node, flags,
filter, type, data_set);
// Update the 'then' clone instances or bundle containers individually
for (GList *iter = instances; iter != NULL; iter = iter->next) {
pe_resource_t *instance = iter->data;
changed |= update_noninterleaved_actions(instance, first, then,
node, flags, filter, type);
}
free_instance_list(then->rsc, instances);
return changed;
}
}
#define pe__clear_action_summary_flags(flags, action, flag) do { \
flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Action summary", action->rsc->id, \
flags, flag, #flag); \
} while (0)
/*!
* \internal
* \brief Return action flags for a given clone or bundle action
*
* \param[in,out] action Action for a clone or bundle
* \param[in] instances Clone instances or bundle containers
* \param[in] node If not NULL, limit effects to this node
*
* \return Flags appropriate to \p action on \p node
*/
uint32_t
pcmk__collective_action_flags(pe_action_t *action, const GList *instances,
const pe_node_t *node)
{
bool any_runnable = false;
const char *action_name = orig_action_name(action);
// Set original assumptions (optional and runnable may be cleared below)
uint32_t flags = pe_action_optional|pe_action_runnable|pe_action_pseudo;
for (const GList *iter = instances; iter != NULL; iter = iter->next) {
const pe_resource_t *instance = iter->data;
const pe_node_t *instance_node = NULL;
pe_action_t *instance_action = NULL;
uint32_t instance_flags;
// Node is relevant only to primitive instances
if (instance->variant == pe_native) {
instance_node = node;
}
instance_action = find_first_action(instance->actions, NULL,
action_name, instance_node);
if (instance_action == NULL) {
pe_rsc_trace(action->rsc, "%s has no %s action on %s",
instance->id, action_name, pe__node_name(node));
continue;
}
pe_rsc_trace(action->rsc, "%s has %s for %s on %s",
instance->id, instance_action->uuid, action_name,
pe__node_name(node));
instance_flags = instance->cmds->action_flags(instance_action, node);
// If any instance action is mandatory, so is the collective action
if (pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(instance_flags, pe_action_optional)) {
pe_rsc_trace(instance, "%s is mandatory because %s is",
action->uuid, instance_action->uuid);
pe__clear_action_summary_flags(flags, action, pe_action_optional);
pe__clear_action_flags(action, pe_action_optional);
}
// If any instance action is runnable, so is the collective action
if (pcmk_is_set(instance_flags, pe_action_runnable)) {
any_runnable = true;
}
}
if (!any_runnable) {
pe_rsc_trace(action->rsc,
"%s is not runnable because no instance can run %s",
action->uuid, action_name);
pe__clear_action_summary_flags(flags, action, pe_action_runnable);
if (node == NULL) {
pe__clear_action_flags(action, pe_action_runnable);
}
}
return flags;
}
diff --git a/lib/pacemaker/pcmk_simulate.c b/lib/pacemaker/pcmk_simulate.c
index 90d8533f1d..61ba443f07 100644
--- a/lib/pacemaker/pcmk_simulate.c
+++ b/lib/pacemaker/pcmk_simulate.c
@@ -1,1003 +1,1003 @@
/*
* Copyright 2021-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/cib/internal.h>
#include <crm/common/output.h>
#include <crm/common/results.h>
#include <crm/pengine/pe_types.h>
#include <pacemaker-internal.h>
#include <pacemaker.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include "libpacemaker_private.h"
static pcmk__output_t *out = NULL;
static cib_t *fake_cib = NULL;
static GList *fake_resource_list = NULL;
static const GList *fake_op_fail_list = NULL;
static void set_effective_date(pe_working_set_t *data_set, bool print_original,
const char *use_date);
/*!
* \internal
* \brief Create an action name for use in a dot graph
*
* \param[in] action Action to create name for
* \param[in] verbose If true, add action ID to name
*
* \return Newly allocated string with action name
* \note It is the caller's responsibility to free the result.
*/
static char *
create_action_name(const pe_action_t *action, bool verbose)
{
char *action_name = NULL;
const char *prefix = "";
const char *action_host = NULL;
const char *clone_name = NULL;
const char *task = action->task;
if (action->node != NULL) {
action_host = action->node->details->uname;
} else if (!pcmk_is_set(action->flags, pe_action_pseudo)) {
action_host = "<none>";
}
if (pcmk__str_eq(action->task, PCMK_ACTION_CANCEL, pcmk__str_none)) {
prefix = "Cancel ";
task = action->cancel_task;
}
if (action->rsc != NULL) {
clone_name = action->rsc->clone_name;
}
if (clone_name != NULL) {
char *key = NULL;
guint interval_ms = 0;
if (pcmk__guint_from_hash(action->meta,
XML_LRM_ATTR_INTERVAL_MS, 0,
&interval_ms) != pcmk_rc_ok) {
interval_ms = 0;
}
- if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY, RSC_NOTIFIED,
- NULL)) {
+ if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY,
+ PCMK_ACTION_NOTIFIED, NULL)) {
const char *n_type = g_hash_table_lookup(action->meta,
"notify_key_type");
const char *n_task = g_hash_table_lookup(action->meta,
"notify_key_operation");
CRM_ASSERT(n_type != NULL);
CRM_ASSERT(n_task != NULL);
key = pcmk__notify_key(clone_name, n_type, n_task);
} else {
key = pcmk__op_key(clone_name, task, interval_ms);
}
if (action_host != NULL) {
action_name = crm_strdup_printf("%s%s %s",
prefix, key, action_host);
} else {
action_name = crm_strdup_printf("%s%s", prefix, key);
}
free(key);
} else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_none)) {
const char *op = g_hash_table_lookup(action->meta, "stonith_action");
action_name = crm_strdup_printf("%s%s '%s' %s",
prefix, action->task, op, action_host);
} else if (action->rsc && action_host) {
action_name = crm_strdup_printf("%s%s %s",
prefix, action->uuid, action_host);
} else if (action_host) {
action_name = crm_strdup_printf("%s%s %s",
prefix, action->task, action_host);
} else {
action_name = crm_strdup_printf("%s", action->uuid);
}
if (verbose) {
char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id);
free(action_name);
action_name = with_id;
}
return action_name;
}
/*!
* \internal
* \brief Display the status of a cluster
*
* \param[in,out] data_set Cluster working set
* \param[in] show_opts How to modify display (as pcmk_show_opt_e flags)
* \param[in] section_opts Sections to display (as pcmk_section_e flags)
* \param[in] title What to use as list title
* \param[in] print_spacer Whether to display a spacer first
*/
static void
print_cluster_status(pe_working_set_t *data_set, uint32_t show_opts,
uint32_t section_opts, const char *title,
bool print_spacer)
{
pcmk__output_t *out = data_set->priv;
GList *all = NULL;
crm_exit_t stonith_rc = 0;
enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid;
section_opts |= pcmk_section_nodes | pcmk_section_resources;
show_opts |= pcmk_show_inactive_rscs | pcmk_show_failed_detail;
all = g_list_prepend(all, (gpointer) "*");
PCMK__OUTPUT_SPACER_IF(out, print_spacer);
out->begin_list(out, NULL, NULL, "%s", title);
out->message(out, "cluster-status",
data_set, state, stonith_rc, NULL,
false, section_opts, show_opts, NULL, all, all);
out->end_list(out);
g_list_free(all);
}
/*!
* \internal
* \brief Display a summary of all actions scheduled in a transition
*
* \param[in,out] data_set Cluster working set (fully scheduled)
* \param[in] print_spacer Whether to display a spacer first
*/
static void
print_transition_summary(pe_working_set_t *data_set, bool print_spacer)
{
pcmk__output_t *out = data_set->priv;
PCMK__OUTPUT_SPACER_IF(out, print_spacer);
out->begin_list(out, NULL, NULL, "Transition Summary");
pcmk__output_actions(data_set);
out->end_list(out);
}
/*!
* \internal
* \brief Reset a cluster working set's input, output, date, and flags
*
* \param[in,out] data_set Cluster working set
* \param[in] input What to set as cluster input
* \param[in] out What to set as cluster output object
* \param[in] use_date What to set as cluster's current timestamp
* \param[in] flags Cluster flags to add (pe_flag_*)
*/
static void
reset(pe_working_set_t *data_set, xmlNodePtr input, pcmk__output_t *out,
const char *use_date, unsigned int flags)
{
data_set->input = input;
data_set->priv = out;
set_effective_date(data_set, true, use_date);
if (pcmk_is_set(flags, pcmk_sim_sanitized)) {
pe__set_working_set_flags(data_set, pe_flag_sanitized);
}
if (pcmk_is_set(flags, pcmk_sim_show_scores)) {
pe__set_working_set_flags(data_set, pe_flag_show_scores);
}
if (pcmk_is_set(flags, pcmk_sim_show_utilization)) {
pe__set_working_set_flags(data_set, pe_flag_show_utilization);
}
}
/*!
* \brief Write out a file in dot(1) format describing the actions that will
* be taken by the scheduler in response to an input CIB file.
*
* \param[in,out] data_set Working set for the cluster
* \param[in] dot_file The filename to write
* \param[in] all_actions Write all actions, even those that are optional
* or are on unmanaged resources
* \param[in] verbose Add extra information, such as action IDs, to the
* output
*
* \return Standard Pacemaker return code
*/
static int
write_sim_dotfile(pe_working_set_t *data_set, const char *dot_file,
bool all_actions, bool verbose)
{
GList *iter = NULL;
FILE *dot_strm = fopen(dot_file, "w");
if (dot_strm == NULL) {
return errno;
}
fprintf(dot_strm, " digraph \"g\" {\n");
for (iter = data_set->actions; iter != NULL; iter = iter->next) {
pe_action_t *action = (pe_action_t *) iter->data;
const char *style = "dashed";
const char *font = "black";
const char *color = "black";
char *action_name = create_action_name(action, verbose);
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
font = "orange";
}
if (pcmk_is_set(action->flags, pe_action_dumped)) {
style = "bold";
color = "green";
} else if ((action->rsc != NULL)
&& !pcmk_is_set(action->rsc->flags, pe_rsc_managed)) {
color = "red";
font = "purple";
if (!all_actions) {
goto do_not_write;
}
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
color = "blue";
if (!all_actions) {
goto do_not_write;
}
} else {
color = "red";
CRM_LOG_ASSERT(!pcmk_is_set(action->flags, pe_action_runnable));
}
pe__set_action_flags(action, pe_action_dumped);
fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n",
action_name, style, color, font);
do_not_write:
free(action_name);
}
for (iter = data_set->actions; iter != NULL; iter = iter->next) {
pe_action_t *action = (pe_action_t *) iter->data;
for (GList *before_iter = action->actions_before;
before_iter != NULL; before_iter = before_iter->next) {
pe_action_wrapper_t *before = before_iter->data;
char *before_name = NULL;
char *after_name = NULL;
const char *style = "dashed";
bool optional = true;
if (before->state == pe_link_dumped) {
optional = false;
style = "bold";
} else if (before->type == pe_order_none) {
continue;
} else if (pcmk_is_set(before->action->flags, pe_action_dumped)
&& pcmk_is_set(action->flags, pe_action_dumped)
&& before->type != pe_order_load) {
optional = false;
}
if (all_actions || !optional) {
before_name = create_action_name(before->action, verbose);
after_name = create_action_name(action, verbose);
fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n",
before_name, after_name, style);
free(before_name);
free(after_name);
}
}
}
fprintf(dot_strm, "}\n");
fflush(dot_strm);
fclose(dot_strm);
return pcmk_rc_ok;
}
/*!
* \brief Profile the configuration updates and scheduler actions in a single
* CIB file, printing the profiling timings.
*
* \note \p data_set->priv must have been set to a valid \p pcmk__output_t
* object before this function is called.
*
* \param[in] xml_file The CIB file to profile
* \param[in] repeat Number of times to run
* \param[in,out] data_set Working set for the cluster
* \param[in] use_date The date to set the cluster's time to (may be NULL)
*/
static void
profile_file(const char *xml_file, long long repeat, pe_working_set_t *data_set,
const char *use_date)
{
pcmk__output_t *out = data_set->priv;
xmlNode *cib_object = NULL;
clock_t start = 0;
clock_t end;
unsigned long long data_set_flags = pe_flag_no_compat;
CRM_ASSERT(out != NULL);
cib_object = filename2xml(xml_file);
start = clock();
if (pcmk_find_cib_element(cib_object, XML_CIB_TAG_STATUS) == NULL) {
create_xml_node(cib_object, XML_CIB_TAG_STATUS);
}
if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) {
free_xml(cib_object);
return;
}
if (validate_xml(cib_object, NULL, FALSE) != TRUE) {
free_xml(cib_object);
return;
}
if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
data_set_flags |= pe_flag_show_scores;
}
if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
data_set_flags |= pe_flag_show_utilization;
}
for (int i = 0; i < repeat; ++i) {
xmlNode *input = (repeat == 1)? cib_object : copy_xml(cib_object);
data_set->input = input;
set_effective_date(data_set, false, use_date);
pcmk__schedule_actions(input, data_set_flags, data_set);
pe_reset_working_set(data_set);
}
end = clock();
out->message(out, "profile", xml_file, start, end);
}
void
pcmk__profile_dir(const char *dir, long long repeat, pe_working_set_t *data_set,
const char *use_date)
{
pcmk__output_t *out = data_set->priv;
struct dirent **namelist;
int file_num = scandir(dir, &namelist, 0, alphasort);
CRM_ASSERT(out != NULL);
if (file_num > 0) {
struct stat prop;
char buffer[FILENAME_MAX];
out->begin_list(out, NULL, NULL, "Timings");
while (file_num--) {
if ('.' == namelist[file_num]->d_name[0]) {
free(namelist[file_num]);
continue;
} else if (!pcmk__ends_with_ext(namelist[file_num]->d_name,
".xml")) {
free(namelist[file_num]);
continue;
}
snprintf(buffer, sizeof(buffer), "%s/%s",
dir, namelist[file_num]->d_name);
if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) {
profile_file(buffer, repeat, data_set, use_date);
}
free(namelist[file_num]);
}
free(namelist);
out->end_list(out);
}
}
/*!
* \brief Set the date of the cluster, either to the value given by
* \p use_date, or to the "execution-date" value in the CIB.
*
* \note \p data_set->priv must have been set to a valid \p pcmk__output_t
* object before this function is called.
*
* \param[in,out] data_set Working set for the cluster
* \param[in] print_original If \p true, the "execution-date" should
* also be printed
* \param[in] use_date The date to set the cluster's time to
* (may be NULL)
*/
static void
set_effective_date(pe_working_set_t *data_set, bool print_original,
const char *use_date)
{
pcmk__output_t *out = data_set->priv;
time_t original_date = 0;
CRM_ASSERT(out != NULL);
crm_element_value_epoch(data_set->input, "execution-date", &original_date);
if (use_date) {
data_set->now = crm_time_new(use_date);
out->info(out, "Setting effective cluster time: %s", use_date);
crm_time_log(LOG_NOTICE, "Pretending 'now' is", data_set->now,
crm_time_log_date | crm_time_log_timeofday);
} else if (original_date != 0) {
data_set->now = pcmk__copy_timet(original_date);
if (print_original) {
char *when = crm_time_as_string(data_set->now,
crm_time_log_date|crm_time_log_timeofday);
out->info(out, "Using the original execution date of: %s", when);
free(when);
}
}
}
/*!
* \internal
* \brief Simulate successfully executing a pseudo-action in a graph
*
* \param[in,out] graph Graph to update with pseudo-action result
* \param[in,out] action Pseudo-action to simulate executing
*
* \return Standard Pacemaker return code
*/
static int
simulate_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
out->message(out, "inject-pseudo-action", node, task);
pcmk__update_graph(graph, action);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Simulate executing a resource action in a graph
*
* \param[in,out] graph Graph to update with resource action result
* \param[in,out] action Resource action to simulate executing
*
* \return Standard Pacemaker return code
*/
static int
simulate_resource_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
int rc;
lrmd_event_data_t *op = NULL;
int target_outcome = PCMK_OCF_OK;
const char *rtype = NULL;
const char *rclass = NULL;
const char *resource = NULL;
const char *rprovider = NULL;
const char *resource_config_name = NULL;
const char *operation = crm_element_value(action->xml, "operation");
const char *target_rc_s = crm_meta_value(action->params,
XML_ATTR_TE_TARGET_RC);
xmlNode *cib_node = NULL;
xmlNode *cib_resource = NULL;
xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
char *uuid = NULL;
const char *router_node = crm_element_value(action->xml,
XML_LRM_ATTR_ROUTER_NODE);
// Certain actions don't need to be displayed or history entries
if (pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
crm_debug("No history injection for %s op on %s", operation, node);
goto done; // Confirm action and update graph
}
if (action_rsc == NULL) { // Shouldn't be possible
crm_log_xml_err(action->xml, "Bad");
free(node);
return EPROTO;
}
/* A resource might be known by different names in the configuration and in
* the action (for example, a clone instance). Grab the configuration name
* (which is preferred when writing history), and if necessary, the instance
* name.
*/
resource_config_name = crm_element_value(action_rsc, XML_ATTR_ID);
if (resource_config_name == NULL) { // Shouldn't be possible
crm_log_xml_err(action->xml, "No ID");
free(node);
return EPROTO;
}
resource = resource_config_name;
if (pe_find_resource(fake_resource_list, resource) == NULL) {
const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG);
if ((longname != NULL)
&& (pe_find_resource(fake_resource_list, longname) != NULL)) {
resource = longname;
}
}
// Certain actions need to be displayed but don't need history entries
if (pcmk__strcase_any_of(operation, PCMK_ACTION_DELETE,
PCMK_ACTION_META_DATA, NULL)) {
out->message(out, "inject-rsc-action", resource, operation, node,
(guint) 0);
goto done; // Confirm action and update graph
}
rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS);
rtype = crm_element_value(action_rsc, XML_ATTR_TYPE);
rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER);
pcmk__scan_min_int(target_rc_s, &target_outcome, 0);
CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL,
cib_sync_call|cib_scope_local) == pcmk_ok);
// Ensure the action node is in the CIB
uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID);
cib_node = pcmk__inject_node(fake_cib, node,
((router_node == NULL)? uuid: node));
free(uuid);
CRM_ASSERT(cib_node != NULL);
// Add a history entry for the action
cib_resource = pcmk__inject_resource_history(out, cib_node, resource,
resource_config_name,
rclass, rtype, rprovider);
if (cib_resource == NULL) {
crm_err("Could not simulate action %d history for resource %s",
action->id, resource);
free(node);
free_xml(cib_node);
return EINVAL;
}
// Simulate and display an executor event for the action result
op = pcmk__event_from_graph_action(cib_resource, action, PCMK_EXEC_DONE,
target_outcome, "User-injected result");
out->message(out, "inject-rsc-action", resource, op->op_type, node,
op->interval_ms);
// Check whether action is in a list of desired simulated failures
for (const GList *iter = fake_op_fail_list;
iter != NULL; iter = iter->next) {
const char *spec = (const char *) iter->data;
char *key = NULL;
const char *match_name = NULL;
// Allow user to specify anonymous clone with or without instance number
key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource, op->op_type,
op->interval_ms, node);
if (strncasecmp(key, spec, strlen(key)) == 0) {
match_name = resource;
}
free(key);
// If not found, try the resource's name in the configuration
if ((match_name == NULL)
&& (strcmp(resource, resource_config_name) != 0)) {
key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource_config_name,
op->op_type, op->interval_ms, node);
if (strncasecmp(key, spec, strlen(key)) == 0) {
match_name = resource_config_name;
}
free(key);
}
if (match_name == NULL) {
continue; // This failed action entry doesn't match
}
// ${match_name}_${task}_${interval_in_ms}@${node}=${rc}
rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc);
if (rc != 1) {
out->err(out, "Invalid failed operation '%s' "
"(result code must be integer)", spec);
continue; // Keep checking other list entries
}
out->info(out, "Pretending action %d failed with rc=%d",
action->id, op->rc);
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
graph->abort_priority = INFINITY;
pcmk__inject_failcount(out, cib_node, match_name, op->op_type,
op->interval_ms, op->rc);
break;
}
pcmk__inject_action_result(cib_resource, op, target_outcome);
lrmd_free_event(op);
rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call|cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
done:
free(node);
free_xml(cib_node);
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(graph, action);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Simulate successfully executing a cluster action
*
* \param[in,out] graph Graph to update with action result
* \param[in,out] action Cluster action to simulate
*
* \return Standard Pacemaker return code
*/
static int
simulate_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
out->message(out, "inject-cluster-action", node, task, rsc);
pcmk__update_graph(graph, action);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Simulate successfully executing a fencing action
*
* \param[in,out] graph Graph to update with action result
* \param[in,out] action Fencing action to simulate
*
* \return Standard Pacemaker return code
*/
static int
simulate_fencing_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
const char *op = crm_meta_value(action->params, "stonith_action");
char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
out->message(out, "inject-fencing-action", target, op);
if (!pcmk__str_eq(op, "on", pcmk__str_casei)) {
int rc = pcmk_ok;
GString *xpath = g_string_sized_new(512);
// Set node state to offline
xmlNode *cib_node = pcmk__inject_node_state_change(fake_cib, target,
false);
CRM_ASSERT(cib_node != NULL);
crm_xml_add(cib_node, XML_ATTR_ORIGIN, __func__);
rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call|cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
// Simulate controller clearing node's resource history and attributes
pcmk__g_strcat(xpath,
"//" XML_CIB_TAG_STATE
"[@" XML_ATTR_UNAME "='", target, "']/" XML_CIB_TAG_LRM,
NULL);
fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL,
cib_xpath|cib_sync_call|cib_scope_local);
g_string_truncate(xpath, 0);
pcmk__g_strcat(xpath,
"//" XML_CIB_TAG_STATE
"[@" XML_ATTR_UNAME "='", target, "']"
"/" XML_TAG_TRANSIENT_NODEATTRS, NULL);
fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL,
cib_xpath|cib_sync_call|cib_scope_local);
free_xml(cib_node);
g_string_free(xpath, TRUE);
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(graph, action);
free(target);
return pcmk_rc_ok;
}
enum pcmk__graph_status
pcmk__simulate_transition(pe_working_set_t *data_set, cib_t *cib,
const GList *op_fail_list)
{
pcmk__graph_t *transition = NULL;
enum pcmk__graph_status graph_rc;
pcmk__graph_functions_t simulation_fns = {
simulate_pseudo_action,
simulate_resource_action,
simulate_cluster_action,
simulate_fencing_action,
};
out = data_set->priv;
fake_cib = cib;
fake_op_fail_list = op_fail_list;
if (!out->is_quiet(out)) {
out->begin_list(out, NULL, NULL, "Executing Cluster Transition");
}
pcmk__set_graph_functions(&simulation_fns);
transition = pcmk__unpack_graph(data_set->graph, crm_system_name);
pcmk__log_graph(LOG_DEBUG, transition);
fake_resource_list = data_set->resources;
do {
graph_rc = pcmk__execute_graph(transition);
} while (graph_rc == pcmk__graph_active);
fake_resource_list = NULL;
if (graph_rc != pcmk__graph_complete) {
out->err(out, "Transition failed: %s",
pcmk__graph_status2text(graph_rc));
pcmk__log_graph(LOG_ERR, transition);
out->err(out, "An invalid transition was produced");
}
pcmk__free_graph(transition);
if (!out->is_quiet(out)) {
// If not quiet, we'll need the resulting CIB for later display
xmlNode *cib_object = NULL;
int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object,
cib_sync_call|cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
pe_reset_working_set(data_set);
data_set->input = cib_object;
out->end_list(out);
}
return graph_rc;
}
int
pcmk__simulate(pe_working_set_t *data_set, pcmk__output_t *out,
const pcmk_injections_t *injections, unsigned int flags,
uint32_t section_opts, const char *use_date,
const char *input_file, const char *graph_file,
const char *dot_file)
{
int printed = pcmk_rc_no_output;
int rc = pcmk_rc_ok;
xmlNodePtr input = NULL;
cib_t *cib = NULL;
rc = cib__signon_query(out, &cib, &input);
if (rc != pcmk_rc_ok) {
goto simulate_done;
}
reset(data_set, input, out, use_date, flags);
cluster_status(data_set);
if ((cib->variant == cib_native)
&& pcmk_is_set(section_opts, pcmk_section_times)) {
if (pcmk__our_nodename == NULL) {
// Currently used only in the times section
pcmk__query_node_name(out, 0, &pcmk__our_nodename, 0);
}
data_set->localhost = pcmk__our_nodename;
}
if (!out->is_quiet(out)) {
const bool show_pending = pcmk_is_set(flags, pcmk_sim_show_pending);
if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) {
printed = out->message(out, "maint-mode", data_set->flags);
}
if (data_set->disabled_resources || data_set->blocked_resources) {
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
printed = out->info(out,
"%d of %d resource instances DISABLED and "
"%d BLOCKED from further action due to failure",
data_set->disabled_resources,
data_set->ninstances,
data_set->blocked_resources);
}
/* Most formatted output headers use caps for each word, but this one
* only has the first word capitalized for compatibility with pcs.
*/
print_cluster_status(data_set, (show_pending? pcmk_show_pending : 0),
section_opts, "Current cluster status",
(printed == pcmk_rc_ok));
printed = pcmk_rc_ok;
}
// If the user requested any injections, handle them
if ((injections->node_down != NULL)
|| (injections->node_fail != NULL)
|| (injections->node_up != NULL)
|| (injections->op_inject != NULL)
|| (injections->ticket_activate != NULL)
|| (injections->ticket_grant != NULL)
|| (injections->ticket_revoke != NULL)
|| (injections->ticket_standby != NULL)
|| (injections->watchdog != NULL)) {
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
pcmk__inject_scheduler_input(data_set, cib, injections);
printed = pcmk_rc_ok;
rc = cib->cmds->query(cib, NULL, &input, cib_sync_call);
if (rc != pcmk_rc_ok) {
rc = pcmk_legacy2rc(rc);
goto simulate_done;
}
cleanup_calculations(data_set);
reset(data_set, input, out, use_date, flags);
cluster_status(data_set);
}
if (input_file != NULL) {
rc = write_xml_file(input, input_file, FALSE);
if (rc < 0) {
rc = pcmk_legacy2rc(rc);
goto simulate_done;
}
}
if (pcmk_any_flags_set(flags, pcmk_sim_process | pcmk_sim_simulate)) {
pcmk__output_t *logger_out = NULL;
unsigned long long data_set_flags = pe_flag_no_compat;
if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
data_set_flags |= pe_flag_show_scores;
}
if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
data_set_flags |= pe_flag_show_utilization;
}
if (pcmk_all_flags_set(data_set->flags,
pe_flag_show_scores|pe_flag_show_utilization)) {
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
out->begin_list(out, NULL, NULL,
"Assignment Scores and Utilization Information");
printed = pcmk_rc_ok;
} else if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
out->begin_list(out, NULL, NULL, "Assignment Scores");
printed = pcmk_rc_ok;
} else if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
out->begin_list(out, NULL, NULL, "Utilization Information");
printed = pcmk_rc_ok;
} else {
rc = pcmk__log_output_new(&logger_out);
if (rc != pcmk_rc_ok) {
goto simulate_done;
}
pe__register_messages(logger_out);
pcmk__register_lib_messages(logger_out);
data_set->priv = logger_out;
}
pcmk__schedule_actions(input, data_set_flags, data_set);
if (logger_out == NULL) {
out->end_list(out);
} else {
logger_out->finish(logger_out, CRM_EX_OK, true, NULL);
pcmk__output_free(logger_out);
data_set->priv = out;
}
input = NULL; /* Don't try and free it twice */
if (graph_file != NULL) {
rc = write_xml_file(data_set->graph, graph_file, FALSE);
if (rc < 0) {
rc = pcmk_rc_graph_error;
goto simulate_done;
}
}
if (dot_file != NULL) {
rc = write_sim_dotfile(data_set, dot_file,
pcmk_is_set(flags, pcmk_sim_all_actions),
pcmk_is_set(flags, pcmk_sim_verbose));
if (rc != pcmk_rc_ok) {
rc = pcmk_rc_dot_error;
goto simulate_done;
}
}
if (!out->is_quiet(out)) {
print_transition_summary(data_set, printed == pcmk_rc_ok);
}
}
rc = pcmk_rc_ok;
if (!pcmk_is_set(flags, pcmk_sim_simulate)) {
goto simulate_done;
}
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
if (pcmk__simulate_transition(data_set, cib, injections->op_fail)
!= pcmk__graph_complete) {
rc = pcmk_rc_invalid_transition;
}
if (out->is_quiet(out)) {
goto simulate_done;
}
set_effective_date(data_set, true, use_date);
if (pcmk_is_set(flags, pcmk_sim_show_scores)) {
pe__set_working_set_flags(data_set, pe_flag_show_scores);
}
if (pcmk_is_set(flags, pcmk_sim_show_utilization)) {
pe__set_working_set_flags(data_set, pe_flag_show_utilization);
}
cluster_status(data_set);
print_cluster_status(data_set, 0, section_opts, "Revised Cluster Status",
true);
simulate_done:
cib__clean_up_connection(&cib);
return rc;
}
int
pcmk_simulate(xmlNodePtr *xml, pe_working_set_t *data_set,
const pcmk_injections_t *injections, unsigned int flags,
unsigned int section_opts, const char *use_date,
const char *input_file, const char *graph_file,
const char *dot_file)
{
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__xml_output_new(&out, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
pe__register_messages(out);
pcmk__register_lib_messages(out);
rc = pcmk__simulate(data_set, out, injections, flags, section_opts,
use_date, input_file, graph_file, dot_file);
pcmk__xml_output_finish(out, xml);
return rc;
}
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
index 4b142a6857..e7191dbece 100644
--- a/lib/pengine/common.c
+++ b/lib/pengine/common.c
@@ -1,627 +1,627 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <glib.h>
#include <crm/pengine/internal.h>
gboolean was_processing_error = FALSE;
gboolean was_processing_warning = FALSE;
static bool
check_placement_strategy(const char *value)
{
return pcmk__strcase_any_of(value, "default", "utilization", "minimal",
"balanced", NULL);
}
static pcmk__cluster_option_t pe_opts[] = {
/* name, old name, type, allowed values,
* default value, validator,
* short description,
* long description
*/
{
"no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide",
"stop", pcmk__valid_quorum,
N_("What to do when the cluster does not have quorum"),
NULL
},
{
"symmetric-cluster", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("Whether resources can run on any node by default"),
NULL
},
{
"maintenance-mode", NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("Whether the cluster should refrain from monitoring, starting, "
"and stopping resources"),
NULL
},
{
"start-failure-is-fatal", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("Whether a start failure should prevent a resource from being "
"recovered on the same node"),
N_("When true, the cluster will immediately ban a resource from a node "
"if it fails to start there. When false, the cluster will instead "
"check the resource's fail count against its migration-threshold.")
},
{
"enable-startup-probes", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("Whether the cluster should check for active resources during start-up"),
NULL
},
{
XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("Whether to lock resources to a cleanly shut down node"),
N_("When true, resources active on a node when it is cleanly shut down "
"are kept \"locked\" to that node (not allowed to run elsewhere) "
"until they start again on that node after it rejoins (or for at "
"most shutdown-lock-limit, if set). Stonith resources and "
"Pacemaker Remote connections are never locked. Clone and bundle "
"instances and the promoted role of promotable clones are "
"currently never locked, though support could be added in a future "
"release.")
},
{
XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
"0", pcmk__valid_interval_spec,
N_("Do not lock resources to a cleanly shut down node longer than "
"this"),
N_("If shutdown-lock is true and this is set to a nonzero time "
"duration, shutdown locks will expire after this much time has "
"passed since the shutdown was initiated, even if the node has not "
"rejoined.")
},
// Fencing-related options
{
"stonith-enabled", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("*** Advanced Use Only *** "
"Whether nodes may be fenced as part of recovery"),
N_("If false, unresponsive nodes are immediately assumed to be harmless, "
"and resources that were active on them may be recovered "
"elsewhere. This can result in a \"split-brain\" situation, "
"potentially leading to data loss and/or service unavailability.")
},
{
"stonith-action", NULL, "select", "reboot, off, poweroff",
"reboot", pcmk__is_fencing_action,
N_("Action to send to fence device when a node needs to be fenced "
"(\"poweroff\" is a deprecated alias for \"off\")"),
NULL
},
{
"stonith-timeout", NULL, "time", NULL,
"60s", pcmk__valid_interval_spec,
N_("*** Advanced Use Only *** Unused by Pacemaker"),
N_("This value is not used by Pacemaker, but is kept for backward "
"compatibility, and certain legacy fence agents might use it.")
},
{
XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("Whether watchdog integration is enabled"),
N_("This is set automatically by the cluster according to whether SBD "
"is detected to be in use. User-configured values are ignored. "
"The value `true` is meaningful if diskless SBD is used and "
"`stonith-watchdog-timeout` is nonzero. In that case, if fencing "
"is required, watchdog-based self-fencing will be performed via "
"SBD without requiring a fencing resource explicitly configured.")
},
{
"concurrent-fencing", NULL, "boolean", NULL,
PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean,
N_("Allow performing fencing operations in parallel"),
NULL
},
{
"startup-fencing", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("*** Advanced Use Only *** Whether to fence unseen nodes at start-up"),
N_("Setting this to false may lead to a \"split-brain\" situation,"
"potentially leading to data loss and/or service unavailability.")
},
{
XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL,
"0", pcmk__valid_interval_spec,
N_("Apply fencing delay targeting the lost nodes with the highest total resource priority"),
N_("Apply specified delay for the fencings that are targeting the lost "
"nodes with the highest total resource priority in case we don't "
"have the majority of the nodes in our cluster partition, so that "
"the more significant nodes potentially win any fencing match, "
"which is especially meaningful under split-brain of 2-node "
"cluster. A promoted resource instance takes the base priority + 1 "
"on calculation if the base priority is not 0. Any static/random "
"delays that are introduced by `pcmk_delay_base/max` configured "
"for the corresponding fencing resources will be added to this "
"delay. This delay should be significantly greater than, safely "
"twice, the maximum `pcmk_delay_base/max`. By default, priority "
"fencing delay is disabled.")
},
{
XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL,
"10min", pcmk__valid_interval_spec,
N_("How long to wait for a node that has joined the cluster to join "
"the process group"),
N_("A node that has joined the cluster can be pending on joining the "
"process group. We wait up to this much time for it. If it times "
"out, fencing targeting the node will be issued if enabled.")
},
{
"cluster-delay", NULL, "time", NULL,
"60s", pcmk__valid_interval_spec,
N_("Maximum time for node-to-node communication"),
N_("The node elected Designated Controller (DC) will consider an action "
"failed if it does not get a response from the node executing the "
"action within this time (after considering the action's own "
"timeout). The \"correct\" value will depend on the speed and "
"load of your network and cluster nodes.")
},
{
"batch-limit", NULL, "integer", NULL,
"0", pcmk__valid_number,
N_("Maximum number of jobs that the cluster may execute in parallel "
"across all nodes"),
N_("The \"correct\" value will depend on the speed and load of your "
"network and cluster nodes. If set to 0, the cluster will "
"impose a dynamically calculated limit when any node has a "
"high load.")
},
{
"migration-limit", NULL, "integer", NULL,
"-1", pcmk__valid_number,
N_("The number of live migration actions that the cluster is allowed "
"to execute in parallel on a node (-1 means no limit)")
},
/* Orphans and stopping */
{
"stop-all-resources", NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("Whether the cluster should stop all active resources"),
NULL
},
{
"stop-orphan-resources", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("Whether to stop resources that were removed from the configuration"),
NULL
},
{
"stop-orphan-actions", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("Whether to cancel recurring actions removed from the configuration"),
NULL
},
{
"remove-after-stop", NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("*** Deprecated *** Whether to remove stopped resources from "
"the executor"),
N_("Values other than default are poorly tested and potentially dangerous."
" This option will be removed in a future release.")
},
/* Storing inputs */
{
"pe-error-series-max", NULL, "integer", NULL,
"-1", pcmk__valid_number,
N_("The number of scheduler inputs resulting in errors to save"),
N_("Zero to disable, -1 to store unlimited.")
},
{
"pe-warn-series-max", NULL, "integer", NULL,
"5000", pcmk__valid_number,
N_("The number of scheduler inputs resulting in warnings to save"),
N_("Zero to disable, -1 to store unlimited.")
},
{
"pe-input-series-max", NULL, "integer", NULL,
"4000", pcmk__valid_number,
N_("The number of scheduler inputs without errors or warnings to save"),
N_("Zero to disable, -1 to store unlimited.")
},
/* Node health */
{
PCMK__OPT_NODE_HEALTH_STRATEGY, NULL, "select",
PCMK__VALUE_NONE ", " PCMK__VALUE_MIGRATE_ON_RED ", "
PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", "
PCMK__VALUE_CUSTOM,
PCMK__VALUE_NONE, pcmk__validate_health_strategy,
N_("How cluster should react to node health attributes"),
N_("Requires external entities to create node attributes (named with "
"the prefix \"#health\") with values \"red\", "
"\"yellow\", or \"green\".")
},
{
PCMK__OPT_NODE_HEALTH_BASE, NULL, "integer", NULL,
"0", pcmk__valid_number,
N_("Base health score assigned to a node"),
N_("Only used when \"node-health-strategy\" is set to \"progressive\".")
},
{
PCMK__OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL,
"0", pcmk__valid_number,
N_("The score to use for a node health attribute whose value is \"green\""),
N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
},
{
PCMK__OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL,
"0", pcmk__valid_number,
N_("The score to use for a node health attribute whose value is \"yellow\""),
N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
},
{
PCMK__OPT_NODE_HEALTH_RED, NULL, "integer", NULL,
"-INFINITY", pcmk__valid_number,
N_("The score to use for a node health attribute whose value is \"red\""),
N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
},
/*Placement Strategy*/
{
"placement-strategy", NULL, "select",
"default, utilization, minimal, balanced",
"default", check_placement_strategy,
N_("How the cluster should allocate resources to nodes"),
NULL
},
};
void
pe_metadata(pcmk__output_t *out)
{
const char *desc_short = "Pacemaker scheduler options";
const char *desc_long = "Cluster options used by Pacemaker's scheduler";
gchar *s = pcmk__format_option_metadata("pacemaker-schedulerd", desc_short,
desc_long, pe_opts,
PCMK__NELEM(pe_opts));
out->output_xml(out, "metadata", s);
g_free(s);
}
void
verify_pe_options(GHashTable * options)
{
pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts));
}
const char *
pe_pref(GHashTable * options, const char *name)
{
return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name);
}
const char *
fail2text(enum action_fail_response fail)
{
const char *result = "<unknown>";
switch (fail) {
case action_fail_ignore:
result = "ignore";
break;
case action_fail_demote:
result = "demote";
break;
case action_fail_block:
result = "block";
break;
case action_fail_recover:
result = "recover";
break;
case action_fail_migrate:
result = "migrate";
break;
case action_fail_stop:
result = "stop";
break;
case action_fail_fence:
result = "fence";
break;
case action_fail_standby:
result = "standby";
break;
case action_fail_restart_container:
result = "restart-container";
break;
case action_fail_reset_remote:
result = "reset-remote";
break;
}
return result;
}
enum action_tasks
text2task(const char *task)
{
if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)) {
return stop_rsc;
} else if (pcmk__str_eq(task, PCMK_ACTION_STOPPED, pcmk__str_casei)) {
return stopped_rsc;
} else if (pcmk__str_eq(task, PCMK_ACTION_START, pcmk__str_casei)) {
return start_rsc;
} else if (pcmk__str_eq(task, PCMK_ACTION_RUNNING, pcmk__str_casei)) {
return started_rsc;
} else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
return shutdown_crm;
} else if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
return stonith_node;
} else if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
return monitor_rsc;
} else if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
return action_notify;
- } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFIED, pcmk__str_casei)) {
+ } else if (pcmk__str_eq(task, PCMK_ACTION_NOTIFIED, pcmk__str_casei)) {
return action_notified;
} else if (pcmk__str_eq(task, PCMK_ACTION_PROMOTE, pcmk__str_casei)) {
return action_promote;
} else if (pcmk__str_eq(task, PCMK_ACTION_DEMOTE, pcmk__str_casei)) {
return action_demote;
} else if (pcmk__str_eq(task, PCMK_ACTION_PROMOTED, pcmk__str_casei)) {
return action_promoted;
} else if (pcmk__str_eq(task, PCMK_ACTION_DEMOTED, pcmk__str_casei)) {
return action_demoted;
}
#if SUPPORT_TRACING
if (pcmk__str_eq(task, PCMK_ACTION_CANCEL, pcmk__str_casei)) {
return no_action;
} else if (pcmk__str_eq(task, PCMK_ACTION_DELETE, pcmk__str_casei)) {
return no_action;
} else if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
return no_action;
} else if (pcmk__str_eq(task, PCMK_ACTION_MIGRATE_TO, pcmk__str_casei)) {
return no_action;
} else if (pcmk__str_eq(task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_casei)) {
return no_action;
}
crm_trace("Unsupported action: %s", task);
#endif
return no_action;
}
const char *
task2text(enum action_tasks task)
{
const char *result = "<unknown>";
switch (task) {
case no_action:
result = "no_action";
break;
case stop_rsc:
result = PCMK_ACTION_STOP;
break;
case stopped_rsc:
result = PCMK_ACTION_STOPPED;
break;
case start_rsc:
result = PCMK_ACTION_START;
break;
case started_rsc:
result = PCMK_ACTION_RUNNING;
break;
case shutdown_crm:
result = CRM_OP_SHUTDOWN;
break;
case stonith_node:
result = CRM_OP_FENCE;
break;
case monitor_rsc:
result = PCMK_ACTION_MONITOR;
break;
case action_notify:
result = PCMK_ACTION_NOTIFY;
break;
case action_notified:
- result = CRMD_ACTION_NOTIFIED;
+ result = PCMK_ACTION_NOTIFIED;
break;
case action_promote:
result = PCMK_ACTION_PROMOTE;
break;
case action_promoted:
result = PCMK_ACTION_PROMOTED;
break;
case action_demote:
result = PCMK_ACTION_DEMOTE;
break;
case action_demoted:
result = PCMK_ACTION_DEMOTED;
break;
}
return result;
}
const char *
role2text(enum rsc_role_e role)
{
switch (role) {
case RSC_ROLE_UNKNOWN:
return RSC_ROLE_UNKNOWN_S;
case RSC_ROLE_STOPPED:
return RSC_ROLE_STOPPED_S;
case RSC_ROLE_STARTED:
return RSC_ROLE_STARTED_S;
case RSC_ROLE_UNPROMOTED:
#ifdef PCMK__COMPAT_2_0
return RSC_ROLE_UNPROMOTED_LEGACY_S;
#else
return RSC_ROLE_UNPROMOTED_S;
#endif
case RSC_ROLE_PROMOTED:
#ifdef PCMK__COMPAT_2_0
return RSC_ROLE_PROMOTED_LEGACY_S;
#else
return RSC_ROLE_PROMOTED_S;
#endif
}
CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S);
CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S);
// coverity[dead_error_line]
return RSC_ROLE_UNKNOWN_S;
}
enum rsc_role_e
text2role(const char *role)
{
CRM_ASSERT(role != NULL);
if (pcmk__str_eq(role, RSC_ROLE_STOPPED_S, pcmk__str_casei)) {
return RSC_ROLE_STOPPED;
} else if (pcmk__str_eq(role, RSC_ROLE_STARTED_S, pcmk__str_casei)) {
return RSC_ROLE_STARTED;
} else if (pcmk__strcase_any_of(role, RSC_ROLE_UNPROMOTED_S,
RSC_ROLE_UNPROMOTED_LEGACY_S, NULL)) {
return RSC_ROLE_UNPROMOTED;
} else if (pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
RSC_ROLE_PROMOTED_LEGACY_S, NULL)) {
return RSC_ROLE_PROMOTED;
} else if (pcmk__str_eq(role, RSC_ROLE_UNKNOWN_S, pcmk__str_casei)) {
return RSC_ROLE_UNKNOWN;
}
crm_err("Unknown role: %s", role);
return RSC_ROLE_UNKNOWN;
}
void
add_hash_param(GHashTable * hash, const char *name, const char *value)
{
CRM_CHECK(hash != NULL, return);
crm_trace("Adding name='%s' value='%s' to hash table",
pcmk__s(name, "<null>"), pcmk__s(value, "<null>"));
if (name == NULL || value == NULL) {
return;
} else if (pcmk__str_eq(value, "#default", pcmk__str_casei)) {
return;
} else if (g_hash_table_lookup(hash, name) == NULL) {
g_hash_table_insert(hash, strdup(name), strdup(value));
}
}
/*!
* \internal
* \brief Look up an attribute value on the appropriate node
*
* If \p node is a guest node and either the \c XML_RSC_ATTR_TARGET meta
* attribute is set to "host" for \p rsc or \p force_host is \c true, query the
* attribute on the node's host. Otherwise, query the attribute on \p node
* itself.
*
* \param[in] node Node to query attribute value on by default
* \param[in] name Name of attribute to query
* \param[in] rsc Resource on whose behalf we're querying
* \param[in] node_type Type of resource location lookup
* \param[in] force_host Force a lookup on the guest node's host, regardless of
* the \c XML_RSC_ATTR_TARGET value
*
* \return Value of the attribute on \p node or on the host of \p node
*
* \note If \p force_host is \c true, \p node \e must be a guest node.
*/
const char *
pe__node_attribute_calculated(const pe_node_t *node, const char *name,
const pe_resource_t *rsc,
enum pe__rsc_node node_type,
bool force_host)
{
// @TODO: Use pe__is_guest_node() after merging libpe_{rules,status}
bool is_guest = (node != NULL) && (node->details->type == node_remote)
&& (node->details->remote_rsc != NULL)
&& (node->details->remote_rsc->container != NULL);
const char *source = NULL;
const char *node_type_s = NULL;
const char *reason = NULL;
const pe_resource_t *container = NULL;
const pe_node_t *host = NULL;
CRM_ASSERT((node != NULL) && (name != NULL) && (rsc != NULL)
&& (!force_host || is_guest));
/* Ignore XML_RSC_ATTR_TARGET if node is not a guest node. This represents a
* user configuration error.
*/
source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET);
if (!force_host
&& (!is_guest || !pcmk__str_eq(source, "host", pcmk__str_casei))) {
return g_hash_table_lookup(node->details->attrs, name);
}
container = node->details->remote_rsc->container;
switch (node_type) {
case pe__rsc_node_assigned:
node_type_s = "assigned";
host = container->allocated_to;
if (host == NULL) {
reason = "not assigned";
}
break;
case pe__rsc_node_current:
node_type_s = "current";
if (container->running_on != NULL) {
host = container->running_on->data;
}
if (host == NULL) {
reason = "inactive";
}
break;
default:
// Add support for other enum pe__rsc_node values if needed
CRM_ASSERT(false);
break;
}
if (host != NULL) {
const char *value = g_hash_table_lookup(host->details->attrs, name);
pe_rsc_trace(rsc,
"%s: Value lookup for %s on %s container host %s %s%s",
rsc->id, name, node_type_s, pe__node_name(host),
((value != NULL)? "succeeded: " : "failed"),
pcmk__s(value, ""));
return value;
}
pe_rsc_trace(rsc,
"%s: Not looking for %s on %s container host: %s is %s",
rsc->id, name, node_type_s, container->id, reason);
return NULL;
}
const char *
pe_node_attribute_raw(const pe_node_t *node, const char *name)
{
if(node == NULL) {
return NULL;
}
return g_hash_table_lookup(node->details->attrs, name);
}
diff --git a/lib/pengine/pe_notif.c b/lib/pengine/pe_notif.c
index be01d11c57..8fc4fcb2b1 100644
--- a/lib/pengine/pe_notif.c
+++ b/lib/pengine/pe_notif.c
@@ -1,1001 +1,1002 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <crm/pengine/internal.h>
#include <pacemaker-internal.h>
#include "pe_status_private.h"
typedef struct notify_entry_s {
const pe_resource_t *rsc;
const pe_node_t *node;
} notify_entry_t;
/*!
* \internal
* \brief Compare two notification entries
*
* Compare two notification entries, where the one with the alphabetically first
* resource name (or if equal, node name) sorts as first, with NULL sorting as
* less than non-NULL.
*
* \param[in] a First notification entry to compare
* \param[in] b Second notification entry to compare
*
* \return -1 if \p a sorts before \p b, 0 if they are equal, otherwise 1
*/
static gint
compare_notify_entries(gconstpointer a, gconstpointer b)
{
int tmp;
const notify_entry_t *entry_a = a;
const notify_entry_t *entry_b = b;
// NULL a or b is not actually possible
if ((entry_a == NULL) && (entry_b == NULL)) {
return 0;
}
if (entry_a == NULL) {
return 1;
}
if (entry_b == NULL) {
return -1;
}
// NULL resources sort first
if ((entry_a->rsc == NULL) && (entry_b->rsc == NULL)) {
return 0;
}
if (entry_a->rsc == NULL) {
return 1;
}
if (entry_b->rsc == NULL) {
return -1;
}
// Compare resource names
tmp = strcmp(entry_a->rsc->id, entry_b->rsc->id);
if (tmp != 0) {
return tmp;
}
// Otherwise NULL nodes sort first
if ((entry_a->node == NULL) && (entry_b->node == NULL)) {
return 0;
}
if (entry_a->node == NULL) {
return 1;
}
if (entry_b->node == NULL) {
return -1;
}
// Finally, compare node names
return strcmp(entry_a->node->details->id, entry_b->node->details->id);
}
/*!
* \internal
* \brief Duplicate a notification entry
*
* \param[in] entry Entry to duplicate
*
* \return Newly allocated duplicate of \p entry
* \note It is the caller's responsibility to free the return value.
*/
static notify_entry_t *
dup_notify_entry(const notify_entry_t *entry)
{
notify_entry_t *dup = calloc(1, sizeof(notify_entry_t));
CRM_ASSERT(dup != NULL);
dup->rsc = entry->rsc;
dup->node = entry->node;
return dup;
}
/*!
* \internal
* \brief Given a list of nodes, create strings with node names
*
* \param[in] list List of nodes (as pe_node_t *)
* \param[out] all_node_names If not NULL, will be set to space-separated list
* of the names of all nodes in \p list
* \param[out] host_node_names Same as \p all_node_names, except active
* guest nodes will list the name of their host
*
* \note The caller is responsible for freeing the output argument values using
* \p g_string_free().
*/
static void
get_node_names(const GList *list, GString **all_node_names,
GString **host_node_names)
{
if (all_node_names != NULL) {
*all_node_names = NULL;
}
if (host_node_names != NULL) {
*host_node_names = NULL;
}
for (const GList *iter = list; iter != NULL; iter = iter->next) {
const pe_node_t *node = (const pe_node_t *) iter->data;
if (node->details->uname == NULL) {
continue;
}
// Always add to list of all node names
if (all_node_names != NULL) {
pcmk__add_word(all_node_names, 1024, node->details->uname);
}
// Add to host node name list if appropriate
if (host_node_names != NULL) {
if (pe__is_guest_node(node)
&& (node->details->remote_rsc->container->running_on != NULL)) {
node = pe__current_node(node->details->remote_rsc->container);
if (node->details->uname == NULL) {
continue;
}
}
pcmk__add_word(host_node_names, 1024, node->details->uname);
}
}
if ((all_node_names != NULL) && (*all_node_names == NULL)) {
*all_node_names = g_string_new(" ");
}
if ((host_node_names != NULL) && (*host_node_names == NULL)) {
*host_node_names = g_string_new(" ");
}
}
/*!
* \internal
* \brief Create strings of instance and node names from notification entries
*
* \param[in,out] list List of notification entries (will be sorted here)
* \param[out] rsc_names If not NULL, will be set to space-separated list
* of clone instances from \p list
* \param[out] node_names If not NULL, will be set to space-separated list
* of node names from \p list
*
* \return (Possibly new) head of sorted \p list
* \note The caller is responsible for freeing the output argument values using
* \p g_list_free_full() and \p g_string_free().
*/
static GList *
notify_entries_to_strings(GList *list, GString **rsc_names,
GString **node_names)
{
const char *last_rsc_id = NULL;
// Initialize output lists to NULL
if (rsc_names != NULL) {
*rsc_names = NULL;
}
if (node_names != NULL) {
*node_names = NULL;
}
// Sort input list for user-friendliness (and ease of filtering duplicates)
list = g_list_sort(list, compare_notify_entries);
for (GList *gIter = list; gIter != NULL; gIter = gIter->next) {
notify_entry_t *entry = (notify_entry_t *) gIter->data;
// Entry must have a resource (with ID)
CRM_LOG_ASSERT((entry != NULL) && (entry->rsc != NULL)
&& (entry->rsc->id != NULL));
if ((entry == NULL) || (entry->rsc == NULL)
|| (entry->rsc->id == NULL)) {
continue;
}
// Entry must have a node unless listing inactive resources
CRM_LOG_ASSERT((node_names == NULL) || (entry->node != NULL));
if ((node_names != NULL) && (entry->node == NULL)) {
continue;
}
// Don't add duplicates of a particular clone instance
if (pcmk__str_eq(entry->rsc->id, last_rsc_id, pcmk__str_none)) {
continue;
}
last_rsc_id = entry->rsc->id;
if (rsc_names != NULL) {
pcmk__add_word(rsc_names, 1024, entry->rsc->id);
}
if ((node_names != NULL) && (entry->node->details->uname != NULL)) {
pcmk__add_word(node_names, 1024, entry->node->details->uname);
}
}
// If there are no entries, return "empty" lists
if ((rsc_names != NULL) && (*rsc_names == NULL)) {
*rsc_names = g_string_new(" ");
}
if ((node_names != NULL) && (*node_names == NULL)) {
*node_names = g_string_new(" ");
}
return list;
}
/*!
* \internal
* \brief Copy a meta-attribute into a notify action
*
* \param[in] key Name of meta-attribute to copy
* \param[in] value Value of meta-attribute to copy
* \param[in,out] user_data Notify action to copy into
*/
static void
copy_meta_to_notify(gpointer key, gpointer value, gpointer user_data)
{
pe_action_t *notify = (pe_action_t *) user_data;
/* Any existing meta-attributes (for example, the action timeout) are for
* the notify action itself, so don't override those.
*/
if (g_hash_table_lookup(notify->meta, (const char *) key) != NULL) {
return;
}
g_hash_table_insert(notify->meta, strdup((const char *) key),
strdup((const char *) value));
}
static void
add_notify_data_to_action_meta(const notify_data_t *n_data, pe_action_t *action)
{
for (const GSList *item = n_data->keys; item; item = item->next) {
const pcmk_nvpair_t *nvpair = (const pcmk_nvpair_t *) item->data;
add_hash_param(action->meta, nvpair->name, nvpair->value);
}
}
/*!
* \internal
* \brief Create a new notify pseudo-action for a clone resource
*
* \param[in,out] rsc Clone resource that notification is for
* \param[in] action Action to use in notify action key
- * \param[in] notif_action PCMK_ACTION_NOTIFY or RSC_NOTIFIED
+ * \param[in] notif_action PCMK_ACTION_NOTIFY or PCMK_ACTION_NOTIFIED
* \param[in] notif_type "pre", "post", "confirmed-pre", "confirmed-post"
*
* \return Newly created notify pseudo-action
*/
static pe_action_t *
new_notify_pseudo_action(pe_resource_t *rsc, const pe_action_t *action,
const char *notif_action, const char *notif_type)
{
pe_action_t *notify = NULL;
notify = custom_action(rsc,
pcmk__notify_key(rsc->id, notif_type, action->task),
notif_action, NULL,
pcmk_is_set(action->flags, pe_action_optional),
TRUE, rsc->cluster);
pe__set_action_flags(notify, pe_action_pseudo);
add_hash_param(notify->meta, "notify_key_type", notif_type);
add_hash_param(notify->meta, "notify_key_operation", action->task);
return notify;
}
/*!
* \internal
* \brief Create a new notify action for a clone instance
*
* \param[in,out] rsc Clone instance that notification is for
* \param[in] node Node that notification is for
* \param[in,out] op Action that notification is for
* \param[in,out] notify_done Parent pseudo-action for notifications complete
* \param[in] n_data Notification values to add to action meta-data
*
* \return Newly created notify action
*/
static pe_action_t *
new_notify_action(pe_resource_t *rsc, const pe_node_t *node, pe_action_t *op,
pe_action_t *notify_done, const notify_data_t *n_data)
{
char *key = NULL;
pe_action_t *notify_action = NULL;
const char *value = NULL;
const char *task = NULL;
const char *skip_reason = NULL;
CRM_CHECK((rsc != NULL) && (node != NULL), return NULL);
// Ensure we have all the info we need
if (op == NULL) {
skip_reason = "no action";
} else if (notify_done == NULL) {
skip_reason = "no parent notification";
} else if (!node->details->online) {
skip_reason = "node offline";
} else if (!pcmk_is_set(op->flags, pe_action_runnable)) {
skip_reason = "original action not runnable";
}
if (skip_reason != NULL) {
pe_rsc_trace(rsc, "Skipping notify action for %s on %s: %s",
rsc->id, pe__node_name(node), skip_reason);
return NULL;
}
value = g_hash_table_lookup(op->meta, "notify_type"); // "pre" or "post"
task = g_hash_table_lookup(op->meta, "notify_operation"); // original action
pe_rsc_trace(rsc, "Creating notify action for %s on %s (%s-%s)",
rsc->id, pe__node_name(node), value, task);
// Create the notify action
key = pcmk__notify_key(rsc->id, value, task);
notify_action = custom_action(rsc, key, op->task, node,
pcmk_is_set(op->flags, pe_action_optional),
TRUE, rsc->cluster);
// Add meta-data to notify action
g_hash_table_foreach(op->meta, copy_meta_to_notify, notify_action);
add_notify_data_to_action_meta(n_data, notify_action);
// Order notify after original action and before parent notification
order_actions(op, notify_action, pe_order_optional);
order_actions(notify_action, notify_done, pe_order_optional);
return notify_action;
}
/*!
* \internal
* \brief Create a new "post-" notify action for a clone instance
*
* \param[in,out] rsc Clone instance that notification is for
* \param[in] node Node that notification is for
* \param[in,out] n_data Notification values to add to action meta-data
*/
static void
new_post_notify_action(pe_resource_t *rsc, const pe_node_t *node,
notify_data_t *n_data)
{
pe_action_t *notify = NULL;
CRM_ASSERT(n_data != NULL);
// Create the "post-" notify action for specified instance
notify = new_notify_action(rsc, node, n_data->post, n_data->post_done,
n_data);
if (notify != NULL) {
notify->priority = INFINITY;
}
// Order recurring monitors after all "post-" notifications complete
if (n_data->post_done == NULL) {
return;
}
for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
pe_action_t *mon = (pe_action_t *) iter->data;
const char *interval_ms_s = NULL;
interval_ms_s = g_hash_table_lookup(mon->meta,
XML_LRM_ATTR_INTERVAL_MS);
if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)
|| pcmk__str_eq(mon->task, PCMK_ACTION_CANCEL, pcmk__str_none)) {
continue; // Not a recurring monitor
}
order_actions(n_data->post_done, mon, pe_order_optional);
}
}
/*!
* \internal
* \brief Create and order notification pseudo-actions for a clone action
*
* In addition to the actual notify actions needed for each clone instance,
* clone notifications also require pseudo-actions to provide ordering points
* in the notification process. This creates the notification data, along with
* appropriate pseudo-actions and their orderings.
*
* For example, the ordering sequence for starting a clone is:
*
* "pre-" notify pseudo-action for clone
* -> "pre-" notify actions for each clone instance
* -> "pre-" notifications complete pseudo-action for clone
* -> start actions for each clone instance
* -> "started" pseudo-action for clone
* -> "post-" notify pseudo-action for clone
* -> "post-" notify actions for each clone instance
* -> "post-" notifications complete pseudo-action for clone
*
* \param[in,out] rsc Clone that notifications are for
* \param[in] task Name of action that notifications are for
* \param[in,out] action If not NULL, create a "pre-" pseudo-action ordered
* before a "pre-" complete pseudo-action, ordered
* before this action
* \param[in,out] complete If not NULL, create a "post-" pseudo-action ordered
* after this action, and a "post-" complete
* pseudo-action ordered after that
*
* \return Newly created notification data
*/
notify_data_t *
pe__action_notif_pseudo_ops(pe_resource_t *rsc, const char *task,
pe_action_t *action, pe_action_t *complete)
{
notify_data_t *n_data = NULL;
if (!pcmk_is_set(rsc->flags, pe_rsc_notify)) {
return NULL;
}
n_data = calloc(1, sizeof(notify_data_t));
CRM_ASSERT(n_data != NULL);
n_data->action = task;
if (action != NULL) { // Need "pre-" pseudo-actions
// Create "pre-" notify pseudo-action for clone
n_data->pre = new_notify_pseudo_action(rsc, action, PCMK_ACTION_NOTIFY,
"pre");
pe__set_action_flags(n_data->pre, pe_action_runnable);
add_hash_param(n_data->pre->meta, "notify_type", "pre");
add_hash_param(n_data->pre->meta, "notify_operation", n_data->action);
// Create "pre-" notifications complete pseudo-action for clone
- n_data->pre_done = new_notify_pseudo_action(rsc, action, RSC_NOTIFIED,
+ n_data->pre_done = new_notify_pseudo_action(rsc, action,
+ PCMK_ACTION_NOTIFIED,
"confirmed-pre");
pe__set_action_flags(n_data->pre_done, pe_action_runnable);
add_hash_param(n_data->pre_done->meta, "notify_type", "pre");
add_hash_param(n_data->pre_done->meta,
"notify_operation", n_data->action);
// Order "pre-" -> "pre-" complete -> original action
order_actions(n_data->pre, n_data->pre_done, pe_order_optional);
order_actions(n_data->pre_done, action, pe_order_optional);
}
if (complete != NULL) { // Need "post-" pseudo-actions
// Create "post-" notify pseudo-action for clone
n_data->post = new_notify_pseudo_action(rsc, complete,
PCMK_ACTION_NOTIFY, "post");
n_data->post->priority = INFINITY;
if (pcmk_is_set(complete->flags, pe_action_runnable)) {
pe__set_action_flags(n_data->post, pe_action_runnable);
} else {
pe__clear_action_flags(n_data->post, pe_action_runnable);
}
add_hash_param(n_data->post->meta, "notify_type", "post");
add_hash_param(n_data->post->meta, "notify_operation", n_data->action);
// Create "post-" notifications complete pseudo-action for clone
n_data->post_done = new_notify_pseudo_action(rsc, complete,
- RSC_NOTIFIED,
+ PCMK_ACTION_NOTIFIED,
"confirmed-post");
n_data->post_done->priority = INFINITY;
if (pcmk_is_set(complete->flags, pe_action_runnable)) {
pe__set_action_flags(n_data->post_done, pe_action_runnable);
} else {
pe__clear_action_flags(n_data->post_done, pe_action_runnable);
}
add_hash_param(n_data->post_done->meta, "notify_type", "post");
add_hash_param(n_data->post_done->meta,
"notify_operation", n_data->action);
// Order original action complete -> "post-" -> "post-" complete
order_actions(complete, n_data->post, pe_order_implies_then);
order_actions(n_data->post, n_data->post_done, pe_order_implies_then);
}
// If we created both, order "pre-" complete -> "post-"
if ((action != NULL) && (complete != NULL)) {
order_actions(n_data->pre_done, n_data->post, pe_order_optional);
}
return n_data;
}
/*!
* \internal
* \brief Create a new notification entry
*
* \param[in] rsc Resource for notification
* \param[in] node Node for notification
*
* \return Newly allocated notification entry
* \note The caller is responsible for freeing the return value.
*/
static notify_entry_t *
new_notify_entry(const pe_resource_t *rsc, const pe_node_t *node)
{
notify_entry_t *entry = calloc(1, sizeof(notify_entry_t));
CRM_ASSERT(entry != NULL);
entry->rsc = rsc;
entry->node = node;
return entry;
}
/*!
* \internal
* \brief Add notification data for resource state and optionally actions
*
* \param[in] rsc Clone or clone instance being notified
* \param[in] activity Whether to add notification entries for actions
* \param[in,out] n_data Notification data for clone
*/
static void
collect_resource_data(const pe_resource_t *rsc, bool activity,
notify_data_t *n_data)
{
const GList *iter = NULL;
notify_entry_t *entry = NULL;
const pe_node_t *node = NULL;
if (n_data == NULL) {
return;
}
if (n_data->allowed_nodes == NULL) {
n_data->allowed_nodes = rsc->allowed_nodes;
}
// If this is a clone, call recursively for each instance
if (rsc->children != NULL) {
for (iter = rsc->children; iter != NULL; iter = iter->next) {
const pe_resource_t *child = (const pe_resource_t *) iter->data;
collect_resource_data(child, activity, n_data);
}
return;
}
// This is a notification for a single clone instance
if (rsc->running_on != NULL) {
node = rsc->running_on->data; // First is sufficient
}
entry = new_notify_entry(rsc, node);
// Add notification indicating the resource state
switch (rsc->role) {
case RSC_ROLE_STOPPED:
n_data->inactive = g_list_prepend(n_data->inactive, entry);
break;
case RSC_ROLE_STARTED:
n_data->active = g_list_prepend(n_data->active, entry);
break;
case RSC_ROLE_UNPROMOTED:
n_data->unpromoted = g_list_prepend(n_data->unpromoted, entry);
n_data->active = g_list_prepend(n_data->active,
dup_notify_entry(entry));
break;
case RSC_ROLE_PROMOTED:
n_data->promoted = g_list_prepend(n_data->promoted, entry);
n_data->active = g_list_prepend(n_data->active,
dup_notify_entry(entry));
break;
default:
crm_err("Resource %s role on %s (%s) is not supported for "
"notifications (bug?)",
rsc->id, pe__node_name(node), role2text(rsc->role));
free(entry);
break;
}
if (!activity) {
return;
}
// Add notification entries for each of the resource's actions
for (iter = rsc->actions; iter != NULL; iter = iter->next) {
const pe_action_t *op = (const pe_action_t *) iter->data;
if (!pcmk_is_set(op->flags, pe_action_optional) && (op->node != NULL)) {
enum action_tasks task = text2task(op->task);
if ((task == stop_rsc) && op->node->details->unclean) {
// Create anyway (additional noise if node can't be fenced)
} else if (!pcmk_is_set(op->flags, pe_action_runnable)) {
continue;
}
entry = new_notify_entry(rsc, op->node);
switch (task) {
case start_rsc:
n_data->start = g_list_prepend(n_data->start, entry);
break;
case stop_rsc:
n_data->stop = g_list_prepend(n_data->stop, entry);
break;
case action_promote:
n_data->promote = g_list_prepend(n_data->promote, entry);
break;
case action_demote:
n_data->demote = g_list_prepend(n_data->demote, entry);
break;
default:
free(entry);
break;
}
}
}
}
// For (char *) value
#define add_notify_env(n_data, key, value) do { \
n_data->keys = pcmk_prepend_nvpair(n_data->keys, key, value); \
} while (0)
// For (GString *) value
#define add_notify_env_gs(n_data, key, value) do { \
n_data->keys = pcmk_prepend_nvpair(n_data->keys, key, \
(const char *) value->str); \
} while (0)
// For (GString *) value
#define add_notify_env_free_gs(n_data, key, value) do { \
n_data->keys = pcmk_prepend_nvpair(n_data->keys, key, \
(const char *) value->str); \
g_string_free(value, TRUE); value = NULL; \
} while (0)
/*!
* \internal
* \brief Create notification name/value pairs from structured data
*
* \param[in] rsc Resource that notification is for
* \param[in,out] n_data Notification data
*/
static void
add_notif_keys(const pe_resource_t *rsc, notify_data_t *n_data)
{
bool required = false; // Whether to make notify actions required
GString *rsc_list = NULL;
GString *node_list = NULL;
GString *metal_list = NULL;
const char *source = NULL;
GList *nodes = NULL;
n_data->stop = notify_entries_to_strings(n_data->stop,
&rsc_list, &node_list);
if ((strcmp(" ", (const char *) rsc_list->str) != 0)
&& pcmk__str_eq(n_data->action, PCMK_ACTION_STOP, pcmk__str_none)) {
required = true;
}
add_notify_env_free_gs(n_data, "notify_stop_resource", rsc_list);
add_notify_env_free_gs(n_data, "notify_stop_uname", node_list);
if ((n_data->start != NULL)
&& pcmk__str_eq(n_data->action, PCMK_ACTION_START, pcmk__str_none)) {
required = true;
}
n_data->start = notify_entries_to_strings(n_data->start,
&rsc_list, &node_list);
add_notify_env_free_gs(n_data, "notify_start_resource", rsc_list);
add_notify_env_free_gs(n_data, "notify_start_uname", node_list);
if ((n_data->demote != NULL)
&& pcmk__str_eq(n_data->action, PCMK_ACTION_DEMOTE, pcmk__str_none)) {
required = true;
}
n_data->demote = notify_entries_to_strings(n_data->demote,
&rsc_list, &node_list);
add_notify_env_free_gs(n_data, "notify_demote_resource", rsc_list);
add_notify_env_free_gs(n_data, "notify_demote_uname", node_list);
if ((n_data->promote != NULL)
&& pcmk__str_eq(n_data->action, PCMK_ACTION_PROMOTE, pcmk__str_none)) {
required = true;
}
n_data->promote = notify_entries_to_strings(n_data->promote,
&rsc_list, &node_list);
add_notify_env_free_gs(n_data, "notify_promote_resource", rsc_list);
add_notify_env_free_gs(n_data, "notify_promote_uname", node_list);
n_data->active = notify_entries_to_strings(n_data->active,
&rsc_list, &node_list);
add_notify_env_free_gs(n_data, "notify_active_resource", rsc_list);
add_notify_env_free_gs(n_data, "notify_active_uname", node_list);
n_data->unpromoted = notify_entries_to_strings(n_data->unpromoted,
&rsc_list, &node_list);
add_notify_env_gs(n_data, "notify_unpromoted_resource", rsc_list);
add_notify_env_gs(n_data, "notify_unpromoted_uname", node_list);
// Deprecated: kept for backward compatibility with older resource agents
add_notify_env_free_gs(n_data, "notify_slave_resource", rsc_list);
add_notify_env_free_gs(n_data, "notify_slave_uname", node_list);
n_data->promoted = notify_entries_to_strings(n_data->promoted,
&rsc_list, &node_list);
add_notify_env_gs(n_data, "notify_promoted_resource", rsc_list);
add_notify_env_gs(n_data, "notify_promoted_uname", node_list);
// Deprecated: kept for backward compatibility with older resource agents
add_notify_env_free_gs(n_data, "notify_master_resource", rsc_list);
add_notify_env_free_gs(n_data, "notify_master_uname", node_list);
n_data->inactive = notify_entries_to_strings(n_data->inactive,
&rsc_list, NULL);
add_notify_env_free_gs(n_data, "notify_inactive_resource", rsc_list);
nodes = g_hash_table_get_values(n_data->allowed_nodes);
if (!pcmk__is_daemon) {
/* For display purposes, sort the node list, for consistent
* regression test output (while avoiding the performance hit
* for the live cluster).
*/
nodes = g_list_sort(nodes, pe__cmp_node_name);
}
get_node_names(nodes, &node_list, NULL);
add_notify_env_free_gs(n_data, "notify_available_uname", node_list);
g_list_free(nodes);
source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET);
if (pcmk__str_eq("host", source, pcmk__str_none)) {
get_node_names(rsc->cluster->nodes, &node_list, &metal_list);
add_notify_env_free_gs(n_data, "notify_all_hosts", metal_list);
} else {
get_node_names(rsc->cluster->nodes, &node_list, NULL);
}
add_notify_env_free_gs(n_data, "notify_all_uname", node_list);
if (required && (n_data->pre != NULL)) {
pe__clear_action_flags(n_data->pre, pe_action_optional);
pe__clear_action_flags(n_data->pre_done, pe_action_optional);
}
if (required && (n_data->post != NULL)) {
pe__clear_action_flags(n_data->post, pe_action_optional);
pe__clear_action_flags(n_data->post_done, pe_action_optional);
}
}
/*
* \internal
* \brief Find any remote connection start relevant to an action
*
* \param[in] action Action to check
*
* \return If action is behind a remote connection, connection's start
*/
static pe_action_t *
find_remote_start(pe_action_t *action)
{
if ((action != NULL) && (action->node != NULL)) {
pe_resource_t *remote_rsc = action->node->details->remote_rsc;
if (remote_rsc != NULL) {
return find_first_action(remote_rsc->actions, NULL,
PCMK_ACTION_START,
NULL);
}
}
return NULL;
}
/*!
* \internal
* \brief Create notify actions, and add notify data to original actions
*
* \param[in,out] rsc Clone or clone instance that notification is for
* \param[in,out] n_data Clone notification data for some action
*/
static void
create_notify_actions(pe_resource_t *rsc, notify_data_t *n_data)
{
GList *iter = NULL;
pe_action_t *stop = NULL;
pe_action_t *start = NULL;
enum action_tasks task = text2task(n_data->action);
// If this is a clone, call recursively for each instance
if (rsc->children != NULL) {
g_list_foreach(rsc->children, (GFunc) create_notify_actions, n_data);
return;
}
// Add notification meta-attributes to original actions
for (iter = rsc->actions; iter != NULL; iter = iter->next) {
pe_action_t *op = (pe_action_t *) iter->data;
if (!pcmk_is_set(op->flags, pe_action_optional) && (op->node != NULL)) {
switch (text2task(op->task)) {
case start_rsc:
case stop_rsc:
case action_promote:
case action_demote:
add_notify_data_to_action_meta(n_data, op);
break;
default:
break;
}
}
}
// Skip notify action itself if original action was not needed
switch (task) {
case start_rsc:
if (n_data->start == NULL) {
pe_rsc_trace(rsc, "No notify action needed for %s %s",
rsc->id, n_data->action);
return;
}
break;
case action_promote:
if (n_data->promote == NULL) {
pe_rsc_trace(rsc, "No notify action needed for %s %s",
rsc->id, n_data->action);
return;
}
break;
case action_demote:
if (n_data->demote == NULL) {
pe_rsc_trace(rsc, "No notify action needed for %s %s",
rsc->id, n_data->action);
return;
}
break;
default:
// We cannot do same for stop because it might be implied by fencing
break;
}
pe_rsc_trace(rsc, "Creating notify actions for %s %s",
rsc->id, n_data->action);
// Create notify actions for stop or demote
if ((rsc->role != RSC_ROLE_STOPPED)
&& ((task == stop_rsc) || (task == action_demote))) {
stop = find_first_action(rsc->actions, NULL, PCMK_ACTION_STOP, NULL);
for (iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *current_node = (pe_node_t *) iter->data;
/* If a stop is a pseudo-action implied by fencing, don't try to
* notify the node getting fenced.
*/
if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)
&& (current_node->details->unclean
|| current_node->details->remote_requires_reset)) {
continue;
}
new_notify_action(rsc, current_node, n_data->pre,
n_data->pre_done, n_data);
if ((task == action_demote) || (stop == NULL)
|| pcmk_is_set(stop->flags, pe_action_optional)) {
new_post_notify_action(rsc, current_node, n_data);
}
}
}
// Create notify actions for start or promote
if ((rsc->next_role != RSC_ROLE_STOPPED)
&& ((task == start_rsc) || (task == action_promote))) {
start = find_first_action(rsc->actions, NULL, PCMK_ACTION_START, NULL);
if (start != NULL) {
pe_action_t *remote_start = find_remote_start(start);
if ((remote_start != NULL)
&& !pcmk_is_set(remote_start->flags, pe_action_runnable)) {
/* Start and promote actions for a clone instance behind
* a Pacemaker Remote connection happen after the
* connection starts. If the connection start is blocked, do
* not schedule notifications for these actions.
*/
return;
}
}
if (rsc->allocated_to == NULL) {
pe_proc_err("Next role '%s' but %s is not allocated",
role2text(rsc->next_role), rsc->id);
return;
}
if ((task != start_rsc) || (start == NULL)
|| pcmk_is_set(start->flags, pe_action_optional)) {
new_notify_action(rsc, rsc->allocated_to, n_data->pre,
n_data->pre_done, n_data);
}
new_post_notify_action(rsc, rsc->allocated_to, n_data);
}
}
/*!
* \internal
* \brief Create notification data and actions for one clone action
*
* \param[in,out] rsc Clone resource that notification is for
* \param[in,out] n_data Clone notification data for some action
*/
void
pe__create_action_notifications(pe_resource_t *rsc, notify_data_t *n_data)
{
if ((rsc == NULL) || (n_data == NULL)) {
return;
}
collect_resource_data(rsc, true, n_data);
add_notif_keys(rsc, n_data);
create_notify_actions(rsc, n_data);
}
/*!
* \internal
* \brief Free notification data for one action
*
* \param[in,out] n_data Notification data to free
*/
void
pe__free_action_notification_data(notify_data_t *n_data)
{
if (n_data == NULL) {
return;
}
g_list_free_full(n_data->stop, free);
g_list_free_full(n_data->start, free);
g_list_free_full(n_data->demote, free);
g_list_free_full(n_data->promote, free);
g_list_free_full(n_data->promoted, free);
g_list_free_full(n_data->unpromoted, free);
g_list_free_full(n_data->active, free);
g_list_free_full(n_data->inactive, free);
pcmk_free_nvpairs(n_data->keys);
free(n_data);
}
/*!
* \internal
* \brief Order clone "notifications complete" pseudo-action after fencing
*
* If a stop action is implied by fencing, the usual notification pseudo-actions
* will not be sufficient to order things properly, or even create all needed
* notifications if the clone is also stopping on another node, and another
* clone is ordered after it. This function creates new notification
* pseudo-actions relative to the fencing to ensure everything works properly.
*
* \param[in] stop Stop action implied by fencing
* \param[in,out] rsc Clone resource that notification is for
* \param[in,out] stonith_op Fencing action that implies \p stop
*/
void
pe__order_notifs_after_fencing(const pe_action_t *stop, pe_resource_t *rsc,
pe_action_t *stonith_op)
{
notify_data_t *n_data;
crm_info("Ordering notifications for implied %s after fencing", stop->uuid);
n_data = pe__action_notif_pseudo_ops(rsc, PCMK_ACTION_STOP, NULL,
stonith_op);
if (n_data != NULL) {
collect_resource_data(rsc, false, n_data);
add_notify_env(n_data, "notify_stop_resource", rsc->id);
add_notify_env(n_data, "notify_stop_uname", stop->node->details->uname);
create_notify_actions(uber_parent(rsc), n_data);
pe__free_action_notification_data(n_data);
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:17 PM (17 h, 21 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2002561
Default Alt Text
(166 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment