diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c
index af189a2c7d..dc70cc6f04 100644
--- a/lib/pacemaker/pcmk_scheduler.c
+++ b/lib/pacemaker/pcmk_scheduler.c
@@ -1,888 +1,880 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/cib/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/scheduler_internal.h>
#include <glib.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
CRM_TRACE_INIT_DATA(pacemaker);
/*!
* \internal
* \brief Do deferred action checks after assignment
*
* When unpacking the resource history, the scheduler checks for resource
* configurations that have changed since an action was run. However, at that
* time, bundles using the REMOTE_CONTAINER_HACK don't have their final
* parameter information, so instead they add a deferred check to a list. This
* function processes one entry in that list.
*
* \param[in,out] rsc Resource that action history is for
* \param[in,out] node Node that action history is for
* \param[in] rsc_op Action history entry
* \param[in] check Type of deferred check to do
*/
static void
check_params(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *rsc_op,
enum pcmk__check_parameters check)
{
const char *reason = NULL;
pcmk__op_digest_t *digest_data = NULL;
switch (check) {
case pcmk__check_active:
if (pcmk__check_action_config(rsc, node, rsc_op)
&& pe_get_failcount(node, rsc, NULL, pcmk__fc_effective,
NULL)) {
reason = "action definition changed";
}
break;
case pcmk__check_last_failure:
digest_data = rsc_action_digest_cmp(rsc, rsc_op, node,
rsc->priv->scheduler);
switch (digest_data->rc) {
case pcmk__digest_unknown:
crm_trace("Resource %s history entry %s on %s has "
"no digest to compare",
rsc->id, pcmk__xe_id(rsc_op), node->priv->id);
break;
case pcmk__digest_match:
break;
default:
reason = "resource parameters have changed";
break;
}
break;
}
if (reason != NULL) {
pe__clear_failcount(rsc, node, reason, rsc->priv->scheduler);
}
}
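The deferred-check list drained here can be pictured with a small, self-contained sketch (hypothetical names, not the Pacemaker implementation; compare pe__add_param_check() and pe__foreach_param_check()): checks that cannot run while the history is being unpacked are queued, then replayed once final parameter information is available.
#include <glib.h>
#include <stdio.h>
typedef void (*check_fn_t)(const char *rsc_id);
typedef struct {
    const char *rsc_id;  // resource the deferred check applies to
    check_fn_t fn;       // check to run once full information is available
} deferred_check_t;
static GList *deferred_checks = NULL;
// Queue a check that cannot be performed yet
static void
add_deferred_check(const char *rsc_id, check_fn_t fn)
{
    deferred_check_t *check = g_new0(deferred_check_t, 1);
    check->rsc_id = rsc_id;
    check->fn = fn;
    deferred_checks = g_list_prepend(deferred_checks, check);
}
// Run one queued check
static void
run_deferred_check(gpointer data, gpointer user_data)
{
    deferred_check_t *check = data;
    check->fn(check->rsc_id);
}
static void
example_check(const char *rsc_id)
{
    printf("re-checking parameters of %s\n", rsc_id);
}
int
main(void)
{
    add_deferred_check("bundle-node-0", example_check);        // during unpacking
    g_list_foreach(deferred_checks, run_deferred_check, NULL); // after assignment
    g_list_free_full(deferred_checks, g_free);
    return 0;
}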
/*!
* \internal
* \brief Check whether a resource has failcount clearing scheduled on a node
*
* \param[in] node Node to check
* \param[in] rsc Resource to check
*
* \return true if \p rsc has failcount clearing scheduled on \p node,
* otherwise false
*/
static bool
failcount_clear_action_exists(const pcmk_node_t *node,
const pcmk_resource_t *rsc)
{
GList *list = pe__resource_actions(rsc, node, PCMK_ACTION_CLEAR_FAILCOUNT,
TRUE);
if (list != NULL) {
g_list_free(list);
return true;
}
return false;
}
/*!
* \internal
* \brief Ban a resource from a node if it reached its failure threshold there
*
* \param[in,out] data Resource to check failure threshold for
* \param[in] user_data Node to check resource on
*/
static void
check_failure_threshold(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = data;
const pcmk_node_t *node = user_data;
// If this is a collective resource, apply recursively to children instead
if (rsc->priv->children != NULL) {
g_list_foreach(rsc->priv->children, check_failure_threshold,
user_data);
return;
}
if (!failcount_clear_action_exists(node, rsc)) {
/* Don't force the resource away from this node due to a failcount
* that's going to be cleared.
*
* @TODO Failcount clearing can be scheduled in
* pcmk__handle_rsc_config_changes() via process_rsc_history(), or in
* schedule_resource_actions() via check_params(). This runs well before
* then, so it cannot detect those, meaning we might check the migration
* threshold when we shouldn't. Worst case, we stop or move the
* resource, then move it back in the next transition.
*/
pcmk_resource_t *failed = NULL;
if (pcmk__threshold_reached(rsc, node, &failed)) {
resource_location(failed, node, -PCMK_SCORE_INFINITY,
"__fail_limit__", rsc->priv->scheduler);
}
}
}
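For context, the threshold consulted by pcmk__threshold_reached() comes from the migration-threshold resource meta-attribute; a minimal CIB fragment (illustrative IDs) that bans a resource from a node after three failures there:
<primitive id="db" class="ocf" provider="heartbeat" type="pgsql">
  <meta_attributes id="db-meta">
    <!-- move away from a node after 3 failures on it -->
    <nvpair id="db-migration-threshold" name="migration-threshold" value="3"/>
  </meta_attributes>
</primitive>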
/*!
* \internal
* \brief If resource has exclusive discovery, ban node if not allowed
*
* Location constraints have a PCMK_XA_RESOURCE_DISCOVERY option that allows
* users to specify where probes are done for the affected resource. If this is
* set to \c exclusive, probes will only be done on nodes listed in exclusive
* constraints. This function bans the resource from the node if the node is not
* listed.
*
* \param[in,out] data Resource to check
* \param[in] user_data Node to check resource on
*/
static void
apply_exclusive_discovery(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = data;
const pcmk_node_t *node = user_data;
if (pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes)
|| pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pcmk__rsc_exclusive_probes)) {
pcmk_node_t *match = NULL;
// If this is a collective resource, apply recursively to children
g_list_foreach(rsc->priv->children, apply_exclusive_discovery,
user_data);
match = g_hash_table_lookup(rsc->priv->allowed_nodes,
node->priv->id);
if ((match != NULL)
&& (match->assign->probe_mode != pcmk__probe_exclusive)) {
match->assign->score = -PCMK_SCORE_INFINITY;
}
}
}
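A location constraint that opts a resource into exclusive discovery, as described above, looks roughly like this (illustrative IDs and node name); probes for the resource are then limited to nodes named in such constraints:
<rsc_location id="loc-db-node1" rsc="db" node="node1" score="INFINITY"
              resource-discovery="exclusive"/>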
/*!
* \internal
* \brief Apply stickiness to a resource if appropriate
*
* \param[in,out] data Resource to check for stickiness
* \param[in] user_data Ignored
*/
static void
apply_stickiness(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = data;
pcmk_node_t *node = NULL;
// If this is a collective resource, apply recursively to children instead
if (rsc->priv->children != NULL) {
g_list_foreach(rsc->priv->children, apply_stickiness, NULL);
return;
}
/* A resource is sticky if it is managed, has stickiness configured, and is
* active on a single node.
*/
if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)
|| (rsc->priv->stickiness < 1)
|| !pcmk__list_of_1(rsc->priv->active_nodes)) {
return;
}
node = rsc->priv->active_nodes->data;
/* In a symmetric cluster, stickiness can always be used. In an
* asymmetric cluster, we have to check whether the resource is still
* allowed on the node, so we don't keep the resource somewhere it is no
* longer explicitly enabled.
*/
if (!pcmk_is_set(rsc->priv->scheduler->flags,
pcmk__sched_symmetric_cluster)
&& (g_hash_table_lookup(rsc->priv->allowed_nodes,
node->priv->id) == NULL)) {
pcmk__rsc_debug(rsc,
"Ignoring %s stickiness because the cluster is "
"asymmetric and %s is not explicitly allowed",
rsc->id, pcmk__node_name(node));
return;
}
pcmk__rsc_debug(rsc, "Resource %s has %d stickiness on %s",
rsc->id, rsc->priv->stickiness, pcmk__node_name(node));
resource_location(rsc, node, rsc->priv->stickiness, "stickiness",
rsc->priv->scheduler);
}
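The stickiness applied here is commonly configured cluster-wide via rsc_defaults; a minimal fragment (illustrative IDs) giving every resource a stickiness of 100:
<rsc_defaults>
  <meta_attributes id="rsc-defaults-options">
    <!-- prefer keeping resources on the node where they are active -->
    <nvpair id="rsc-defaults-stickiness" name="resource-stickiness" value="100"/>
  </meta_attributes>
</rsc_defaults>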
/*!
* \internal
* \brief Apply shutdown locks for all resources as appropriate
*
* \param[in,out] scheduler Scheduler data
*/
static void
apply_shutdown_locks(pcmk_scheduler_t *scheduler)
{
if (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
return;
}
for (GList *iter = scheduler->priv->resources;
iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
rsc->priv->cmds->shutdown_lock(rsc);
}
}
/*!
* \internal
* \brief Apply node-specific scheduling criteria
*
* After the CIB has been unpacked, process node-specific scheduling criteria
* including shutdown locks, location constraints, resource stickiness,
* migration thresholds, and exclusive resource discovery.
*
* \param[in,out] scheduler Scheduler data
*/
static void
apply_node_criteria(pcmk_scheduler_t *scheduler)
{
crm_trace("Applying node-specific scheduling criteria");
apply_shutdown_locks(scheduler);
pcmk__apply_locations(scheduler);
g_list_foreach(scheduler->priv->resources, apply_stickiness, NULL);
for (GList *node_iter = scheduler->nodes; node_iter != NULL;
node_iter = node_iter->next) {
for (GList *rsc_iter = scheduler->priv->resources;
rsc_iter != NULL; rsc_iter = rsc_iter->next) {
check_failure_threshold(rsc_iter->data, node_iter->data);
apply_exclusive_discovery(rsc_iter->data, node_iter->data);
}
}
}
/*!
* \internal
* \brief Assign resources to nodes
*
* \param[in,out] scheduler Scheduler data
*/
static void
assign_resources(pcmk_scheduler_t *scheduler)
{
GList *iter = NULL;
crm_trace("Assigning resources to nodes");
if (!pcmk__str_eq(scheduler->priv->placement_strategy, PCMK_VALUE_DEFAULT,
pcmk__str_casei)) {
pcmk__sort_resources(scheduler);
}
pcmk__show_node_capacities("Original", scheduler);
if (pcmk_is_set(scheduler->flags, pcmk__sched_have_remote_nodes)) {
/* Assign remote connection resources first (which will also assign any
* colocation dependencies). If the connection is migrating, always
* prefer the partial migration target.
*/
for (iter = scheduler->priv->resources;
iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
const pcmk_node_t *target = rsc->priv->partial_migration_target;
if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
pcmk__rsc_trace(rsc, "Assigning remote connection resource '%s'",
rsc->id);
rsc->priv->cmds->assign(rsc, target, true);
}
}
}
/* now do the rest of the resources */
for (iter = scheduler->priv->resources; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
if (!pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
pcmk__rsc_trace(rsc, "Assigning %s resource '%s'",
rsc->priv->xml->name, rsc->id);
rsc->priv->cmds->assign(rsc, NULL, true);
}
}
pcmk__show_node_capacities("Remaining", scheduler);
}
/*!
* \internal
* \brief Schedule fail count clearing on online nodes if resource is orphaned
*
* \param[in,out] data Resource to check
* \param[in] user_data Ignored
*/
static void
clear_failcounts_if_orphaned(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = data;
if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
return;
}
crm_trace("Clear fail counts for orphaned resource %s", rsc->id);
/* There's no need to recurse into rsc->priv->children because those
* should just be unassigned clone instances.
*/
for (GList *iter = rsc->priv->scheduler->nodes;
iter != NULL; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
pcmk_action_t *clear_op = NULL;
if (!node->details->online) {
continue;
}
if (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL) == 0) {
continue;
}
clear_op = pe__clear_failcount(rsc, node, "it is orphaned",
rsc->priv->scheduler);
/* We can't use order_action_then_stop() here because its
* pcmk__ar_guest_allowed breaks things
*/
pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc),
NULL, pcmk__ar_ordered, rsc->priv->scheduler);
}
}
/*!
* \internal
* \brief Schedule any resource actions needed
*
* \param[in,out] scheduler Scheduler data
*/
static void
schedule_resource_actions(pcmk_scheduler_t *scheduler)
{
// Process deferred action checks
pe__foreach_param_check(scheduler, check_params);
pe__free_param_checks(scheduler);
if (pcmk_is_set(scheduler->flags, pcmk__sched_probe_resources)) {
crm_trace("Scheduling probes");
pcmk__schedule_probes(scheduler);
}
if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
g_list_foreach(scheduler->priv->resources, clear_failcounts_if_orphaned,
NULL);
}
crm_trace("Scheduling resource actions");
for (GList *iter = scheduler->priv->resources;
iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
rsc->priv->cmds->create_actions(rsc);
}
}
/*!
* \internal
* \brief Check whether a resource or any of its descendants are managed
*
* \param[in] rsc Resource to check
*
* \return true if resource or any descendant is managed, otherwise false
*/
static bool
is_managed(const pcmk_resource_t *rsc)
{
if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
return true;
}
for (GList *iter = rsc->priv->children;
iter != NULL; iter = iter->next) {
if (is_managed((pcmk_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether any resources in the cluster are managed
*
* \param[in] scheduler Scheduler data
*
* \return true if any resource is managed, otherwise false
*/
static bool
any_managed_resources(const pcmk_scheduler_t *scheduler)
{
for (const GList *iter = scheduler->priv->resources;
iter != NULL; iter = iter->next) {
if (is_managed((const pcmk_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether a node requires fencing
*
* \param[in] node Node to check
* \param[in] have_managed Whether any resource in cluster is managed
*
* \return true if \p node should be fenced, otherwise false
*/
static bool
needs_fencing(const pcmk_node_t *node, bool have_managed)
{
return have_managed && node->details->unclean
&& pe_can_fence(node->priv->scheduler, node);
}
/*!
* \internal
* \brief Check whether a node requires shutdown
*
* \param[in] node Node to check
*
* \return true if \p node should be shut down, otherwise false
*/
static bool
needs_shutdown(const pcmk_node_t *node)
{
if (pcmk__is_pacemaker_remote_node(node)) {
/* Do not send shutdown actions for Pacemaker Remote nodes.
* @TODO We might come up with a good use for this in the future.
*/
return false;
}
return node->details->online && node->details->shutdown;
}
/*!
* \internal
* \brief Track and order non-DC fencing
*
* \param[in,out] list List of existing non-DC fencing actions
* \param[in,out] action Fencing action to prepend to \p list
* \param[in] scheduler Scheduler data
*
* \return (Possibly new) head of \p list
*/
static GList *
add_nondc_fencing(GList *list, pcmk_action_t *action,
const pcmk_scheduler_t *scheduler)
{
if (!pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)
&& (list != NULL)) {
/* Concurrent fencing is disabled, so order each non-DC
* fencing in a chain. If there is any DC fencing or
* shutdown, it will be ordered after the last action in the
* chain later.
*/
order_actions((pcmk_action_t *) list->data, action, pcmk__ar_ordered);
}
return g_list_prepend(list, action);
}
/*!
* \internal
* \brief Schedule a node for fencing
*
* \param[in,out] node Node that requires fencing
*
* \return Newly created fencing action for \p node
*/
static pcmk_action_t *
schedule_fencing(pcmk_node_t *node)
{
pcmk_action_t *fencing = pe_fence_op(node, NULL, FALSE, "node is unclean",
FALSE, node->priv->scheduler);
pcmk__sched_warn(node->priv->scheduler, "Scheduling node %s for fencing",
pcmk__node_name(node));
pcmk__order_vs_fence(fencing, node->priv->scheduler);
return fencing;
}
/*!
* \internal
* \brief Create and order node fencing and shutdown actions
*
* \param[in,out] scheduler Scheduler data
*/
static void
schedule_fencing_and_shutdowns(pcmk_scheduler_t *scheduler)
{
pcmk_action_t *dc_down = NULL;
bool integrity_lost = false;
bool have_managed = any_managed_resources(scheduler);
GList *fencing_ops = NULL;
GList *shutdown_ops = NULL;
crm_trace("Scheduling fencing and shutdowns as needed");
if (!have_managed) {
crm_notice("No fencing will be done until there are resources "
"to manage");
}
// Check each node for whether it needs fencing or shutdown
for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
pcmk_action_t *fencing = NULL;
const bool is_dc = pcmk__same_node(node, scheduler->dc_node);
/* Guest nodes are "fenced" by recovering their container resource,
* so handle them separately.
*/
if (pcmk__is_guest_or_bundle_node(node)) {
if (pcmk_is_set(node->priv->flags, pcmk__node_remote_reset)
&& have_managed && pe_can_fence(scheduler, node)) {
pcmk__fence_guest(node);
}
continue;
}
if (needs_fencing(node, have_managed)) {
fencing = schedule_fencing(node);
// Track DC and non-DC fence actions separately
if (is_dc) {
dc_down = fencing;
} else {
fencing_ops = add_nondc_fencing(fencing_ops, fencing,
scheduler);
}
} else if (needs_shutdown(node)) {
pcmk_action_t *down_op = pcmk__new_shutdown_action(node);
// Track DC and non-DC shutdown actions separately
if (is_dc) {
dc_down = down_op;
} else {
shutdown_ops = g_list_prepend(shutdown_ops, down_op);
}
}
if ((fencing == NULL) && node->details->unclean) {
integrity_lost = true;
pcmk__config_warn("Node %s is unclean but cannot be fenced",
pcmk__node_name(node));
}
}
if (integrity_lost) {
if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
pcmk__config_warn("Resource functionality and data integrity "
"cannot be guaranteed (configure, enable, "
"and test fencing to correct this)");
} else if (!pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) {
crm_notice("Unclean nodes will not be fenced until quorum is "
"attained or " PCMK_OPT_NO_QUORUM_POLICY " is set to "
PCMK_VALUE_IGNORE);
}
}
if (dc_down != NULL) {
/* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
* DC elections. However, we don't want to order non-DC shutdowns before
* a DC *fencing*, because even though we don't want a node that's
* shutting down to become DC, the DC fencing could be ordered before a
* clone stop that's also ordered before the shutdowns, thus leading to
* a graph loop.
*/
if (pcmk__str_eq(dc_down->task, PCMK_ACTION_DO_SHUTDOWN,
pcmk__str_none)) {
pcmk__order_after_each(dc_down, shutdown_ops);
}
// Order any non-DC fencing before any DC fencing or shutdown
if (pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) {
/* With concurrent fencing, order each non-DC fencing action
* separately before any DC fencing or shutdown.
*/
pcmk__order_after_each(dc_down, fencing_ops);
} else if (fencing_ops != NULL) {
/* Without concurrent fencing, the non-DC fencing actions are
* already ordered relative to each other, so we just need to order
* the DC fencing after the last action in the chain (which is the
* first item in the list).
*/
order_actions((pcmk_action_t *) fencing_ops->data, dc_down,
pcmk__ar_ordered);
}
}
g_list_free(fencing_ops);
g_list_free(shutdown_ops);
}
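The pcmk__sched_concurrent_fencing flag tested above reflects the concurrent-fencing cluster property; a minimal fragment (illustrative IDs) enabling it so that fence actions need not be chained:
<crm_config>
  <cluster_property_set id="cib-bootstrap-options">
    <nvpair id="opt-concurrent-fencing" name="concurrent-fencing" value="true"/>
  </cluster_property_set>
</crm_config>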
static void
log_resource_details(pcmk_scheduler_t *scheduler)
{
pcmk__output_t *out = scheduler->priv->out;
GList *all = NULL;
/* Due to the `crm_mon --node=` feature, out->message() for all the
* resource-related messages expects a list of nodes that we are allowed to
* output information for. Here, we create a wildcard to match all nodes.
*/
all = g_list_prepend(all, (gpointer) "*");
for (GList *item = scheduler->priv->resources;
item != NULL; item = item->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) item->data;
// Log all resources except inactive orphans
if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed)
|| (rsc->priv->orig_role != pcmk_role_stopped)) {
out->message(out, pcmk__map_element_name(rsc->priv->xml), 0UL,
rsc, all, all);
}
}
g_list_free(all);
}
static void
log_all_actions(pcmk_scheduler_t *scheduler)
{
/* This only ever outputs to the log, so ignore whatever output object was
* previously set and just log instead.
*/
pcmk__output_t *prev_out = scheduler->priv->out;
pcmk__output_t *out = NULL;
if (pcmk__log_output_new(&out) != pcmk_rc_ok) {
return;
}
pe__register_messages(out);
pcmk__register_lib_messages(out);
pcmk__output_set_log_level(out, LOG_NOTICE);
scheduler->priv->out = out;
out->begin_list(out, NULL, NULL, "Actions");
pcmk__output_actions(scheduler);
out->end_list(out);
out->finish(out, CRM_EX_OK, true, NULL);
pcmk__output_free(out);
scheduler->priv->out = prev_out;
}
/*!
* \internal
* \brief Log all required but unrunnable actions at trace level
*
* \param[in] scheduler Scheduler data
*/
static void
log_unrunnable_actions(const pcmk_scheduler_t *scheduler)
{
const uint64_t flags = pcmk__action_optional
|pcmk__action_runnable
|pcmk__action_pseudo;
crm_trace("Required but unrunnable actions:");
for (const GList *iter = scheduler->priv->actions;
iter != NULL; iter = iter->next) {
const pcmk_action_t *action = (const pcmk_action_t *) iter->data;
if (!pcmk_any_flags_set(action->flags, flags)) {
pcmk__log_action("\t", action, true);
}
}
}
/*!
* \internal
* \brief Unpack the CIB for scheduling
*
* \param[in,out] cib CIB XML to unpack (may be NULL if already unpacked)
* \param[in] flags Scheduler flags to set in addition to defaults
* \param[in,out] scheduler Scheduler data
*/
static void
unpack_cib(xmlNode *cib, unsigned long long flags, pcmk_scheduler_t *scheduler)
{
- const char* localhost_save = NULL;
-
if (pcmk_is_set(scheduler->flags, pcmk__sched_have_status)) {
crm_trace("Reusing previously calculated cluster status");
pcmk__set_scheduler_flags(scheduler, flags);
return;
}
- localhost_save = scheduler->priv->local_node_name;
-
CRM_ASSERT(cib != NULL);
crm_trace("Calculating cluster status");
/* This will zero the entire struct without freeing anything first, so
* callers should never call pcmk__schedule_actions() with a populated data
* set unless pcmk__sched_have_status is set (i.e. cluster_status() was
* previously called, whether directly or via pcmk__schedule_actions()).
*/
set_working_set_defaults(scheduler);
- if (localhost_save) {
- scheduler->priv->local_node_name = localhost_save;
- }
-
pcmk__set_scheduler_flags(scheduler, flags);
scheduler->input = cib;
cluster_status(scheduler); // Sets pcmk__sched_have_status
}
/*!
* \internal
* \brief Run the scheduler for a given CIB
*
* \param[in,out] cib CIB XML to use as scheduler input
* \param[in] flags Scheduler flags to set in addition to defaults
* \param[in,out] scheduler Scheduler data
*/
void
pcmk__schedule_actions(xmlNode *cib, unsigned long long flags,
pcmk_scheduler_t *scheduler)
{
unpack_cib(cib, flags, scheduler);
pcmk__set_assignment_methods(scheduler);
pcmk__apply_node_health(scheduler);
pcmk__unpack_constraints(scheduler);
if (pcmk_is_set(scheduler->flags, pcmk__sched_validate_only)) {
return;
}
if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only)
&& pcmk__is_daemon) {
log_resource_details(scheduler);
}
apply_node_criteria(scheduler);
if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
return;
}
pcmk__create_internal_constraints(scheduler);
pcmk__handle_rsc_config_changes(scheduler);
assign_resources(scheduler);
schedule_resource_actions(scheduler);
/* Remote ordering constraints need to happen prior to calculating fencing
* because it is one more place we can mark nodes as needing fencing.
*/
pcmk__order_remote_connection_actions(scheduler);
schedule_fencing_and_shutdowns(scheduler);
pcmk__apply_orderings(scheduler);
log_all_actions(scheduler);
pcmk__create_graph(scheduler);
if (get_crm_log_level() == LOG_TRACE) {
log_unrunnable_actions(scheduler);
}
}
/*!
* \internal
* \brief Initialize scheduler data
*
* Make our own copies of the CIB XML and date/time object, if they're not
* \c NULL. This way we don't have to take ownership of the objects passed via
* the API.
*
* This function is most useful for public API functions that want the caller
* to retain ownership of the CIB object.
*
* \param[in,out] out Output object
* \param[in] input The CIB XML to check (if \c NULL, use current CIB)
* \param[in] date Date and time to use in the scheduler (if \c NULL,
* use current date and time). This can be used for
* checking whether a rule is in effect at a certain
* date and time.
* \param[out] scheduler Where to store initialized scheduler data
*
* \return Standard Pacemaker return code
*/
int
pcmk__init_scheduler(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date,
pcmk_scheduler_t **scheduler)
{
// Allows for cleaner syntax than dereferencing the scheduler argument
pcmk_scheduler_t *new_scheduler = NULL;
new_scheduler = pe_new_working_set();
if (new_scheduler == NULL) {
return ENOMEM;
}
pcmk__set_scheduler_flags(new_scheduler, pcmk__sched_no_counts);
// Populate the scheduler data
// Make our own copy of the given input or fetch the CIB and use that
if (input != NULL) {
new_scheduler->input = pcmk__xml_copy(NULL, input);
if (new_scheduler->input == NULL) {
out->err(out, "Failed to copy input XML");
pe_free_working_set(new_scheduler);
return ENOMEM;
}
} else {
int rc = cib__signon_query(out, NULL, &(new_scheduler->input));
if (rc != pcmk_rc_ok) {
pe_free_working_set(new_scheduler);
return rc;
}
}
// Make our own copy of the given crm_time_t object; otherwise
// cluster_status() populates with the current time
if (date != NULL) {
// pcmk_copy_time() guarantees non-NULL
new_scheduler->priv->now = pcmk_copy_time(date);
}
// Unpack everything
cluster_status(new_scheduler);
*scheduler = new_scheduler;
return pcmk_rc_ok;
}
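A sketch of how a caller inside the Pacemaker tree might use pcmk__init_scheduler(), assuming the internal text-output constructor pcmk__text_output_new(); the wrapper function itself is hypothetical:
#include <crm_internal.h>
#include <crm/common/output_internal.h>
#include <pacemaker-internal.h>
static int
inspect_cib(xmlNode *cib_xml)
{
    pcmk__output_t *out = NULL;
    pcmk_scheduler_t *scheduler = NULL;
    int rc = pcmk__text_output_new(&out, NULL); // NULL filename: write to stdout
    if (rc != pcmk_rc_ok) {
        return rc;
    }
    // A NULL date means "evaluate rules against the current date and time"
    rc = pcmk__init_scheduler(out, cib_xml, NULL, &scheduler);
    if (rc == pcmk_rc_ok) {
        // ... inspect scheduler->nodes, scheduler->priv->resources ...
        pe_free_working_set(scheduler); // also frees the copied input CIB
    }
    pcmk__output_free(out);
    return rc;
}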
diff --git a/lib/pengine/status.c b/lib/pengine/status.c
index 7da255f614..31580849d0 100644
--- a/lib/pengine/status.c
+++ b/lib/pengine/status.c
@@ -1,528 +1,530 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/cib_internal.h>
#include <glib.h>
#include <crm/pengine/internal.h>
#include <pe_status_private.h>
/*!
* \brief Create a new object to hold scheduler data
*
* \return New, initialized scheduler data on success, else NULL (and set errno)
* \note Only pcmk_scheduler_t objects created with this function (as opposed
* to statically declared or directly allocated) should be used with the
* functions in this library, to allow for future extensions to the
* data type. The caller is responsible for freeing the memory with
* pe_free_working_set() when the instance is no longer needed.
*/
pcmk_scheduler_t *
pe_new_working_set(void)
{
pcmk_scheduler_t *scheduler = calloc(1, sizeof(pcmk_scheduler_t));
if (scheduler == NULL) {
return NULL;
}
scheduler->priv = calloc(1, sizeof(pcmk__scheduler_private_t));
if (scheduler->priv == NULL) {
free(scheduler);
return NULL;
}
set_working_set_defaults(scheduler);
return scheduler;
}
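Typical pairing with pe_free_working_set(), sketched for illustration (hypothetical wrapper):
#include <errno.h>
#include <crm/pengine/status.h>
static int
make_and_free(void)
{
    pcmk_scheduler_t *scheduler = pe_new_working_set();
    if (scheduler == NULL) {
        return errno; // set by the failed allocation
    }
    // ... set scheduler->input and call cluster_status(scheduler) ...
    pe_free_working_set(scheduler);
    return 0;
}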
/*!
* \brief Free scheduler data
*
* \param[in,out] scheduler Scheduler data to free
*/
void
pe_free_working_set(pcmk_scheduler_t *scheduler)
{
if (scheduler != NULL) {
pe_reset_working_set(scheduler);
free(scheduler->priv);
free(scheduler);
}
}
#define XPATH_DEPRECATED_RULES \
"//" PCMK_XE_OP_DEFAULTS "//" PCMK_XE_EXPRESSION \
"|//" PCMK_XE_OP "//" PCMK_XE_EXPRESSION
/*!
* \internal
* \brief Log a warning for deprecated rule syntax in operations
*
* \param[in] scheduler Scheduler data
*/
static void
check_for_deprecated_rules(pcmk_scheduler_t *scheduler)
{
// @COMPAT Drop this function when support for the syntax is dropped
xmlNode *deprecated = get_xpath_object(XPATH_DEPRECATED_RULES,
scheduler->input, LOG_NEVER);
if (deprecated != NULL) {
pcmk__warn_once(pcmk__wo_op_attr_expr,
"Support for rules with node attribute expressions in "
PCMK_XE_OP " or " PCMK_XE_OP_DEFAULTS " is deprecated "
"and will be dropped in a future release");
}
}
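An example of the deprecated syntax this warning targets (illustrative IDs): a rule with a node attribute expression gating an operation default, matching the op_defaults half of the XPath above:
<op_defaults>
  <meta_attributes id="op-defaults-fast">
    <rule id="op-rule-fast" score="INFINITY">
      <!-- node attribute expressions here are what is deprecated -->
      <expression id="op-expr-fast" attribute="fast-disks"
                  operation="eq" value="true"/>
    </rule>
    <nvpair id="op-timeout-fast" name="timeout" value="30s"/>
  </meta_attributes>
</op_defaults>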
/*
* Unpack everything
* At the end you'll have:
* - A list of nodes
* - A list of resources (each with any dependencies on other resources)
* - A list of constraints between resources and nodes
* - A list of constraints between start/stop actions
* - A list of nodes that need to be stonith'd
* - A list of nodes that need to be shut down
* - A list of the possible stop/start actions (without dependencies)
*/
gboolean
cluster_status(pcmk_scheduler_t * scheduler)
{
const char *new_version = NULL;
xmlNode *section = NULL;
if ((scheduler == NULL) || (scheduler->input == NULL)) {
return FALSE;
}
new_version = crm_element_value(scheduler->input, PCMK_XA_CRM_FEATURE_SET);
if (pcmk__check_feature_set(new_version) != pcmk_rc_ok) {
pcmk__config_err("Can't process CIB with feature set '%s' greater than our own '%s'",
new_version, CRM_FEATURE_SET);
return FALSE;
}
crm_trace("Beginning unpack");
if (scheduler->priv->failed != NULL) {
pcmk__xml_free(scheduler->priv->failed);
}
scheduler->priv->failed = pcmk__xe_create(NULL, "failed-ops");
if (scheduler->priv->now == NULL) {
scheduler->priv->now = crm_time_new(NULL);
}
if (pcmk__xe_attr_is_true(scheduler->input, PCMK_XA_HAVE_QUORUM)) {
pcmk__set_scheduler_flags(scheduler, pcmk__sched_quorate);
} else {
pcmk__clear_scheduler_flags(scheduler, pcmk__sched_quorate);
}
scheduler->priv->op_defaults = get_xpath_object("//" PCMK_XE_OP_DEFAULTS,
scheduler->input,
LOG_NEVER);
check_for_deprecated_rules(scheduler);
scheduler->priv->rsc_defaults = get_xpath_object("//" PCMK_XE_RSC_DEFAULTS,
scheduler->input,
LOG_NEVER);
section = get_xpath_object("//" PCMK_XE_CRM_CONFIG, scheduler->input,
LOG_TRACE);
unpack_config(section, scheduler);
if (!pcmk_any_flags_set(scheduler->flags,
pcmk__sched_location_only|pcmk__sched_quorate)
&& (scheduler->no_quorum_policy != pcmk_no_quorum_ignore)) {
pcmk__sched_warn(scheduler,
"Fencing and resource management disabled "
"due to lack of quorum");
}
section = get_xpath_object("//" PCMK_XE_NODES, scheduler->input, LOG_TRACE);
unpack_nodes(section, scheduler);
section = get_xpath_object("//" PCMK_XE_RESOURCES, scheduler->input,
LOG_TRACE);
if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
unpack_remote_nodes(section, scheduler);
}
unpack_resources(section, scheduler);
section = get_xpath_object("//" PCMK_XE_FENCING_TOPOLOGY, scheduler->input,
LOG_TRACE);
pcmk__unpack_fencing_topology(section, scheduler);
section = get_xpath_object("//" PCMK_XE_TAGS, scheduler->input, LOG_NEVER);
unpack_tags(section, scheduler);
if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
section = get_xpath_object("//" PCMK_XE_STATUS, scheduler->input,
LOG_TRACE);
unpack_status(section, scheduler);
}
if (!pcmk_is_set(scheduler->flags, pcmk__sched_no_counts)) {
for (GList *item = scheduler->priv->resources;
item != NULL; item = item->next) {
pcmk_resource_t *rsc = item->data;
rsc->priv->fns->count(item->data);
}
crm_trace("Cluster resource count: %d (%d disabled, %d blocked)",
scheduler->ninstances, scheduler->disabled_resources,
scheduler->blocked_resources);
}
pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_status);
return TRUE;
}
/*!
* \internal
* \brief Free a list of pcmk_resource_t
*
* \param[in,out] resources List to free
*
* \note When the scheduler's resource list is freed, that includes the original
* storage for the uname and id of any Pacemaker Remote nodes in the
* scheduler's node list, so take care not to use those afterward.
* \todo Refactor pcmk_node_t to strdup() the node name.
*/
static void
pe_free_resources(GList *resources)
{
pcmk_resource_t *rsc = NULL;
GList *iterator = resources;
while (iterator != NULL) {
rsc = (pcmk_resource_t *) iterator->data;
iterator = iterator->next;
rsc->priv->fns->free(rsc);
}
if (resources != NULL) {
g_list_free(resources);
}
}
static void
pe_free_actions(GList *actions)
{
GList *iterator = actions;
while (iterator != NULL) {
pe_free_action(iterator->data);
iterator = iterator->next;
}
if (actions != NULL) {
g_list_free(actions);
}
}
static void
pe_free_nodes(GList *nodes)
{
for (GList *iterator = nodes; iterator != NULL; iterator = iterator->next) {
pcmk_node_t *node = (pcmk_node_t *) iterator->data;
// Shouldn't be possible, but to be safe ...
if (node == NULL) {
continue;
}
if (node->details == NULL) {
free(node);
continue;
}
/* This is called after pe_free_resources(), which means that we can't
* use node->private->name for Pacemaker Remote nodes.
*/
crm_trace("Freeing node %s", (pcmk__is_pacemaker_remote_node(node)?
"(guest or remote)" : pcmk__node_name(node)));
if (node->priv->attrs != NULL) {
g_hash_table_destroy(node->priv->attrs);
}
if (node->priv->utilization != NULL) {
g_hash_table_destroy(node->priv->utilization);
}
if (node->priv->digest_cache != NULL) {
g_hash_table_destroy(node->priv->digest_cache);
}
g_list_free(node->details->running_rsc);
g_list_free(node->priv->assigned_resources);
free(node->priv);
free(node->details);
free(node->assign);
free(node);
}
if (nodes != NULL) {
g_list_free(nodes);
}
}
static void
pe__free_ordering(GList *constraints)
{
GList *iterator = constraints;
while (iterator != NULL) {
pcmk__action_relation_t *order = iterator->data;
iterator = iterator->next;
free(order->task1);
free(order->task2);
free(order);
}
if (constraints != NULL) {
g_list_free(constraints);
}
}
static void
pe__free_location(GList *constraints)
{
GList *iterator = constraints;
while (iterator != NULL) {
pcmk__location_t *cons = iterator->data;
iterator = iterator->next;
g_list_free_full(cons->nodes, free);
free(cons->id);
free(cons);
}
if (constraints != NULL) {
g_list_free(constraints);
}
}
/*!
* \brief Reset scheduler data to defaults without freeing it or constraints
*
* \param[in,out] scheduler Scheduler data to reset
*
* \deprecated This function is deprecated as part of the API;
* pe_reset_working_set() should be used instead.
*/
void
cleanup_calculations(pcmk_scheduler_t *scheduler)
{
if (scheduler == NULL) {
return;
}
pcmk__clear_scheduler_flags(scheduler, pcmk__sched_have_status);
if (scheduler->priv->options != NULL) {
g_hash_table_destroy(scheduler->priv->options);
}
if (scheduler->priv->singletons != NULL) {
g_hash_table_destroy(scheduler->priv->singletons);
}
if (scheduler->priv->ticket_constraints != NULL) {
g_hash_table_destroy(scheduler->priv->ticket_constraints);
}
if (scheduler->priv->templates != NULL) {
g_hash_table_destroy(scheduler->priv->templates);
}
if (scheduler->tags) {
g_hash_table_destroy(scheduler->tags);
}
crm_trace("deleting resources");
pe_free_resources(scheduler->priv->resources);
crm_trace("deleting actions");
pe_free_actions(scheduler->priv->actions);
crm_trace("deleting nodes");
pe_free_nodes(scheduler->nodes);
pe__free_param_checks(scheduler);
g_list_free(scheduler->stop_needed);
crm_time_free(scheduler->priv->now);
pcmk__xml_free(scheduler->input);
pcmk__xml_free(scheduler->priv->failed);
pcmk__xml_free(scheduler->priv->graph);
set_working_set_defaults(scheduler);
CRM_LOG_ASSERT((scheduler->priv->location_constraints == NULL)
&& (scheduler->priv->ordering_constraints == NULL));
}
/*!
* \brief Reset scheduler data to default state without freeing it
*
* \param[in,out] scheduler Scheduler data to reset
*/
void
pe_reset_working_set(pcmk_scheduler_t *scheduler)
{
if (scheduler == NULL) {
return;
}
crm_trace("Deleting %d ordering constraints",
g_list_length(scheduler->priv->ordering_constraints));
pe__free_ordering(scheduler->priv->ordering_constraints);
scheduler->priv->ordering_constraints = NULL;
crm_trace("Deleting %d location constraints",
g_list_length(scheduler->priv->location_constraints));
pe__free_location(scheduler->priv->location_constraints);
scheduler->priv->location_constraints = NULL;
crm_trace("Deleting %d colocation constraints",
g_list_length(scheduler->priv->colocation_constraints));
g_list_free_full(scheduler->priv->colocation_constraints, free);
scheduler->priv->colocation_constraints = NULL;
cleanup_calculations(scheduler);
}
void
set_working_set_defaults(pcmk_scheduler_t *scheduler)
{
// These members must be preserved
pcmk__scheduler_private_t *priv = scheduler->priv;
pcmk__output_t *out = priv->out;
+ const char *local_node_name = scheduler->priv->local_node_name;
// Wipe the main structs (any other members must have previously been freed)
memset(scheduler, 0, sizeof(pcmk_scheduler_t));
memset(priv, 0, sizeof(pcmk__scheduler_private_t));
// Restore the members to preserve
scheduler->priv = priv;
scheduler->priv->out = out;
+ scheduler->priv->local_node_name = local_node_name;
// Set defaults for everything else
scheduler->priv->next_ordering_id = 1;
scheduler->priv->next_action_id = 1;
scheduler->no_quorum_policy = pcmk_no_quorum_stop;
pcmk__set_scheduler_flags(scheduler,
pcmk__sched_symmetric_cluster
|pcmk__sched_stop_removed_resources
|pcmk__sched_cancel_removed_actions);
if (!strcmp(PCMK__CONCURRENT_FENCING_DEFAULT, PCMK_VALUE_TRUE)) {
pcmk__set_scheduler_flags(scheduler, pcmk__sched_concurrent_fencing);
}
}
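The two added lines above are the substance of this patch: priv->local_node_name now survives set_working_set_defaults() the same way the output object does, replacing the save-and-restore that the first hunk deletes from unpack_cib(). The pattern in isolation (hypothetical struct, not Pacemaker code):
#include <string.h>
struct ctx {
    void *out;              // preserved across resets
    const char *node_name;  // preserved across resets (what this patch adds)
    int next_id;            // reset to a default below
};
static void
reset_ctx(struct ctx *c)
{
    // Save the members to keep, wipe the struct, then restore them
    void *out = c->out;
    const char *node_name = c->node_name;
    memset(c, 0, sizeof(*c));
    c->out = out;
    c->node_name = node_name;
    c->next_id = 1; // defaults for everything else
}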
pcmk_resource_t *
pe_find_resource(GList *rsc_list, const char *id)
{
return pe_find_resource_with_flags(rsc_list, id, pcmk_rsc_match_history);
}
pcmk_resource_t *
pe_find_resource_with_flags(GList *rsc_list, const char *id, enum pe_find flags)
{
GList *rIter = NULL;
for (rIter = rsc_list; id && rIter; rIter = rIter->next) {
pcmk_resource_t *parent = rIter->data;
pcmk_resource_t *match = parent->priv->fns->find_rsc(parent, id, NULL,
flags);
if (match != NULL) {
return match;
}
}
crm_trace("No match for %s", id);
return NULL;
}
/*!
* \brief Find a node by name or ID in a list of nodes
*
* \param[in] nodes List of nodes (as pcmk_node_t*)
* \param[in] id If not NULL, ID of node to find
* \param[in] uname If not NULL, name of node to find
*
* \return Node from \p nodes that matches \p id if any,
* otherwise node from \p nodes that matches \p uname if any,
* otherwise NULL
*/
pcmk_node_t *
pe_find_node_any(const GList *nodes, const char *id, const char *uname)
{
pcmk_node_t *match = NULL;
if (id != NULL) {
match = pe_find_node_id(nodes, id);
}
if ((match == NULL) && (uname != NULL)) {
match = pcmk__find_node_in_list(nodes, uname);
}
return match;
}
/*!
* \brief Find a node by ID in a list of nodes
*
* \param[in] nodes List of nodes (as pcmk_node_t*)
* \param[in] id ID of node to find
*
* \return Node from \p nodes that matches \p id if any, otherwise NULL
*/
pcmk_node_t *
pe_find_node_id(const GList *nodes, const char *id)
{
for (const GList *iter = nodes; iter != NULL; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
/* @TODO Whether node IDs should be considered case-sensitive should
* probably depend on the node type, so functionizing the comparison
* would be worthwhile
*/
if (pcmk__str_eq(node->priv->id, id, pcmk__str_casei)) {
return node;
}
}
return NULL;
}
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/pengine/status_compat.h>
/*!
* \brief Find a node by name in a list of nodes
*
* \param[in] nodes List of nodes (as pcmk_node_t*)
* \param[in] node_name Name of node to find
*
* \return Node from \p nodes that matches \p node_name if any, otherwise NULL
*/
pcmk_node_t *
pe_find_node(const GList *nodes, const char *node_name)
{
return pcmk__find_node_in_list(nodes, node_name);
}
// LCOV_EXCL_STOP
// End deprecated API