Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c
index af189a2c7d..dc70cc6f04 100644
--- a/lib/pacemaker/pcmk_scheduler.c
+++ b/lib/pacemaker/pcmk_scheduler.c
@@ -1,888 +1,880 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/cib/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/scheduler_internal.h>
#include <glib.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
CRM_TRACE_INIT_DATA(pacemaker);
/*!
 * \internal
 * \brief Run one deferred action check once assignment is complete
 *
 * While the resource history is unpacked, the scheduler looks for resource
 * configuration that changed after an action was run. Bundles relying on the
 * REMOTE_CONTAINER_HACK do not have their final parameter information at that
 * point, so they queue a deferred check instead. This function handles a
 * single queued entry.
 *
 * \param[in,out] rsc     Resource that action history is for
 * \param[in,out] node    Node that action history is for
 * \param[in]     rsc_op  Action history entry
 * \param[in]     check   Type of deferred check to do
 */
static void
check_params(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *rsc_op,
             enum pcmk__check_parameters check)
{
    // Stays NULL unless the check decides the fail count should be cleared
    const char *clear_reason = NULL;

    if (check == pcmk__check_active) {
        if (pcmk__check_action_config(rsc, node, rsc_op)
            && (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective,
                                 NULL) != 0)) {
            clear_reason = "action definition changed";
        }

    } else if (check == pcmk__check_last_failure) {
        pcmk__op_digest_t *digest = rsc_action_digest_cmp(rsc, rsc_op, node,
                                                          rsc->priv->scheduler);

        if (digest->rc == pcmk__digest_unknown) {
            crm_trace("Resource %s history entry %s on %s has "
                      "no digest to compare",
                      rsc->id, pcmk__xe_id(rsc_op), node->priv->id);

        } else if (digest->rc != pcmk__digest_match) {
            // Any result other than "unknown" or "match" counts as a change
            clear_reason = "resource parameters have changed";
        }
    }

    if (clear_reason == NULL) {
        return;
    }
    pe__clear_failcount(rsc, node, clear_reason, rsc->priv->scheduler);
}
/*!
 * \internal
 * \brief Check whether fail count clearing is scheduled for a resource on a node
 *
 * \param[in] node  Node to check
 * \param[in] rsc   Resource to check
 *
 * \return true if \p rsc has fail count clearing scheduled on \p node,
 *         otherwise false
 */
static bool
failcount_clear_action_exists(const pcmk_node_t *node,
                              const pcmk_resource_t *rsc)
{
    GList *matches = pe__resource_actions(rsc, node,
                                          PCMK_ACTION_CLEAR_FAILCOUNT, TRUE);
    const bool found = (matches != NULL);

    // An empty (NULL) list is a valid argument to g_list_free()
    g_list_free(matches);
    return found;
}
/*!
 * \internal
 * \brief Ban a resource from a node where it has reached its failure threshold
 *
 * \param[in,out] data       Resource to check failure threshold for
 * \param[in]     user_data  Node to check resource on
 */
static void
check_failure_threshold(gpointer data, gpointer user_data)
{
    pcmk_resource_t *rsc = data;
    const pcmk_node_t *node = user_data;
    pcmk_resource_t *failed = NULL;

    // Collective resources delegate the check to each of their children
    if (rsc->priv->children != NULL) {
        g_list_foreach(rsc->priv->children, check_failure_threshold,
                       user_data);
        return;
    }

    /* Don't force the resource away from this node due to a failcount
     * that's going to be cleared.
     *
     * @TODO Failcount clearing can be scheduled in
     * pcmk__handle_rsc_config_changes() via process_rsc_history(), or in
     * schedule_resource_actions() via check_params(). This runs well before
     * then, so it cannot detect those, meaning we might check the migration
     * threshold when we shouldn't. Worst case, we stop or move the
     * resource, then move it back in the next transition.
     */
    if (failcount_clear_action_exists(node, rsc)) {
        return;
    }

    if (pcmk__threshold_reached(rsc, node, &failed)) {
        resource_location(failed, node, -PCMK_SCORE_INFINITY,
                          "__fail_limit__", rsc->priv->scheduler);
    }
}
/*!
 * \internal
 * \brief Ban a node for a resource with exclusive discovery, if not allowed
 *
 * Location constraints have a PCMK_XA_RESOURCE_DISCOVERY option that lets
 * users choose where probes are done for the affected resource. When it is set
 * to \c exclusive, probes are done only on nodes listed in exclusive
 * constraints. This function bans the resource from the node if the node is
 * not listed.
 *
 * \param[in,out] data       Resource to check
 * \param[in]     user_data  Node to check resource on
 */
static void
apply_exclusive_discovery(gpointer data, gpointer user_data)
{
    pcmk_resource_t *rsc = data;
    const pcmk_node_t *node = user_data;
    pcmk_node_t *allowed = NULL;

    // Nothing to do unless the resource or its topmost parent has the flag
    if (!pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes)
        && !pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
                        pcmk__rsc_exclusive_probes)) {
        return;
    }

    // If this is a collective resource, apply recursively to children
    g_list_foreach(rsc->priv->children, apply_exclusive_discovery, user_data);

    allowed = g_hash_table_lookup(rsc->priv->allowed_nodes, node->priv->id);
    if (allowed == NULL) {
        return;
    }
    if (allowed->assign->probe_mode != pcmk__probe_exclusive) {
        allowed->assign->score = -PCMK_SCORE_INFINITY;
    }
}
/*!
 * \internal
 * \brief Give a resource a preference for its current node, if appropriate
 *
 * \param[in,out] data       Resource to check for stickiness
 * \param[in]     user_data  Ignored
 */
static void
apply_stickiness(gpointer data, gpointer user_data)
{
    pcmk_resource_t *rsc = data;
    pcmk_node_t *current = NULL;

    // Collective resources delegate to each of their children
    if (rsc->priv->children != NULL) {
        g_list_foreach(rsc->priv->children, apply_stickiness, NULL);
        return;
    }

    /* Only a managed resource with stickiness configured that is active on
     * exactly one node is sticky.
     */
    if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
        return;
    }
    if (rsc->priv->stickiness < 1) {
        return;
    }
    if (!pcmk__list_of_1(rsc->priv->active_nodes)) {
        return;
    }

    current = rsc->priv->active_nodes->data;

    /* A symmetric cluster can always use stickiness. An asymmetric cluster
     * must first confirm the resource is still allowed on the node, so the
     * resource is not kept somewhere it is no longer explicitly enabled.
     */
    if (!pcmk_is_set(rsc->priv->scheduler->flags,
                     pcmk__sched_symmetric_cluster)) {
        if (g_hash_table_lookup(rsc->priv->allowed_nodes,
                                current->priv->id) == NULL) {
            pcmk__rsc_debug(rsc,
                            "Ignoring %s stickiness because the cluster is "
                            "asymmetric and %s is not explicitly allowed",
                            rsc->id, pcmk__node_name(current));
            return;
        }
    }

    pcmk__rsc_debug(rsc, "Resource %s has %d stickiness on %s",
                    rsc->id, rsc->priv->stickiness, pcmk__node_name(current));
    resource_location(rsc, current, rsc->priv->stickiness, "stickiness",
                      rsc->priv->scheduler);
}
/*!
 * \internal
 * \brief Invoke every resource's shutdown lock method, if locks are enabled
 *
 * \param[in,out] scheduler  Scheduler data
 */
static void
apply_shutdown_locks(pcmk_scheduler_t *scheduler)
{
    GList *item = NULL;

    if (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
        return;
    }

    item = scheduler->priv->resources;
    while (item != NULL) {
        pcmk_resource_t *rsc = item->data;

        rsc->priv->cmds->shutdown_lock(rsc);
        item = item->next;
    }
}
/*!
 * \internal
 * \brief Apply node-specific scheduling criteria
 *
 * Once the CIB has been unpacked, process the node-specific scheduling
 * criteria: shutdown locks, location constraints, resource stickiness,
 * migration thresholds, and exclusive resource discovery.
 *
 * \param[in,out] scheduler  Scheduler data
 */
static void
apply_node_criteria(pcmk_scheduler_t *scheduler)
{
    crm_trace("Applying node-specific scheduling criteria");

    apply_shutdown_locks(scheduler);
    pcmk__apply_locations(scheduler);
    g_list_foreach(scheduler->priv->resources, apply_stickiness, NULL);

    // Check every top-level resource against every node
    for (GList *n = scheduler->nodes; n != NULL; n = n->next) {
        gpointer node = n->data;

        for (GList *r = scheduler->priv->resources; r != NULL; r = r->next) {
            gpointer rsc = r->data;

            check_failure_threshold(rsc, node);
            apply_exclusive_discovery(rsc, node);
        }
    }
}
/*!
 * \internal
 * \brief Assign all resources to nodes
 *
 * Remote connection resources are assigned in a first pass (when the cluster
 * has remote nodes), and all remaining resources in a second pass.
 *
 * \param[in,out] scheduler  Scheduler data
 */
static void
assign_resources(pcmk_scheduler_t *scheduler)
{
    GList *item = NULL;

    crm_trace("Assigning resources to nodes");

    // Resources are sorted only for a non-default placement strategy
    if (!pcmk__str_eq(scheduler->priv->placement_strategy, PCMK_VALUE_DEFAULT,
                      pcmk__str_casei)) {
        pcmk__sort_resources(scheduler);
    }
    pcmk__show_node_capacities("Original", scheduler);

    if (pcmk_is_set(scheduler->flags, pcmk__sched_have_remote_nodes)) {
        /* Assign remote connection resources first (which will also assign
         * any colocation dependencies). If the connection is migrating,
         * always prefer the partial migration target.
         */
        item = scheduler->priv->resources;
        while (item != NULL) {
            pcmk_resource_t *rsc = item->data;

            if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
                pcmk__rsc_trace(rsc, "Assigning remote connection resource '%s'",
                                rsc->id);
                rsc->priv->cmds->assign(rsc,
                                        rsc->priv->partial_migration_target,
                                        true);
            }
            item = item->next;
        }
    }

    // Second pass: everything that is not a remote connection
    for (item = scheduler->priv->resources; item != NULL; item = item->next) {
        pcmk_resource_t *rsc = item->data;

        if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
            continue;
        }
        pcmk__rsc_trace(rsc, "Assigning %s resource '%s'",
                        rsc->priv->xml->name, rsc->id);
        rsc->priv->cmds->assign(rsc, NULL, true);
    }

    pcmk__show_node_capacities("Remaining", scheduler);
}
/*!
 * \internal
 * \brief Schedule fail count clearing on online nodes for an orphaned resource
 *
 * \param[in,out] data       Resource to check
 * \param[in]     user_data  Ignored
 */
static void
clear_failcounts_if_orphaned(gpointer data, gpointer user_data)
{
    pcmk_resource_t *rsc = data;
    pcmk_scheduler_t *scheduler = NULL;

    if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
        return;
    }
    crm_trace("Clear fail counts for orphaned resource %s", rsc->id);

    scheduler = rsc->priv->scheduler;

    /* There's no need to recurse into rsc->priv->children because those
     * should just be unassigned clone instances.
     */
    for (GList *item = scheduler->nodes; item != NULL; item = item->next) {
        pcmk_node_t *node = item->data;
        pcmk_action_t *clear_op = NULL;

        // Skip offline nodes and nodes where the resource has no fail count
        if (!node->details->online
            || (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective,
                                 NULL) == 0)) {
            continue;
        }

        clear_op = pe__clear_failcount(rsc, node, "it is orphaned", scheduler);

        /* We can't use order_action_then_stop() here because its
         * pcmk__ar_guest_allowed breaks things
         */
        pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc),
                           NULL, pcmk__ar_ordered, scheduler);
    }
}
/*!
 * \internal
 * \brief Schedule whatever resource actions are needed
 *
 * \param[in,out] scheduler  Scheduler data
 */
static void
schedule_resource_actions(pcmk_scheduler_t *scheduler)
{
    GList *item = NULL;

    // Run the deferred action checks, then discard the list of them
    pe__foreach_param_check(scheduler, check_params);
    pe__free_param_checks(scheduler);

    if (pcmk_is_set(scheduler->flags, pcmk__sched_probe_resources)) {
        crm_trace("Scheduling probes");
        pcmk__schedule_probes(scheduler);
    }

    if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
        g_list_foreach(scheduler->priv->resources,
                       clear_failcounts_if_orphaned, NULL);
    }

    crm_trace("Scheduling resource actions");
    item = scheduler->priv->resources;
    while (item != NULL) {
        pcmk_resource_t *rsc = item->data;

        rsc->priv->cmds->create_actions(rsc);
        item = item->next;
    }
}
/*!
* \internal
* \brief Check whether a resource or any of its descendants are managed
*
* \param[in] rsc Resource to check
*
* \return true if resource or any descendant is managed, otherwise false
*/
static bool
is_managed(const pcmk_resource_t *rsc)
{
if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
return true;
}
for (GList *iter = rsc->priv->children;
iter != NULL; iter = iter->next) {
if (is_managed((pcmk_resource_t *) iter->data)) {
return true;
}
}
return false;
}
/*!
 * \internal
 * \brief Check whether the cluster has at least one managed resource
 *
 * \param[in] scheduler  Scheduler data
 *
 * \return true if any resource is managed, otherwise false
 */
static bool
any_managed_resources(const pcmk_scheduler_t *scheduler)
{
    bool found = false;

    // The loop ends as soon as one managed resource has been found
    for (const GList *item = scheduler->priv->resources;
         !found && (item != NULL); item = item->next) {
        found = is_managed(item->data);
    }
    return found;
}
/*!
 * \internal
 * \brief Check whether a node requires fencing
 *
 * \param[in] node          Node to check
 * \param[in] have_managed  Whether any resource in cluster is managed
 *
 * \return true if \p node should be fenced, otherwise false
 */
static bool
needs_fencing(const pcmk_node_t *node, bool have_managed)
{
    if (!have_managed) {
        return false;
    }
    if (!node->details->unclean) {
        return false;
    }
    return pe_can_fence(node->priv->scheduler, node);
}
/*!
 * \internal
 * \brief Check whether a node requires shutdown
 *
 * \param[in] node  Node to check
 *
 * \return true if \p node should be shut down, otherwise false
 */
static bool
needs_shutdown(const pcmk_node_t *node)
{
    /* Do not send shutdown actions for Pacemaker Remote nodes.
     * @TODO We might come up with a good use for this in the future.
     */
    const bool is_cluster_node = !pcmk__is_pacemaker_remote_node(node);

    return is_cluster_node && node->details->online
           && node->details->shutdown;
}
/*!
 * \internal
 * \brief Track and order a non-DC fencing action
 *
 * \param[in,out] list       List of existing non-DC fencing actions
 * \param[in,out] action     Fencing action to prepend to \p list
 * \param[in]     scheduler  Scheduler data
 *
 * \return (Possibly new) head of \p list
 */
static GList *
add_nondc_fencing(GList *list, pcmk_action_t *action,
                  const pcmk_scheduler_t *scheduler)
{
    const bool serialize = !pcmk_is_set(scheduler->flags,
                                        pcmk__sched_concurrent_fencing);

    if (serialize && (list != NULL)) {
        /* Concurrent fencing is disabled, so order each non-DC fencing in a
         * chain. If there is any DC fencing or shutdown, it will be ordered
         * after the last action in the chain later.
         */
        pcmk_action_t *previous = list->data;

        order_actions(previous, action, pcmk__ar_ordered);
    }
    return g_list_prepend(list, action);
}
/*!
 * \internal
 * \brief Schedule a node for fencing
 *
 * \param[in,out] node  Node that requires fencing
 *
 * \return Fencing action created by pe_fence_op() for \p node
 */
static pcmk_action_t *
schedule_fencing(pcmk_node_t *node)
{
    pcmk_scheduler_t *scheduler = node->priv->scheduler;
    pcmk_action_t *fence_op = pe_fence_op(node, NULL, FALSE,
                                          "node is unclean", FALSE, scheduler);

    pcmk__sched_warn(scheduler, "Scheduling node %s for fencing",
                     pcmk__node_name(node));
    pcmk__order_vs_fence(fence_op, scheduler);
    return fence_op;
}
/*!
* \internal
* \brief Create and order node fencing and shutdown actions
*
* Each node gets at most one of a fencing action or a shutdown action. Actions
* for the DC are tracked separately from those for other nodes, so that the
* non-DC actions can be ordered before the DC's.
*
* \param[in,out] scheduler Scheduler data
*/
static void
schedule_fencing_and_shutdowns(pcmk_scheduler_t *scheduler)
{
// Fencing or shutdown action scheduled for the DC, if any
pcmk_action_t *dc_down = NULL;
// Set if any unclean node did not get a fencing action
bool integrity_lost = false;
bool have_managed = any_managed_resources(scheduler);
// Non-DC fencing and shutdown actions (most recently added first)
GList *fencing_ops = NULL;
GList *shutdown_ops = NULL;
crm_trace("Scheduling fencing and shutdowns as needed");
if (!have_managed) {
crm_notice("No fencing will be done until there are resources "
"to manage");
}
// Check each node for whether it needs fencing or shutdown
for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
pcmk_action_t *fencing = NULL;
const bool is_dc = pcmk__same_node(node, scheduler->dc_node);
/* Guest nodes are "fenced" by recovering their container resource,
* so handle them separately.
*/
if (pcmk__is_guest_or_bundle_node(node)) {
if (pcmk_is_set(node->priv->flags, pcmk__node_remote_reset)
&& have_managed && pe_can_fence(scheduler, node)) {
pcmk__fence_guest(node);
}
continue;
}
if (needs_fencing(node, have_managed)) {
fencing = schedule_fencing(node);
// Track DC and non-DC fence actions separately
if (is_dc) {
dc_down = fencing;
} else {
fencing_ops = add_nondc_fencing(fencing_ops, fencing,
scheduler);
}
} else if (needs_shutdown(node)) {
pcmk_action_t *down_op = pcmk__new_shutdown_action(node);
// Track DC and non-DC shutdown actions separately
if (is_dc) {
dc_down = down_op;
} else {
shutdown_ops = g_list_prepend(shutdown_ops, down_op);
}
}
// An unclean node for which no fencing was scheduled above
if ((fencing == NULL) && node->details->unclean) {
integrity_lost = true;
pcmk__config_warn("Node %s is unclean but cannot be fenced",
pcmk__node_name(node));
}
}
// Explain why unclean nodes were left unfenced
if (integrity_lost) {
if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
pcmk__config_warn("Resource functionality and data integrity "
"cannot be guaranteed (configure, enable, "
"and test fencing to correct this)");
} else if (!pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) {
crm_notice("Unclean nodes will not be fenced until quorum is "
"attained or " PCMK_OPT_NO_QUORUM_POLICY " is set to "
PCMK_VALUE_IGNORE);
}
}
if (dc_down != NULL) {
/* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
* DC elections. However, we don't want to order non-DC shutdowns before
* a DC *fencing*, because even though we don't want a node that's
* shutting down to become DC, the DC fencing could be ordered before a
* clone stop that's also ordered before the shutdowns, thus leading to
* a graph loop.
*/
if (pcmk__str_eq(dc_down->task, PCMK_ACTION_DO_SHUTDOWN,
pcmk__str_none)) {
pcmk__order_after_each(dc_down, shutdown_ops);
}
// Order any non-DC fencing before any DC fencing or shutdown
if (pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) {
/* With concurrent fencing, order each non-DC fencing action
* separately before any DC fencing or shutdown.
*/
pcmk__order_after_each(dc_down, fencing_ops);
} else if (fencing_ops != NULL) {
/* Without concurrent fencing, the non-DC fencing actions are
* already ordered relative to each other, so we just need to order
* the DC fencing after the last action in the chain (which is the
* first item in the list).
*/
order_actions((pcmk_action_t *) fencing_ops->data, dc_down,
pcmk__ar_ordered);
}
}
// Only the list cells are freed here, not the actions they point to
g_list_free(fencing_ops);
g_list_free(shutdown_ops);
}
/*!
 * \internal
 * \brief Output a message for every resource except inactive orphans
 *
 * \param[in,out] scheduler  Scheduler data (its output object is used)
 */
static void
log_resource_details(pcmk_scheduler_t *scheduler)
{
    pcmk__output_t *out = scheduler->priv->out;

    /* Due to the `crm_mon --node=` feature, out->message() for all the
     * resource-related messages expects a list of nodes that we are allowed
     * to output information for. Here, we create a wildcard to match all
     * nodes.
     */
    GList *wildcard = g_list_prepend(NULL, (gpointer) "*");

    for (GList *iter = scheduler->priv->resources;
         iter != NULL; iter = iter->next) {
        pcmk_resource_t *rsc = iter->data;
        const bool inactive_orphan =
            pcmk_is_set(rsc->flags, pcmk__rsc_removed)
            && (rsc->priv->orig_role == pcmk_role_stopped);

        if (inactive_orphan) {
            continue;
        }
        out->message(out, pcmk__map_element_name(rsc->priv->xml), 0UL,
                     rsc, wildcard, wildcard);
    }

    g_list_free(wildcard);
}
/*!
 * \internal
 * \brief Log the scheduled actions via a temporary log output object
 *
 * \param[in,out] scheduler  Scheduler data
 */
static void
log_all_actions(pcmk_scheduler_t *scheduler)
{
    /* This only ever outputs to the log, so whatever output object was
     * previously set is put aside, a log output object is used in its place,
     * and the previous one is restored at the end.
     */
    pcmk__output_t *saved_out = scheduler->priv->out;
    pcmk__output_t *log_out = NULL;

    if (pcmk__log_output_new(&log_out) != pcmk_rc_ok) {
        return;
    }

    pe__register_messages(log_out);
    pcmk__register_lib_messages(log_out);
    pcmk__output_set_log_level(log_out, LOG_NOTICE);

    scheduler->priv->out = log_out;
    log_out->begin_list(log_out, NULL, NULL, "Actions");
    pcmk__output_actions(scheduler);
    log_out->end_list(log_out);
    log_out->finish(log_out, CRM_EX_OK, true, NULL);
    pcmk__output_free(log_out);

    scheduler->priv->out = saved_out;
}
/*!
 * \internal
 * \brief Log all required but unrunnable actions at trace level
 *
 * An action is logged when it has none of the optional, runnable, or pseudo
 * flags set.
 *
 * \param[in] scheduler  Scheduler data
 */
static void
log_unrunnable_actions(const pcmk_scheduler_t *scheduler)
{
    const uint64_t skip_flags = pcmk__action_optional
                                |pcmk__action_runnable
                                |pcmk__action_pseudo;
    const GList *item = NULL;

    crm_trace("Required but unrunnable actions:");

    item = scheduler->priv->actions;
    while (item != NULL) {
        const pcmk_action_t *action = item->data;

        if (!pcmk_any_flags_set(action->flags, skip_flags)) {
            pcmk__log_action("\t", action, true);
        }
        item = item->next;
    }
}
/*!
* \internal
* \brief Unpack the CIB for scheduling
*
* If \p scheduler already has pcmk__sched_have_status set, the previously
* calculated status is reused and only \p flags are added. Otherwise, the
* scheduler data is reset to defaults and the status is calculated from \p cib.
*
* \param[in,out] cib CIB XML to unpack (may be NULL if already unpacked)
* \param[in] flags Scheduler flags to set in addition to defaults
* \param[in,out] scheduler Scheduler data
*/
static void
unpack_cib(xmlNode *cib, unsigned long long flags, pcmk_scheduler_t *scheduler)
{
- const char* localhost_save = NULL;
-
if (pcmk_is_set(scheduler->flags, pcmk__sched_have_status)) {
crm_trace("Reusing previously calculated cluster status");
pcmk__set_scheduler_flags(scheduler, flags);
return;
}
- localhost_save = scheduler->priv->local_node_name;
-
// Status has not been calculated yet, so a CIB is required from here on
CRM_ASSERT(cib != NULL);
crm_trace("Calculating cluster status");
/* This will zero the entire struct without freeing anything first, so
* callers should never call pcmk__schedule_actions() with a populated data
* set unless pcmk__sched_have_status is set (i.e. cluster_status() was
* previously called, whether directly or via pcmk__schedule_actions()).
*/
set_working_set_defaults(scheduler);
- if (localhost_save) {
- scheduler->priv->local_node_name = localhost_save;
- }
-
pcmk__set_scheduler_flags(scheduler, flags);
scheduler->input = cib;
// NOTE(review): the return value of cluster_status() is not checked here
cluster_status(scheduler); // Sets pcmk__sched_have_status
}
/*!
* \internal
* \brief Run the scheduler for a given CIB
*
* Processing stops early after constraints are unpacked if
* pcmk__sched_validate_only is set, and after node criteria are applied if
* pcmk__sched_location_only is set.
*
* \param[in,out] cib CIB XML to use as scheduler input
* \param[in] flags Scheduler flags to set in addition to defaults
* \param[in,out] scheduler Scheduler data
*/
void
pcmk__schedule_actions(xmlNode *cib, unsigned long long flags,
pcmk_scheduler_t *scheduler)
{
unpack_cib(cib, flags, scheduler);
pcmk__set_assignment_methods(scheduler);
pcmk__apply_node_health(scheduler);
pcmk__unpack_constraints(scheduler);
// A validation-only run needs nothing beyond unpacking
if (pcmk_is_set(scheduler->flags, pcmk__sched_validate_only)) {
return;
}
// Resource details are logged only if pcmk__is_daemon is set
if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only)
&& pcmk__is_daemon) {
log_resource_details(scheduler);
}
apply_node_criteria(scheduler);
// A location-only run stops before assigning resources
if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
return;
}
pcmk__create_internal_constraints(scheduler);
pcmk__handle_rsc_config_changes(scheduler);
assign_resources(scheduler);
schedule_resource_actions(scheduler);
/* Remote ordering constraints need to happen prior to calculating fencing
* because it is one more place we can mark nodes as needing fencing.
*/
pcmk__order_remote_connection_actions(scheduler);
schedule_fencing_and_shutdowns(scheduler);
pcmk__apply_orderings(scheduler);
log_all_actions(scheduler);
pcmk__create_graph(scheduler);
// Walk the action list for logging only when tracing is enabled
if (get_crm_log_level() == LOG_TRACE) {
log_unrunnable_actions(scheduler);
}
}
/*!
 * \internal
 * \brief Initialize scheduler data
 *
 * Make our own copies of the CIB XML and date/time object, if they're not
 * \c NULL. This way we don't have to take ownership of the objects passed via
 * the API.
 *
 * This function is most useful for public API functions that want the caller
 * to retain ownership of the CIB object
 *
 * \param[in,out] out        Output object
 * \param[in]     input      The CIB XML to check (if \c NULL, use current CIB)
 * \param[in]     date       Date and time to use in the scheduler (if \c NULL,
 *                           use current date and time). This can be used for
 *                           checking whether a rule is in effect at a certain
 *                           date and time.
 * \param[out]    scheduler  Where to store initialized scheduler data (set
 *                           only on success)
 *
 * \return Standard Pacemaker return code (\c ENOMEM if allocation or copying
 *         fails, the result of the CIB query if that fails, \c EINVAL if the
 *         input cannot be unpacked, otherwise \c pcmk_rc_ok)
 */
int
pcmk__init_scheduler(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date,
                     pcmk_scheduler_t **scheduler)
{
    // Allows for cleaner syntax than dereferencing the scheduler argument
    pcmk_scheduler_t *new_scheduler = NULL;

    new_scheduler = pe_new_working_set();
    if (new_scheduler == NULL) {
        return ENOMEM;
    }

    pcmk__set_scheduler_flags(new_scheduler, pcmk__sched_no_counts);

    // Populate the scheduler data

    // Make our own copy of the given input or fetch the CIB and use that
    if (input != NULL) {
        new_scheduler->input = pcmk__xml_copy(NULL, input);
        if (new_scheduler->input == NULL) {
            out->err(out, "Failed to copy input XML");
            pe_free_working_set(new_scheduler);
            return ENOMEM;
        }

    } else {
        int rc = cib__signon_query(out, NULL, &(new_scheduler->input));

        if (rc != pcmk_rc_ok) {
            pe_free_working_set(new_scheduler);
            return rc;
        }
    }

    // Make our own copy of the given crm_time_t object; otherwise
    // cluster_status() populates with the current time
    if (date != NULL) {
        // pcmk_copy_time() guarantees non-NULL
        new_scheduler->priv->now = pcmk_copy_time(date);
    }

    /* Unpack everything. cluster_status() returns FALSE when it has no input
     * or the CIB's feature set is not supported; in that case the scheduler
     * data has not been unpacked, so don't hand it to the caller as if
     * initialization had succeeded.
     */
    if (!cluster_status(new_scheduler)) {
        pe_free_working_set(new_scheduler);
        return EINVAL;
    }

    *scheduler = new_scheduler;
    return pcmk_rc_ok;
}
diff --git a/lib/pengine/status.c b/lib/pengine/status.c
index 7da255f614..31580849d0 100644
--- a/lib/pengine/status.c
+++ b/lib/pengine/status.c
@@ -1,528 +1,530 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/cib_internal.h>
#include <glib.h>
#include <crm/pengine/internal.h>
#include <pe_status_private.h>
/*!
 * \brief Create a new object to hold scheduler data
 *
 * \return New, initialized scheduler data on success, else NULL (and set errno)
 * \note Only pcmk_scheduler_t objects created with this function (as opposed
 *       to statically declared or directly allocated) should be used with the
 *       functions in this library, to allow for future extensions to the
 *       data type. The caller is responsible for freeing the memory with
 *       pe_free_working_set() when the instance is no longer needed.
 */
pcmk_scheduler_t *
pe_new_working_set(void)
{
    pcmk_scheduler_t *scheduler = calloc(1, sizeof(*scheduler));

    if (scheduler != NULL) {
        scheduler->priv = calloc(1, sizeof(pcmk__scheduler_private_t));
        if (scheduler->priv != NULL) {
            set_working_set_defaults(scheduler);
            return scheduler;
        }

        // The private data could not be allocated, so discard the outer object
        free(scheduler);
    }
    return NULL;
}
/*!
 * \brief Free scheduler data
 *
 * The scheduler data is reset first, then its private data and the object
 * itself are freed. Passing NULL does nothing.
 *
 * \param[in,out] scheduler  Scheduler data to free
 */
void
pe_free_working_set(pcmk_scheduler_t *scheduler)
{
    if (scheduler == NULL) {
        return;
    }
    pe_reset_working_set(scheduler);
    free(scheduler->priv);
    free(scheduler);
}
#define XPATH_DEPRECATED_RULES \
"//" PCMK_XE_OP_DEFAULTS "//" PCMK_XE_EXPRESSION \
"|//" PCMK_XE_OP "//" PCMK_XE_EXPRESSION

/*!
 * \internal
 * \brief Warn (once) if operations use deprecated rule syntax
 *
 * The warning is issued when the scheduler input contains any element matching
 * XPATH_DEPRECATED_RULES.
 *
 * \param[in] scheduler  Scheduler data
 */
static void
check_for_deprecated_rules(pcmk_scheduler_t *scheduler)
{
    // @COMPAT Drop this function when support for the syntax is dropped
    if (get_xpath_object(XPATH_DEPRECATED_RULES, scheduler->input,
                         LOG_NEVER) == NULL) {
        return;
    }

    pcmk__warn_once(pcmk__wo_op_attr_expr,
                    "Support for rules with node attribute expressions in "
                    PCMK_XE_OP " or " PCMK_XE_OP_DEFAULTS " is deprecated "
                    "and will be dropped in a future release");
}
/*!
* \brief Unpack everything from the scheduler input
*
* At the end you'll have:
* - A list of nodes
* - A list of resources (each with any dependencies on other resources)
* - A list of constraints between resources and nodes
* - A list of constraints between start/stop actions
* - A list of nodes that need to be stonith'd
* - A list of nodes that need to be shutdown
* - A list of the possible stop/start actions (without dependencies)
*
* \param[in,out] scheduler Scheduler data (its input must already be set)
*
* \return FALSE if \p scheduler or its input is NULL or the input's feature
* set fails pcmk__check_feature_set(), otherwise TRUE
*/
gboolean
cluster_status(pcmk_scheduler_t * scheduler)
{
const char *new_version = NULL;
xmlNode *section = NULL;
if ((scheduler == NULL) || (scheduler->input == NULL)) {
return FALSE;
}
// Refuse input whose feature set does not pass the check
new_version = crm_element_value(scheduler->input, PCMK_XA_CRM_FEATURE_SET);
if (pcmk__check_feature_set(new_version) != pcmk_rc_ok) {
pcmk__config_err("Can't process CIB with feature set '%s' greater than our own '%s'",
new_version, CRM_FEATURE_SET);
return FALSE;
}
crm_trace("Beginning unpack");
// Replace any existing failed-operations XML with a new, empty element
if (scheduler->priv->failed != NULL) {
pcmk__xml_free(scheduler->priv->failed);
}
scheduler->priv->failed = pcmk__xe_create(NULL, "failed-ops");
// Keep a time that was already set; otherwise create one here
if (scheduler->priv->now == NULL) {
scheduler->priv->now = crm_time_new(NULL);
}
// The quorate flag mirrors the input's PCMK_XA_HAVE_QUORUM attribute
if (pcmk__xe_attr_is_true(scheduler->input, PCMK_XA_HAVE_QUORUM)) {
pcmk__set_scheduler_flags(scheduler, pcmk__sched_quorate);
} else {
pcmk__clear_scheduler_flags(scheduler, pcmk__sched_quorate);
}
scheduler->priv->op_defaults = get_xpath_object("//" PCMK_XE_OP_DEFAULTS,
scheduler->input,
LOG_NEVER);
check_for_deprecated_rules(scheduler);
scheduler->priv->rsc_defaults = get_xpath_object("//" PCMK_XE_RSC_DEFAULTS,
scheduler->input,
LOG_NEVER);
section = get_xpath_object("//" PCMK_XE_CRM_CONFIG, scheduler->input,
LOG_TRACE);
unpack_config(section, scheduler);
// Warn if neither location-only nor quorate, unless quorum loss is ignored
if (!pcmk_any_flags_set(scheduler->flags,
pcmk__sched_location_only|pcmk__sched_quorate)
&& (scheduler->no_quorum_policy != pcmk_no_quorum_ignore)) {
pcmk__sched_warn(scheduler,
"Fencing and resource management disabled "
"due to lack of quorum");
}
section = get_xpath_object("//" PCMK_XE_NODES, scheduler->input, LOG_TRACE);
unpack_nodes(section, scheduler);
section = get_xpath_object("//" PCMK_XE_RESOURCES, scheduler->input,
LOG_TRACE);
// Remote nodes are not unpacked in location-only mode
if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
unpack_remote_nodes(section, scheduler);
}
unpack_resources(section, scheduler);
section = get_xpath_object("//" PCMK_XE_FENCING_TOPOLOGY, scheduler->input,
LOG_TRACE);
pcmk__unpack_fencing_topology(section, scheduler);
section = get_xpath_object("//" PCMK_XE_TAGS, scheduler->input, LOG_NEVER);
unpack_tags(section, scheduler);
// The status section is not unpacked in location-only mode
if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
section = get_xpath_object("//" PCMK_XE_STATUS, scheduler->input,
LOG_TRACE);
unpack_status(section, scheduler);
}
// Resource counting is skipped when pcmk__sched_no_counts is set
if (!pcmk_is_set(scheduler->flags, pcmk__sched_no_counts)) {
for (GList *item = scheduler->priv->resources;
item != NULL; item = item->next) {
pcmk_resource_t *rsc = item->data;
rsc->priv->fns->count(item->data);
}
crm_trace("Cluster resource count: %d (%d disabled, %d blocked)",
scheduler->ninstances, scheduler->disabled_resources,
scheduler->blocked_resources);
}
// Mark the status as calculated
pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_status);
return TRUE;
}
/*!
 * \internal
 * \brief Free a list of pcmk_resource_t
 *
 * \param[in,out] resources  List to free
 *
 * \note When the scheduler's resource list is freed, that includes the original
 *       storage for the uname and id of any Pacemaker Remote nodes in the
 *       scheduler's node list, so take care not to use those afterward.
 * \todo Refactor pcmk_node_t to strdup() the node name.
 */
static void
pe_free_resources(GList *resources)
{
    GList *next = NULL;

    for (GList *item = resources; item != NULL; item = next) {
        pcmk_resource_t *rsc = item->data;

        // Advance before invoking the resource's own free method
        next = item->next;
        rsc->priv->fns->free(rsc);
    }

    // An empty (NULL) list is a valid argument to g_list_free()
    g_list_free(resources);
}
/*!
 * \internal
 * \brief Free a list of actions, including the actions themselves
 *
 * \param[in,out] actions  List to free
 */
static void
pe_free_actions(GList *actions)
{
    for (GList *item = actions; item != NULL; item = item->next) {
        pe_free_action(item->data);
    }

    // An empty (NULL) list is a valid argument to g_list_free()
    g_list_free(actions);
}
/*!
 * \internal
 * \brief Free a list of nodes, including the nodes themselves
 *
 * \param[in,out] nodes  List to free
 */
static void
pe_free_nodes(GList *nodes)
{
    for (GList *item = nodes; item != NULL; item = item->next) {
        pcmk_node_t *node = item->data;

        // Shouldn't be possible, but to be safe ...
        if (node == NULL) {
            continue;
        }

        // A node without details gets only the node object itself freed
        if (node->details != NULL) {
            /* This is called after pe_free_resources(), which means that we
             * can't use node->priv->name for Pacemaker Remote nodes.
             */
            crm_trace("Freeing node %s", (pcmk__is_pacemaker_remote_node(node)?
                      "(guest or remote)" : pcmk__node_name(node)));

            if (node->priv->attrs != NULL) {
                g_hash_table_destroy(node->priv->attrs);
            }
            if (node->priv->utilization != NULL) {
                g_hash_table_destroy(node->priv->utilization);
            }
            if (node->priv->digest_cache != NULL) {
                g_hash_table_destroy(node->priv->digest_cache);
            }
            g_list_free(node->details->running_rsc);
            g_list_free(node->priv->assigned_resources);
            free(node->priv);
            free(node->details);
            free(node->assign);
        }
        free(node);
    }

    // An empty (NULL) list is a valid argument to g_list_free()
    g_list_free(nodes);
}
/*!
 * \internal
 * \brief Free a list of ordering constraints, including each entry's task names
 *
 * \param[in,out] constraints  List to free
 */
static void
pe__free_ordering(GList *constraints)
{
    GList *next = NULL;

    for (GList *item = constraints; item != NULL; item = next) {
        pcmk__action_relation_t *relation = item->data;

        next = item->next;
        free(relation->task1);
        free(relation->task2);
        free(relation);
    }

    // An empty (NULL) list is a valid argument to g_list_free()
    g_list_free(constraints);
}
/*!
 * \internal
 * \brief Free a list of location constraints, including each constraint's
 *        node list and ID, and the list itself
 *
 * \param[in,out] constraints  List of pcmk__location_t * (may be NULL)
 */
static void
pe__free_location(GList *constraints)
{
    GList *next = NULL;

    if (constraints == NULL) {
        return;
    }

    for (GList *item = constraints; item != NULL; item = next) {
        pcmk__location_t *location = item->data;

        // Step forward first, then dismantle the current constraint
        next = item->next;

        // Each entry of the constraint's node list is released with free()
        g_list_free_full(location->nodes, free);
        free(location->id);
        free(location);
    }

    g_list_free(constraints);
}
/*!
 * \brief Reset scheduler data to defaults without freeing it or constraints
 *
 * Destroys the scheduler's hash tables, frees its resources, actions, nodes,
 * parameter checks, stop-needed list, time object, and XML trees, and then
 * restores default values via set_working_set_defaults(). The ordering,
 * location, and colocation constraint lists are not freed here.
 *
 * \param[in,out] scheduler  Scheduler data to reset (NULL is a no-op)
 *
 * \deprecated This function is deprecated as part of the API;
 *             pe_reset_working_set() should be used instead.
 */
void
cleanup_calculations(pcmk_scheduler_t *scheduler)
{
    pcmk__scheduler_private_t *priv = NULL;

    if (scheduler == NULL) {
        return;
    }
    priv = scheduler->priv;

    pcmk__clear_scheduler_flags(scheduler, pcmk__sched_have_status);

    // Hash tables are destroyed only if they were ever created
    if (priv->options != NULL) {
        g_hash_table_destroy(priv->options);
    }
    if (priv->singletons != NULL) {
        g_hash_table_destroy(priv->singletons);
    }
    if (priv->ticket_constraints != NULL) {
        g_hash_table_destroy(priv->ticket_constraints);
    }
    if (priv->templates != NULL) {
        g_hash_table_destroy(priv->templates);
    }
    if (scheduler->tags != NULL) {
        g_hash_table_destroy(scheduler->tags);
    }

    // Resources are freed before nodes; pe_free_nodes() relies on that order
    crm_trace("deleting resources");
    pe_free_resources(priv->resources);

    crm_trace("deleting actions");
    pe_free_actions(priv->actions);

    crm_trace("deleting nodes");
    pe_free_nodes(scheduler->nodes);

    pe__free_param_checks(scheduler);
    g_list_free(scheduler->stop_needed);
    crm_time_free(priv->now);

    pcmk__xml_free(scheduler->input);
    pcmk__xml_free(priv->failed);
    pcmk__xml_free(priv->graph);

    // Every owned member is gone, so the structs can be wiped and defaulted
    set_working_set_defaults(scheduler);

    /* NOTE(review): set_working_set_defaults() zeroes the private data with
     * memset(), so this check looks like it can never fail -- confirm whether
     * it was meant to run before the reset.
     */
    CRM_LOG_ASSERT((scheduler->priv->location_constraints == NULL)
                   && (scheduler->priv->ordering_constraints == NULL));
}
/*!
 * \brief Reset scheduler data to default state without freeing it
 *
 * Frees the ordering, location, and colocation constraint lists and clears
 * their pointers, then hands the remaining cleanup to cleanup_calculations().
 *
 * \param[in,out] scheduler  Scheduler data to reset (NULL is a no-op)
 */
void
pe_reset_working_set(pcmk_scheduler_t *scheduler)
{
    if (scheduler == NULL) {
        return;
    }

    // g_list_length() returns an unsigned count (guint), hence %u below
    crm_trace("Deleting %u ordering constraints",
              g_list_length(scheduler->priv->ordering_constraints));
    pe__free_ordering(scheduler->priv->ordering_constraints);
    scheduler->priv->ordering_constraints = NULL;

    crm_trace("Deleting %u location constraints",
              g_list_length(scheduler->priv->location_constraints));
    pe__free_location(scheduler->priv->location_constraints);
    scheduler->priv->location_constraints = NULL;

    // Colocation constraints need no per-member cleanup beyond free()
    crm_trace("Deleting %u colocation constraints",
              g_list_length(scheduler->priv->colocation_constraints));
    g_list_free_full(scheduler->priv->colocation_constraints, free);
    scheduler->priv->colocation_constraints = NULL;

    // The constraint pointers are cleared above, before the remaining cleanup
    cleanup_calculations(scheduler);
}
/*!
 * \brief Zero a scheduler object and apply default settings to it
 *
 * Wipes both the scheduler struct and its private data with memset(), keeping
 * only the private-data pointer itself, the output object, and the local node
 * name, then sets default ID counters, no-quorum policy, and flags.
 *
 * \param[in,out] scheduler  Scheduler data to reset; it and its priv member
 *                           are dereferenced unconditionally
 *
 * \note Nothing is freed here, so any other members must already have been
 *       freed by the caller.
 */
void
set_working_set_defaults(pcmk_scheduler_t *scheduler)
{
// These members must be preserved
pcmk__scheduler_private_t *priv = scheduler->priv;
pcmk__output_t *out = priv->out;
+ const char *local_node_name = scheduler->priv->local_node_name;
// Wipe the main structs (any other members must have previously been freed)
memset(scheduler, 0, sizeof(pcmk_scheduler_t));
memset(priv, 0, sizeof(pcmk__scheduler_private_t));
// Restore the members to preserve
scheduler->priv = priv;
scheduler->priv->out = out;
+ scheduler->priv->local_node_name = local_node_name;
// Set defaults for everything else
// ID counters start at 1 rather than the 0 left behind by memset()
scheduler->priv->next_ordering_id = 1;
scheduler->priv->next_action_id = 1;
scheduler->no_quorum_policy = pcmk_no_quorum_stop;
pcmk__set_scheduler_flags(scheduler,
pcmk__sched_symmetric_cluster
|pcmk__sched_stop_removed_resources
|pcmk__sched_cancel_removed_actions);
// strcmp() returns 0 on equality, so the flag is set only when
// PCMK__CONCURRENT_FENCING_DEFAULT equals PCMK_VALUE_TRUE
if (!strcmp(PCMK__CONCURRENT_FENCING_DEFAULT, PCMK_VALUE_TRUE)) {
pcmk__set_scheduler_flags(scheduler, pcmk__sched_concurrent_fencing);
}
}
/*!
 * \brief Find a resource by ID in a list of resources
 *
 * Convenience wrapper for pe_find_resource_with_flags() that always passes
 * pcmk_rsc_match_history as the flags argument.
 *
 * \param[in] rsc_list  List of resources to search
 * \param[in] id        ID to search for
 *
 * \return Whatever pe_find_resource_with_flags() returns for these arguments
 */
pcmk_resource_t *
pe_find_resource(GList *rsc_list, const char *id)
{
return pe_find_resource_with_flags(rsc_list, id, pcmk_rsc_match_history);
}
/*!
 * \brief Find a resource by ID in a list of resources, using match flags
 *
 * Each list entry's find_rsc() method is asked to look for \p id, and the
 * first non-NULL result is returned.
 *
 * \param[in] rsc_list  List of resources (as pcmk_resource_t *) to search
 * \param[in] id        ID to search for (if NULL, nothing is searched)
 * \param[in] flags     Flags passed through to each entry's find_rsc() method
 *
 * \return First match found, or NULL if there is none
 */
pcmk_resource_t *
pe_find_resource_with_flags(GList *rsc_list, const char *id, enum pe_find flags)
{
    // The ID never changes during the search, so test it once up front
    if (id != NULL) {
        for (GList *iter = rsc_list; iter != NULL; iter = iter->next) {
            pcmk_resource_t *parent = iter->data;
            pcmk_resource_t *match = parent->priv->fns->find_rsc(parent, id,
                                                                 NULL, flags);

            if (match != NULL) {
                return match;
            }
        }
    }

    // A NULL pointer must never be handed to a %s conversion
    crm_trace("No match for %s", ((id != NULL)? id : "(null)"));
    return NULL;
}
/*!
 * \brief Find a node by ID or name in a list of nodes
 *
 * \param[in] nodes  List of nodes (as pcmk_node_t*)
 * \param[in] id     If not NULL, ID of node to find
 * \param[in] uname  If not NULL, name of node to find
 *
 * \return Node from \p nodes that matches \p id if any,
 *         otherwise node from \p nodes that matches \p uname if any,
 *         otherwise NULL
 */
pcmk_node_t *
pe_find_node_any(const GList *nodes, const char *id, const char *uname)
{
    // A match by ID takes precedence over a match by name
    if (id != NULL) {
        pcmk_node_t *by_id = pe_find_node_id(nodes, id);

        if (by_id != NULL) {
            return by_id;
        }
    }

    // No ID match: fall back to the name, if one was given
    if (uname == NULL) {
        return NULL;
    }
    return pcmk__find_node_in_list(nodes, uname);
}
/*!
 * \brief Find a node by ID in a list of nodes
 *
 * Each node's ID is compared against \p id using pcmk__str_eq() with the
 * pcmk__str_casei flag, and the first matching node is returned.
 *
 * \param[in] nodes  List of nodes (as pcmk_node_t*)
 * \param[in] id     ID of node to find
 *
 * \return Node from \p nodes that matches \p id if any, otherwise NULL
 */
pcmk_node_t *
pe_find_node_id(const GList *nodes, const char *id)
{
    const GList *item = nodes;

    while (item != NULL) {
        pcmk_node_t *candidate = (pcmk_node_t *) item->data;

        /* @TODO Whether node IDs should be considered case-sensitive should
         * probably depend on the node type, so functionizing the comparison
         * would be worthwhile
         */
        if (pcmk__str_eq(candidate->priv->id, id, pcmk__str_casei)) {
            return candidate;
        }
        item = item->next;
    }

    // Reached the end of the list without a match
    return NULL;
}
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/pengine/status_compat.h>
/*!
 * \brief Find a node by name in a list of nodes
 *
 * \param[in] nodes List of nodes (as pcmk_node_t*)
 * \param[in] node_name Name of node to find
 *
 * \return Node from \p nodes that matches \p node_name if any, otherwise NULL
 *
 * \note This is a deprecated compatibility entry point; it does nothing but
 *       forward both arguments to pcmk__find_node_in_list().
 */
pcmk_node_t *
pe_find_node(const GList *nodes, const char *node_name)
{
// Pure pass-through: the result is returned unchanged
return pcmk__find_node_in_list(nodes, node_name);
}
// LCOV_EXCL_STOP
// End deprecated API

File Metadata

Mime Type
text/x-diff
Expires
Mon, Sep 22, 11:07 PM (12 h, 17 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2403396
Default Alt Text
(45 KB)

Event Timeline