diff --git a/daemons/schedulerd/schedulerd_messages.c b/daemons/schedulerd/schedulerd_messages.c
index 2f7deb4915..b956ecaaa5 100644
--- a/daemons/schedulerd/schedulerd_messages.c
+++ b/daemons/schedulerd/schedulerd_messages.c
@@ -1,335 +1,329 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <pacemaker-internal.h>
#include <stdbool.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "pacemaker-schedulerd.h"
static GHashTable *schedulerd_handlers = NULL;
static pcmk_scheduler_t *
init_working_set(void)
{
pcmk_scheduler_t *scheduler = pe_new_working_set();
pcmk__mem_assert(scheduler);
-
- crm_config_error = FALSE;
- crm_config_warning = FALSE;
-
- was_processing_error = FALSE;
- was_processing_warning = FALSE;
-
scheduler->priv = logger_out;
return scheduler;
}
static xmlNode *
handle_pecalc_request(pcmk__request_t *request)
{
static struct series_s {
const char *name;
const char *param;
/* Maximum number of inputs of this kind to save to disk.
* If -1, save all; if 0, save none.
*/
int wrap;
} series[] = {
{ "pe-error", PCMK_OPT_PE_ERROR_SERIES_MAX, -1 },
{ "pe-warn", PCMK_OPT_PE_WARN_SERIES_MAX, 5000 },
{ "pe-input", PCMK_OPT_PE_INPUT_SERIES_MAX, 4000 },
};
xmlNode *msg = request->xml;
xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL);
xmlNode *xml_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
static char *last_digest = NULL;
static char *filename = NULL;
unsigned int seq;
int series_id = 0;
int series_wrap = 0;
char *digest = NULL;
const char *value = NULL;
time_t execution_date = time(NULL);
xmlNode *converted = NULL;
xmlNode *reply = NULL;
bool is_repoke = false;
bool process = true;
pcmk_scheduler_t *scheduler = init_working_set();
pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE);
digest = pcmk__digest_xml(xml_data, false);
converted = pcmk__xml_copy(NULL, xml_data);
if (pcmk_update_configured_schema(&converted, true) != pcmk_rc_ok) {
scheduler->graph = pcmk__xe_create(NULL, PCMK__XE_TRANSITION_GRAPH);
crm_xml_add_int(scheduler->graph, "transition_id", 0);
crm_xml_add_int(scheduler->graph, PCMK_OPT_CLUSTER_DELAY, 0);
process = false;
free(digest);
} else if (pcmk__str_eq(digest, last_digest, pcmk__str_casei)) {
is_repoke = true;
free(digest);
} else {
free(last_digest);
last_digest = digest;
}
if (process) {
pcmk__schedule_actions(converted,
pcmk__sched_no_counts
|pcmk__sched_no_compat
|pcmk__sched_show_utilization, scheduler);
}
// Get appropriate index into series[] array
- if (was_processing_error || crm_config_error) {
+ if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_error)
+ || crm_config_error) {
series_id = 0;
} else if (was_processing_warning || crm_config_warning) {
series_id = 1;
} else {
series_id = 2;
}
value = pcmk__cluster_option(scheduler->config_hash,
series[series_id].param);
if ((value == NULL)
|| (pcmk__scan_min_int(value, &series_wrap, -1) != pcmk_rc_ok)) {
series_wrap = series[series_id].wrap;
}
if (pcmk__read_series_sequence(PE_STATE_DIR, series[series_id].name,
&seq) != pcmk_rc_ok) {
// @TODO maybe handle errors better ...
seq = 0;
}
crm_trace("Series %s: wrap=%d, seq=%u, pref=%s",
series[series_id].name, series_wrap, seq, value);
scheduler->input = NULL;
reply = create_reply(msg, scheduler->graph);
if (reply == NULL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Failed building ping reply for client %s",
pcmk__client_name(request->ipc_client));
goto done;
}
if (series_wrap == 0) { // Don't save any inputs of this kind
free(filename);
filename = NULL;
} else if (!is_repoke) { // Input changed, save to disk
free(filename);
filename = pcmk__series_filename(PE_STATE_DIR,
series[series_id].name, seq, true);
}
crm_xml_add(reply, PCMK__XA_CRM_TGRAPH_IN, filename);
pcmk__log_transition_summary(scheduler, filename);
if (series_wrap == 0) {
crm_debug("Not saving input to disk (disabled by configuration)");
} else if (is_repoke) {
crm_info("Input has not changed since last time, not saving to disk");
} else {
unlink(filename);
crm_xml_add_ll(xml_data, PCMK_XA_EXECUTION_DATE,
(long long) execution_date);
pcmk__xml_write_file(xml_data, filename, true);
pcmk__write_series_sequence(PE_STATE_DIR, series[series_id].name,
++seq, series_wrap);
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
done:
pcmk__xml_free(converted);
pe_free_working_set(scheduler);
return reply;
}
static xmlNode *
handle_unknown_request(pcmk__request_t *request)
{
pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
PCMK__XE_ACK, NULL, CRM_EX_INVALID_PARAM);
pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
"Unknown IPC request type '%s' (bug?)",
pcmk__client_name(request->ipc_client));
return NULL;
}
static xmlNode *
handle_hello_request(pcmk__request_t *request)
{
pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags,
PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE);
crm_trace("Received IPC hello from %s", pcmk__client_name(request->ipc_client));
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
static void
schedulerd_register_handlers(void)
{
pcmk__server_command_t handlers[] = {
{ CRM_OP_HELLO, handle_hello_request },
{ CRM_OP_PECALC, handle_pecalc_request },
{ NULL, handle_unknown_request },
};
schedulerd_handlers = pcmk__register_handlers(handlers);
}
static int32_t
pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (pcmk__new_client(c, uid, gid) == NULL) {
return -ENOMEM;
}
return 0;
}
static int32_t
pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
xmlNode *msg = NULL;
pcmk__client_t *c = pcmk__find_client(qbc);
const char *sys_to = NULL;
CRM_CHECK(c != NULL, return 0);
if (schedulerd_handlers == NULL) {
schedulerd_register_handlers();
}
msg = pcmk__client_data2xml(c, data, &id, &flags);
if (msg == NULL) {
pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL, CRM_EX_PROTOCOL);
return 0;
}
sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
if (pcmk__str_eq(crm_element_value(msg, PCMK__XA_SUBT),
PCMK__VALUE_RESPONSE, pcmk__str_none)) {
pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL,
CRM_EX_INDETERMINATE);
crm_info("Ignoring IPC reply from %s", pcmk__client_name(c));
} else if (!pcmk__str_eq(sys_to, CRM_SYSTEM_PENGINE, pcmk__str_none)) {
pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL,
CRM_EX_INDETERMINATE);
crm_info("Ignoring invalid IPC message: to '%s' not "
CRM_SYSTEM_PENGINE, pcmk__s(sys_to, ""));
} else {
char *log_msg = NULL;
const char *reason = NULL;
xmlNode *reply = NULL;
pcmk__request_t request = {
.ipc_client = c,
.ipc_id = id,
.ipc_flags = flags,
.peer = NULL,
.xml = msg,
.call_options = 0,
.result = PCMK__UNKNOWN_RESULT,
};
request.op = crm_element_value_copy(request.xml, PCMK__XA_CRM_TASK);
CRM_CHECK(request.op != NULL, return 0);
reply = pcmk__process_request(&request, schedulerd_handlers);
if (reply != NULL) {
pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event);
pcmk__xml_free(reply);
}
reason = request.result.exit_reason;
log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s",
request.op, pcmk__request_origin_type(&request),
pcmk__request_origin(&request),
pcmk_exec_status_str(request.result.execution_status),
(reason == NULL)? "" : " (",
(reason == NULL)? "" : reason,
(reason == NULL)? "" : ")");
if (!pcmk__result_ok(&request.result)) {
crm_warn("%s", log_msg);
} else {
crm_debug("%s", log_msg);
}
free(log_msg);
pcmk__reset_request(&request);
}
pcmk__xml_free(msg);
return 0;
}
/* Error code means? */
static int32_t
pe_ipc_closed(qb_ipcs_connection_t * c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p", c);
pcmk__free_client(client);
return 0;
}
static void
pe_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
pe_ipc_closed(c);
}
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = pe_ipc_accept,
.connection_created = NULL,
.msg_process = pe_ipc_dispatch,
.connection_closed = pe_ipc_closed,
.connection_destroyed = pe_ipc_destroy
};
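For orientation, a minimal sketch (not part of the patch) of how the series index is now selected in handle_pecalc_request(): scheduling errors are detected via the new per-scheduler flag rather than the removed was_processing_error global. The helper name below is hypothetical; the logic mirrors the hunk above.
/* Illustrative only -- mirrors the hunk above, not code from the patch. */
static int
choose_series_id(const pcmk_scheduler_t *scheduler)
{
    if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_error)
        || crm_config_error) {
        return 0;   // pe-error inputs
    }
    if (was_processing_warning || crm_config_warning) {
        return 1;   // pe-warn inputs
    }
    return 2;       // pe-input inputs
}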
diff --git a/include/crm/common/scheduler.h b/include/crm/common/scheduler.h
index 27da726dcf..4dba20116c 100644
--- a/include/crm/common/scheduler.h
+++ b/include/crm/common/scheduler.h
@@ -1,177 +1,176 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_SCHEDULER__H
#define PCMK__CRM_COMMON_SCHEDULER__H
#include <sys/types.h> // time_t
#include <libxml/tree.h> // xmlNode
#include <glib.h> // guint, GList, GHashTable
#include <crm/common/iso8601.h> // crm_time_t
#include <crm/common/actions.h>
#include <crm/common/nodes.h>
#include <crm/common/resources.h>
#include <crm/common/roles.h>
#include <crm/common/rules.h>
#include <crm/common/scheduler_types.h>
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \file
* \brief Scheduler API
* \ingroup core
*/
// NOTE: sbd (as of at least 1.5.2) uses this enum
//! Possible responses to loss of quorum
enum pe_quorum_policy {
pcmk_no_quorum_freeze, //<! Do not recover resources from outside partition
pcmk_no_quorum_stop, //<! Stop all resources in partition
pcmk_no_quorum_ignore, //<! Act as if partition still holds quorum
pcmk_no_quorum_fence, //<! Fence all nodes in partition
pcmk_no_quorum_demote, //<! Demote promotable resources and stop all others
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
// NOTE: sbd (as of at least 1.5.2) uses this value
//! \deprecated Use pcmk_no_quorum_freeze instead
no_quorum_freeze = pcmk_no_quorum_freeze,
// NOTE: sbd (as of at least 1.5.2) uses this value
//! \deprecated Use pcmk_no_quorum_stop instead
no_quorum_stop = pcmk_no_quorum_stop,
// NOTE: sbd (as of at least 1.5.2) uses this value
//! \deprecated Use pcmk_no_quorum_ignore instead
no_quorum_ignore = pcmk_no_quorum_ignore,
//! \deprecated Use pcmk_no_quorum_fence instead
no_quorum_suicide = pcmk_no_quorum_fence,
// NOTE: sbd (as of at least 1.5.2) uses this value
//! \deprecated Use pcmk_no_quorum_demote instead
no_quorum_demote = pcmk_no_quorum_demote,
#endif
};
// Implementation of pcmk_scheduler_t
// @COMPAT Make contents internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
struct pe_working_set_s {
// Be careful about when each piece of information is available and final
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Set scheduler input with pcmk_set_scheduler_cib() instead
xmlNode *input; // CIB XML
crm_time_t *now; // Current time for evaluation purposes
char *dc_uuid; // Node ID of designated controller
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_get_dc() instead
pcmk_node_t *dc_node; // Node object for DC
const char *stonith_action; // Default fencing action
const char *placement_strategy; // Value of placement-strategy property
// NOTE: sbd (as of at least 1.5.2) uses this
// @COMPAT Change to uint64_t at a compatibility break
//! \deprecated Call pcmk_has_quorum() to check quorum
unsigned long long flags; // Group of enum pcmk__scheduler_flags
int stonith_timeout; // Value of stonith-timeout property
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_get_no_quorum_policy() to get no-quorum policy
enum pe_quorum_policy no_quorum_policy; // Response to loss of quorum
GHashTable *config_hash; // Cluster properties
// Ticket constraints unpacked from ticket state
GHashTable *tickets;
// Actions for which there can be only one (such as "fence node X")
GHashTable *singletons;
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_find_node() to find a node instead
GList *nodes; // Nodes in cluster
GList *resources; // Resources in cluster
GList *placement_constraints; // Location constraints
GList *ordering_constraints; // Ordering constraints
GList *colocation_constraints; // Colocation constraints
// Ticket constraints unpacked by libpacemaker
GList *ticket_constraints;
GList *actions; // Scheduled actions
xmlNode *failed; // History entries of failed actions
xmlNode *op_defaults; // Configured operation defaults
xmlNode *rsc_defaults; // Configured resource defaults
int num_synapse; // Number of transition graph synapses
int max_valid_nodes; // \deprecated Do not use
int order_id; // ID to use for next created ordering
int action_id; // ID to use for next created action
xmlNode *graph; // Transition graph
GHashTable *template_rsc_sets; // Mappings of template ID to resource ID
// @COMPAT Replace this with a fencer variable (only place it's used)
const char *localhost; // \deprecated Do not use
GHashTable *tags; // Configuration tags (ID -> pcmk__idref_t*)
int blocked_resources; // Number of blocked resources in cluster
int disabled_resources; // Number of disabled resources in cluster
GList *param_check; // History entries that need to be checked
GList *stop_needed; // Containers that need stop actions
time_t recheck_by; // Hint to controller when to reschedule
int ninstances; // Total number of resource instances
guint shutdown_lock; // How long to lock resources (seconds)
int priority_fencing_delay; // Priority fencing delay
// pcmk__output_t *
void *priv; // For Pacemaker use only
guint node_pending_timeout; // Pending join times out after this (ms)
};
//!@}
/* Whether the scheduler input currently being processed has warnings or errors
*
* @COMPAT When we can break API compatibility, we should make these
* internal-only. Ideally they would be converted to pcmk__scheduler_flags
* values, but everywhere they're needed doesn't currently have access to the
* scheduler data.
*/
//!@{
//! \deprecated Do not use
-extern gboolean was_processing_error;
extern gboolean was_processing_warning;
//!@}
pcmk_node_t *pcmk_get_dc(const pcmk_scheduler_t *scheduler);
enum pe_quorum_policy pcmk_get_no_quorum_policy(const pcmk_scheduler_t
*scheduler);
int pcmk_set_scheduler_cib(pcmk_scheduler_t *scheduler, xmlNode *cib);
bool pcmk_has_quorum(const pcmk_scheduler_t *scheduler);
pcmk_node_t *pcmk_find_node(const pcmk_scheduler_t *scheduler,
const char *node_name);
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_COMMON_SCHEDULER__H
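Because the extern declaration of was_processing_error is removed here, code that read the global needs access to the scheduler data instead. A hedged migration sketch (the wrapper name is hypothetical, and pcmk__sched_processing_error is internal, so this pattern only applies inside Pacemaker itself):
/* Hypothetical wrapper, for illustration only.
 * Before this patch: if (was_processing_error) { ... }
 * After: check the flag carried on the scheduler data.
 */
static bool
input_had_errors(const pcmk_scheduler_t *scheduler)
{
    return (scheduler != NULL)
           && pcmk_is_set(scheduler->flags, pcmk__sched_processing_error);
}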
diff --git a/include/crm/common/scheduler_internal.h b/include/crm/common/scheduler_internal.h
index 237d5a4f9e..cd441e9146 100644
--- a/include/crm/common/scheduler_internal.h
+++ b/include/crm/common/scheduler_internal.h
@@ -1,249 +1,253 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_SCHEDULER_INTERNAL__H
#define PCMK__CRM_COMMON_SCHEDULER_INTERNAL__H
#include <crm/common/action_relation_internal.h>
#include <crm/common/actions_internal.h>
#include <crm/common/attrs_internal.h>
#include <crm/common/bundles_internal.h>
#include <crm/common/clone_internal.h>
#include <crm/common/digest_internal.h>
#include <crm/common/failcounts_internal.h>
#include <crm/common/group_internal.h>
#include <crm/common/history_internal.h>
#include <crm/common/location_internal.h>
#include <crm/common/nodes_internal.h>
#include <crm/common/primitive_internal.h>
#include <crm/common/remote_internal.h>
#include <crm/common/resources_internal.h>
#include <crm/common/roles_internal.h>
#include <crm/common/rules_internal.h>
#include <crm/common/tickets_internal.h>
#ifdef __cplusplus
extern "C" {
#endif
enum pcmk__check_parameters {
/* Clear fail count if parameters changed for un-expired start or monitor
* last_failure.
*/
pcmk__check_last_failure,
/* Clear fail count if parameters changed for start, monitor, promote, or
* migrate_from actions for active resources.
*/
pcmk__check_active,
};
// Scheduling options and conditions
enum pcmk__scheduler_flags {
// No scheduler flags set (compare with equality rather than bit set)
pcmk__sched_none = 0ULL,
/* These flags are dynamically determined conditions */
// Whether partition has quorum (via \c PCMK_XA_HAVE_QUORUM attribute)
//! \deprecated Call pcmk_has_quorum() to check quorum instead
pcmk__sched_quorate = (1ULL << 0),
// Whether cluster is symmetric (via symmetric-cluster property)
pcmk__sched_symmetric_cluster = (1ULL << 1),
+ // Whether scheduling encountered a non-configuration error
+ pcmk__sched_processing_error = (1ULL << 2),
+
// Whether cluster is in maintenance mode (via maintenance-mode property)
pcmk__sched_in_maintenance = (1ULL << 3),
// Whether fencing is enabled (via stonith-enabled property)
pcmk__sched_fencing_enabled = (1ULL << 4),
// Whether cluster has a fencing resource (via CIB resources)
/*! \deprecated To indicate the cluster has a fencing resource, add either a
* fencing resource configuration or the have-watchdog cluster option to the
* input CIB
*/
pcmk__sched_have_fencing = (1ULL << 5),
// Whether any resource provides or requires unfencing (via CIB resources)
pcmk__sched_enable_unfencing = (1ULL << 6),
// Whether concurrent fencing is allowed (via concurrent-fencing property)
pcmk__sched_concurrent_fencing = (1ULL << 7),
/*
* Whether resources removed from the configuration should be stopped (via
* stop-orphan-resources property)
*/
pcmk__sched_stop_removed_resources = (1ULL << 8),
/*
* Whether recurring actions removed from the configuration should be
* cancelled (via stop-orphan-actions property)
*/
pcmk__sched_cancel_removed_actions = (1ULL << 9),
// Whether to stop all resources (via stop-all-resources property)
pcmk__sched_stop_all = (1ULL << 10),
/*
* Whether start failure should be treated as if
* \c PCMK_META_MIGRATION_THRESHOLD is 1 (via
* \c PCMK_OPT_START_FAILURE_IS_FATAL property)
*/
pcmk__sched_start_failure_fatal = (1ULL << 12),
// Unused
pcmk__sched_remove_after_stop = (1ULL << 13),
// Whether unseen nodes should be fenced (via startup-fencing property)
pcmk__sched_startup_fencing = (1ULL << 14),
/*
* Whether resources should be left stopped when their node shuts down
* cleanly (via shutdown-lock property)
*/
pcmk__sched_shutdown_lock = (1ULL << 15),
/*
* Whether resources' current state should be probed (when unknown) before
* scheduling any other actions (via the enable-startup-probes property)
*/
pcmk__sched_probe_resources = (1ULL << 16),
// Whether the CIB status section has been parsed yet
pcmk__sched_have_status = (1ULL << 17),
// Whether the cluster includes any Pacemaker Remote nodes (via CIB)
pcmk__sched_have_remote_nodes = (1ULL << 18),
/* The remaining flags are scheduling options that must be set explicitly */
/*
* Whether to skip unpacking the CIB status section and stop the scheduling
* sequence after applying node-specific location criteria (skipping
* assignment, ordering, actions, etc.).
*/
pcmk__sched_location_only = (1ULL << 20),
// Whether sensitive resource attributes have been masked
pcmk__sched_sanitized = (1ULL << 21),
// Skip counting of total, disabled, and blocked resource instances
pcmk__sched_no_counts = (1ULL << 23),
/*
* Skip deprecated code kept solely for backward API compatibility
* (internal code should always set this)
*/
pcmk__sched_no_compat = (1ULL << 24),
// Whether node scores should be output instead of logged
pcmk__sched_output_scores = (1ULL << 25),
// Whether to show node and resource utilization (in log or output)
pcmk__sched_show_utilization = (1ULL << 26),
/*
* Whether to stop the scheduling sequence after unpacking the CIB,
* calculating cluster status, and applying node health (skipping
* applying node-specific location criteria, assignment, etc.)
*/
pcmk__sched_validate_only = (1ULL << 27),
};
// Group of enum pcmk__warnings flags for warnings we want to log once
extern uint32_t pcmk__warnings;
/*!
* \internal
* \brief Log a resource-tagged message at info severity
*
* \param[in] rsc Tag message with this resource's ID
* \param[in] fmt... printf(3)-style format and arguments
*/
#define pcmk__rsc_info(rsc, fmt, args...) \
crm_log_tag(LOG_INFO, ((rsc) == NULL)? "<NULL>" : (rsc)->id, (fmt), ##args)
/*!
* \internal
* \brief Log a resource-tagged message at debug severity
*
* \param[in] rsc Tag message with this resource's ID
* \param[in] fmt... printf(3)-style format and arguments
*/
#define pcmk__rsc_debug(rsc, fmt, args...) \
crm_log_tag(LOG_DEBUG, ((rsc) == NULL)? "<NULL>" : (rsc)->id, (fmt), ##args)
/*!
* \internal
* \brief Log a resource-tagged message at trace severity
*
* \param[in] rsc Tag message with this resource's ID
* \param[in] fmt... printf(3)-style format and arguments
*/
#define pcmk__rsc_trace(rsc, fmt, args...) \
crm_log_tag(LOG_TRACE, ((rsc) == NULL)? "<NULL>" : (rsc)->id, (fmt), ##args)
/*!
* \internal
* \brief Log an error and remember that current scheduler input has errors
*
* \param[in,out] scheduler Scheduler data
* \param[in] fmt... printf(3)-style format and arguments
*/
-#define pcmk__sched_err(scheduler, fmt...) do { \
- was_processing_error = TRUE; \
- crm_err(fmt); \
+#define pcmk__sched_err(scheduler, fmt...) do { \
+ pcmk__set_scheduler_flags((scheduler), \
+ pcmk__sched_processing_error); \
+ crm_err(fmt); \
} while (0)
/*!
* \internal
* \brief Log a warning and remember that current scheduler input has warnings
*
* \param[in,out] scheduler Scheduler data
* \param[in] fmt... printf(3)-style format and arguments
*/
#define pcmk__sched_warn(scheduler, fmt...) do { \
was_processing_warning = TRUE; \
crm_warn(fmt); \
} while (0)
/*!
* \internal
* \brief Set scheduler flags
*
* \param[in,out] scheduler Scheduler data
* \param[in] flags_to_set Group of enum pcmk__scheduler_flags to set
*/
#define pcmk__set_scheduler_flags(scheduler, flags_to_set) do { \
(scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Scheduler", crm_system_name, \
(scheduler)->flags, (flags_to_set), #flags_to_set); \
} while (0)
/*!
* \internal
* \brief Clear scheduler flags
*
* \param[in,out] scheduler Scheduler data
* \param[in] flags_to_clear Group of enum pcmk__scheduler_flags to clear
*/
#define pcmk__clear_scheduler_flags(scheduler, flags_to_clear) do { \
(scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Scheduler", crm_system_name, \
(scheduler)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_COMMON_SCHEDULER_INTERNAL__H
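A short usage sketch of the reworked pcmk__sched_err() macro (illustrative; the function and message below are hypothetical, but the calling convention matches the hunks above). Logging the error now also sets pcmk__sched_processing_error on the scheduler data via pcmk__set_scheduler_flags(), replacing the old global.
#include <crm/common/scheduler_internal.h>  // pcmk__sched_err()

/* Illustrative only -- not part of the patch. */
static void
flag_unassigned_action(pcmk_action_t *action)
{
    if (action->node == NULL) {
        pcmk__sched_err(action->scheduler,
                        "Action %s was not assigned to a node", action->uuid);
    }
}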
diff --git a/lib/common/scheduler.c b/lib/common/scheduler.c
index d9919db5bf..0961af55fd 100644
--- a/lib/common/scheduler.c
+++ b/lib/common/scheduler.c
@@ -1,109 +1,108 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdint.h> // uint32_t
#include <errno.h> // EINVAL
#include <glib.h> // gboolean, FALSE
#include <libxml/tree.h> // xmlNode
#include <crm/common/scheduler.h>
uint32_t pcmk__warnings = 0;
-gboolean was_processing_error = FALSE;
gboolean was_processing_warning = FALSE;
/*!
* \internal
* \brief Get the Designated Controller node from scheduler data
*
* \param[in] scheduler Scheduler data
*
* \return Designated Controller node from scheduler data, or NULL if none
*/
pcmk_node_t *
pcmk_get_dc(const pcmk_scheduler_t *scheduler)
{
return (scheduler == NULL)? NULL : scheduler->dc_node;
}
/*!
* \internal
* \brief Get the no quorum policy from scheduler data
*
* \param[in] scheduler Scheduler data
*
* \return No quorum policy from scheduler data
*/
enum pe_quorum_policy
pcmk_get_no_quorum_policy(const pcmk_scheduler_t *scheduler)
{
if (scheduler == NULL) {
return pcmk_no_quorum_stop; // The default
}
return scheduler->no_quorum_policy;
}
/*!
* \internal
* \brief Set CIB XML as scheduler input in scheduler data
*
* \param[out] scheduler Scheduler data
* \param[in] cib CIB XML to set as scheduler input
*
* \return Standard Pacemaker return code (EINVAL if \p scheduler is NULL,
* otherwise pcmk_rc_ok)
* \note This will not free any previously set scheduler CIB.
*/
int
pcmk_set_scheduler_cib(pcmk_scheduler_t *scheduler, xmlNode *cib)
{
if (scheduler == NULL) {
return EINVAL;
}
scheduler->input = cib;
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Check whether cluster has quorum
*
* \param[in] scheduler Scheduler data
*
* \return true if cluster has quorum, otherwise false
*/
bool
pcmk_has_quorum(const pcmk_scheduler_t *scheduler)
{
if (scheduler == NULL) {
return false;
}
return pcmk_is_set(scheduler->flags, pcmk__sched_quorate);
}
/*!
* \brief Find a node by name in scheduler data
*
* \param[in] scheduler Scheduler data
* \param[in] node_name Name of node to find
*
* \return Node from scheduler data that matches \p node_name if any,
* otherwise NULL
*/
pcmk_node_t *
pcmk_find_node(const pcmk_scheduler_t *scheduler, const char *node_name)
{
if ((scheduler == NULL) || (node_name == NULL)) {
return NULL;
}
return pcmk__find_node_in_list(scheduler->nodes, node_name);
}
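A brief, hedged usage sketch of the public accessors defined in this file (the reporting function is hypothetical):
#include <crm/common/scheduler.h>

/* Illustrative only: querying scheduler data through the accessors above. */
static void
report_cluster_state(const pcmk_scheduler_t *scheduler)
{
    pcmk_node_t *dc = pcmk_get_dc(scheduler);

    if (pcmk_has_quorum(scheduler)) {
        crm_info("Partition has quorum%s", (dc != NULL)? " and a DC" : "");
    } else if (pcmk_get_no_quorum_policy(scheduler) == pcmk_no_quorum_stop) {
        crm_info("No quorum; resources in this partition will be stopped");
    }
}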
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
index 796eb9145a..9696fcf0c6 100644
--- a/lib/pacemaker/pcmk_graph_producer.c
+++ b/lib/pacemaker/pcmk_graph_producer.c
@@ -1,1110 +1,1111 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/common/xml.h>
#include <glib.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pcmk__action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pcmk__action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pcmk__action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->private->name)
/*!
* \internal
* \brief Add an XML node tag for a specified ID
*
* \param[in] id Node UUID to add
* \param[in,out] xml Parent XML tag to add to
*/
static xmlNode*
add_node_to_xml_by_id(const char *id, xmlNode *xml)
{
xmlNode *node_xml;
node_xml = pcmk__xe_create(xml, PCMK_XE_NODE);
crm_xml_add(node_xml, PCMK_XA_ID, id);
return node_xml;
}
/*!
* \internal
* \brief Add an XML node tag for a specified node
*
* \param[in] node Node to add
* \param[in,out] xml XML to add node to
*/
static void
add_node_to_xml(const pcmk_node_t *node, void *xml)
{
add_node_to_xml_by_id(node->private->id, (xmlNode *) xml);
}
/*!
* \internal
* \brief Count (optionally add to XML) nodes needing maintenance state update
*
* \param[in,out] xml Parent XML tag to add to, if any
* \param[in] scheduler Scheduler data
*
* \return Count of nodes added
* \note Only Pacemaker Remote nodes are considered currently
*/
static int
add_maintenance_nodes(xmlNode *xml, const pcmk_scheduler_t *scheduler)
{
xmlNode *maintenance = NULL;
int count = 0;
if (xml != NULL) {
maintenance = pcmk__xe_create(xml, PCMK__XE_MAINTENANCE);
}
for (const GList *iter = scheduler->nodes;
iter != NULL; iter = iter->next) {
const pcmk_node_t *node = iter->data;
if (!pcmk__is_pacemaker_remote_node(node)) {
continue;
}
if ((node->details->maintenance
&& !pcmk_is_set(node->private->flags, pcmk__node_remote_maint))
|| (!node->details->maintenance
&& pcmk_is_set(node->private->flags, pcmk__node_remote_maint))) {
if (maintenance != NULL) {
crm_xml_add(add_node_to_xml_by_id(node->private->id,
maintenance),
PCMK__XA_NODE_IN_MAINTENANCE,
(node->details->maintenance? "1" : "0"));
}
count++;
}
}
crm_trace("%s %d nodes in need of maintenance mode update in state",
((maintenance == NULL)? "Counted" : "Added"), count);
return count;
}
/*!
* \internal
* \brief Add pseudo action with nodes needing maintenance state update
*
* \param[in,out] scheduler Scheduler data
*/
static void
add_maintenance_update(pcmk_scheduler_t *scheduler)
{
pcmk_action_t *action = NULL;
if (add_maintenance_nodes(NULL, scheduler) != 0) {
action = get_pseudo_op(PCMK_ACTION_MAINTENANCE_NODES, scheduler);
pcmk__set_action_flags(action, pcmk__action_always_in_graph);
}
}
/*!
* \internal
* \brief Add XML with nodes that an action is expected to bring down
*
* If a specified action is expected to bring any nodes down, add an XML block
* with their UUIDs. When a node is lost, this allows the controller to
* determine whether it was expected.
*
* \param[in,out] xml Parent XML tag to add to
* \param[in] action Action to check for downed nodes
*/
static void
add_downed_nodes(xmlNode *xml, const pcmk_action_t *action)
{
CRM_CHECK((xml != NULL) && (action != NULL) && (action->node != NULL),
return);
if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
/* Shutdown makes the action's node down */
xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED);
add_node_to_xml_by_id(action->node->private->id, downed);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH,
pcmk__str_none)) {
/* Fencing makes the action's node and any hosted guest nodes down */
const char *fence = g_hash_table_lookup(action->meta,
PCMK__META_STONITH_ACTION);
if (pcmk__is_fencing_action(fence)) {
xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED);
add_node_to_xml_by_id(action->node->private->id, downed);
pe_foreach_guest_node(action->node->private->scheduler,
action->node, add_node_to_xml, downed);
}
} else if ((action->rsc != NULL)
&& pcmk_is_set(action->rsc->flags,
pcmk__rsc_is_remote_connection)
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP,
pcmk__str_none)) {
/* Stopping a remote connection resource makes connected node down,
* unless it's part of a migration
*/
GList *iter;
pcmk_action_t *input;
bool migrating = false;
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
input = ((pcmk__related_action_t *) iter->data)->action;
if ((input->rsc != NULL)
&& pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_none)
&& pcmk__str_eq(input->task, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_none)) {
migrating = true;
break;
}
}
if (!migrating) {
xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED);
add_node_to_xml_by_id(action->rsc->id, downed);
}
}
}
/*!
* \internal
* \brief Create a transition graph operation key for a clone action
*
* \param[in] action Clone action
* \param[in] interval_ms Action interval in milliseconds
*
* \return Newly allocated string with transition graph operation key
*/
static char *
clone_op_key(const pcmk_action_t *action, guint interval_ms)
{
if (pcmk__str_eq(action->task, PCMK_ACTION_NOTIFY, pcmk__str_none)) {
const char *n_type = g_hash_table_lookup(action->meta, "notify_type");
const char *n_task = g_hash_table_lookup(action->meta,
"notify_operation");
return pcmk__notify_key(action->rsc->private->history_id, n_type,
n_task);
}
return pcmk__op_key(action->rsc->private->history_id,
pcmk__s(action->cancel_task, action->task),
interval_ms);
}
/*!
* \internal
* \brief Add node details to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in,out] xml Transition graph action XML for \p action
*/
static void
add_node_details(const pcmk_action_t *action, xmlNode *xml)
{
pcmk_node_t *router_node = pcmk__connection_host_for_action(action);
crm_xml_add(xml, PCMK__META_ON_NODE, action->node->private->name);
crm_xml_add(xml, PCMK__META_ON_NODE_UUID, action->node->private->id);
if (router_node != NULL) {
crm_xml_add(xml, PCMK__XA_ROUTER_NODE, router_node->private->name);
}
}
/*!
* \internal
* \brief Add resource details to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in,out] action_xml Transition graph action XML for \p action
*/
static void
add_resource_details(const pcmk_action_t *action, xmlNode *action_xml)
{
xmlNode *rsc_xml = NULL;
const char *attr_list[] = {
PCMK_XA_CLASS,
PCMK_XA_PROVIDER,
PCMK_XA_TYPE,
};
/* If a resource is locked to a node via PCMK_OPT_SHUTDOWN_LOCK, mark its
* actions so the controller can preserve the lock when the action
* completes.
*/
if (pcmk__action_locks_rsc_to_node(action)) {
crm_xml_add_ll(action_xml, PCMK_OPT_SHUTDOWN_LOCK,
(long long) action->rsc->private->lock_time);
}
// List affected resource
rsc_xml = pcmk__xe_create(action_xml,
(const char *) action->rsc->private->xml->name);
if (pcmk_is_set(action->rsc->flags, pcmk__rsc_removed)
&& (action->rsc->private->history_id != NULL)) {
/* Use the numbered instance name here, because if there is more
* than one instance on a node, we need to make sure the command
* goes to the right one.
*
* This is important even for anonymous clones, because the clone's
* unique meta-attribute might have just been toggled from on to
* off.
*/
crm_debug("Using orphan clone name %s instead of history ID %s",
action->rsc->id, action->rsc->private->history_id);
crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->private->history_id);
crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id);
} else if (!pcmk_is_set(action->rsc->flags, pcmk__rsc_unique)) {
const char *xml_id = pcmk__xe_id(action->rsc->private->xml);
crm_debug("Using anonymous clone name %s for %s (aka %s)",
xml_id, action->rsc->id, action->rsc->private->history_id);
/* ID is what we'd like client to use
* LONG_ID is what they might know it as instead
*
* LONG_ID is only strictly needed /here/ during the
* transition period until all nodes in the cluster
* are running the new software /and/ have rebooted
* once (meaning that they've only ever spoken to a DC
* supporting this feature).
*
* If anyone toggles the unique flag to 'on', the
* 'instance free' name will correspond to an orphan
* and fall into the clause above instead
*/
crm_xml_add(rsc_xml, PCMK_XA_ID, xml_id);
if ((action->rsc->private->history_id != NULL)
&& !pcmk__str_eq(xml_id, action->rsc->private->history_id,
pcmk__str_none)) {
crm_xml_add(rsc_xml, PCMK__XA_LONG_ID,
action->rsc->private->history_id);
} else {
crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id);
}
} else {
CRM_ASSERT(action->rsc->private->history_id == NULL);
crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->id);
}
for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) {
crm_xml_add(rsc_xml, attr_list[lpc],
g_hash_table_lookup(action->rsc->private->meta,
attr_list[lpc]));
}
}
/*!
* \internal
* \brief Add action attributes to transition graph action XML
*
* \param[in,out] action Scheduled action
* \param[in,out] action_xml Transition graph action XML for \p action
*/
static void
add_action_attributes(pcmk_action_t *action, xmlNode *action_xml)
{
xmlNode *args_xml = NULL;
pcmk_resource_t *rsc = action->rsc;
/* We create free-standing XML to start, so we can sort the attributes
* before adding it to action_xml, which keeps the scheduler regression
* test graphs comparable.
*/
args_xml = pcmk__xe_create(action_xml, PCMK__XE_ATTRIBUTES);
crm_xml_add(args_xml, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
g_hash_table_foreach(action->extra, hash2field, args_xml);
if ((rsc != NULL) && (action->node != NULL)) {
// Get the resource instance attributes, evaluated properly for node
GHashTable *params = pe_rsc_params(rsc, action->node,
rsc->private->scheduler);
pcmk__substitute_remote_addr(rsc, params);
g_hash_table_foreach(params, hash2smartfield, args_xml);
} else if ((rsc != NULL)
&& (rsc->private->variant <= pcmk__rsc_variant_primitive)) {
GHashTable *params = pe_rsc_params(rsc, NULL, rsc->private->scheduler);
g_hash_table_foreach(params, hash2smartfield, args_xml);
}
g_hash_table_foreach(action->meta, hash2metafield, args_xml);
if (rsc != NULL) {
pcmk_resource_t *parent = rsc;
while (parent != NULL) {
parent->private->cmds->add_graph_meta(parent, args_xml);
parent = parent->private->parent;
}
pcmk__add_guest_meta_to_xml(args_xml, action);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)
&& (action->node != NULL)) {
/* Pass the node's attributes as meta-attributes.
*
* @TODO: Determine whether it is still necessary to do this. It was
* added in 33d99707, probably for the libfence-based implementation in
* c9a90bd, which is no longer used.
*/
g_hash_table_foreach(action->node->private->attrs, hash2metafield,
args_xml);
}
pcmk__xe_sort_attrs(args_xml);
}
/*!
* \internal
* \brief Create the transition graph XML for a scheduled action
*
* \param[in,out] parent Parent XML element to add action to
* \param[in,out] action Scheduled action
* \param[in] skip_details If false, add action details as sub-elements
* \param[in] scheduler Scheduler data
*/
static void
create_graph_action(xmlNode *parent, pcmk_action_t *action, bool skip_details,
const pcmk_scheduler_t *scheduler)
{
bool needs_node_info = true;
bool needs_maintenance_info = false;
xmlNode *action_xml = NULL;
if ((action == NULL) || (scheduler == NULL)) {
return;
}
// Create the top-level element based on task
if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) {
/* All fences need node info; guest node fences are pseudo-events */
if (pcmk_is_set(action->flags, pcmk__action_pseudo)) {
action_xml = pcmk__xe_create(parent, PCMK__XE_PSEUDO_EVENT);
} else {
action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT);
}
} else if (pcmk__str_any_of(action->task,
PCMK_ACTION_DO_SHUTDOWN,
PCMK_ACTION_CLEAR_FAILCOUNT, NULL)) {
action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_LRM_DELETE,
pcmk__str_none)) {
// CIB-only clean-up for shutdown locks
action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT);
crm_xml_add(action_xml, PCMK__XA_MODE, PCMK__VALUE_CIB);
} else if (pcmk_is_set(action->flags, pcmk__action_pseudo)) {
if (pcmk__str_eq(action->task, PCMK_ACTION_MAINTENANCE_NODES,
pcmk__str_none)) {
needs_maintenance_info = true;
}
action_xml = pcmk__xe_create(parent, PCMK__XE_PSEUDO_EVENT);
needs_node_info = false;
} else {
action_xml = pcmk__xe_create(parent, PCMK__XE_RSC_OP);
}
crm_xml_add_int(action_xml, PCMK_XA_ID, action->id);
crm_xml_add(action_xml, PCMK_XA_OPERATION, action->task);
if ((action->rsc != NULL) && (action->rsc->private->history_id != NULL)) {
char *clone_key = NULL;
guint interval_ms;
if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0,
&interval_ms) != pcmk_rc_ok) {
interval_ms = 0;
}
clone_key = clone_op_key(action, interval_ms);
crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, clone_key);
crm_xml_add(action_xml, "internal_" PCMK__XA_OPERATION_KEY,
action->uuid);
free(clone_key);
} else {
crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, action->uuid);
}
if (needs_node_info && (action->node != NULL)) {
add_node_details(action, action_xml);
pcmk__insert_dup(action->meta, PCMK__META_ON_NODE,
action->node->private->name);
pcmk__insert_dup(action->meta, PCMK__META_ON_NODE_UUID,
action->node->private->id);
}
if (skip_details) {
return;
}
if ((action->rsc != NULL)
&& !pcmk_is_set(action->flags, pcmk__action_pseudo)) {
// This is a real resource action, so add resource details
add_resource_details(action, action_xml);
}
/* List any attributes in effect */
add_action_attributes(action, action_xml);
/* List any nodes this action is expected to make down */
if (needs_node_info && (action->node != NULL)) {
add_downed_nodes(action_xml, action);
}
if (needs_maintenance_info) {
add_maintenance_nodes(action_xml, scheduler);
}
}
/*!
* \internal
* \brief Check whether an action should be added to the transition graph
*
* \param[in,out] action Action to check
*
* \return true if action should be added to graph, otherwise false
*/
static bool
should_add_action_to_graph(pcmk_action_t *action)
{
if (!pcmk_is_set(action->flags, pcmk__action_runnable)) {
crm_trace("Ignoring action %s (%d): unrunnable",
action->uuid, action->id);
return false;
}
if (pcmk_is_set(action->flags, pcmk__action_optional)
&& !pcmk_is_set(action->flags, pcmk__action_always_in_graph)) {
crm_trace("Ignoring action %s (%d): optional",
action->uuid, action->id);
return false;
}
/* Actions for unmanaged resources should be excluded from the graph,
* with the exception of monitors and cancellation of recurring monitors.
*/
if ((action->rsc != NULL)
&& !pcmk_is_set(action->rsc->flags, pcmk__rsc_managed)
&& !pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
const char *interval_ms_s;
/* A cancellation of a recurring monitor will get here because the task
* is cancel rather than monitor, but the interval can still be used to
* recognize it. The interval has been normalized to milliseconds by
* this point, so a string comparison is sufficient.
*/
interval_ms_s = g_hash_table_lookup(action->meta, PCMK_META_INTERVAL);
if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) {
crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)",
action->uuid, action->id, action->rsc->id);
return false;
}
}
/* Always add pseudo-actions, fence actions, and shutdown actions (already
* determined to be required and runnable by this point)
*/
if (pcmk_is_set(action->flags, pcmk__action_pseudo)
|| pcmk__strcase_any_of(action->task, PCMK_ACTION_STONITH,
PCMK_ACTION_DO_SHUTDOWN, NULL)) {
return true;
}
if (action->node == NULL) {
pcmk__sched_err(action->scheduler,
"Skipping action %s (%d) "
"because it was not assigned to a node (bug?)",
action->uuid, action->id);
pcmk__log_action("Unassigned", action, false);
return false;
}
if (pcmk_is_set(action->flags, pcmk__action_on_dc)) {
crm_trace("Action %s (%d) should be dumped: "
"can run on DC instead of %s",
action->uuid, action->id, pcmk__node_name(action->node));
} else if (pcmk__is_guest_or_bundle_node(action->node)
&& !pcmk_is_set(action->node->private->flags,
pcmk__node_remote_reset)) {
crm_trace("Action %s (%d) should be dumped: "
"assuming will be runnable on guest %s",
action->uuid, action->id, pcmk__node_name(action->node));
} else if (!action->node->details->online) {
pcmk__sched_err(action->scheduler,
"Skipping action %s (%d) "
"because it was scheduled for offline node (bug?)",
action->uuid, action->id);
pcmk__log_action("Offline node", action, false);
return false;
} else if (action->node->details->unclean) {
pcmk__sched_err(action->scheduler,
"Skipping action %s (%d) "
"because it was scheduled for unclean node (bug?)",
action->uuid, action->id);
pcmk__log_action("Unclean node", action, false);
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether an ordering's flags can change an action
*
* \param[in] ordering Ordering to check
*
* \return true if ordering has flags that can change an action, false otherwise
*/
static bool
ordering_can_change_actions(const pcmk__related_action_t *ordering)
{
return pcmk_any_flags_set(ordering->flags,
~(pcmk__ar_then_implies_first_graphed
|pcmk__ar_first_implies_then_graphed
|pcmk__ar_ordered));
}
/*!
* \internal
* \brief Check whether an action input should be in the transition graph
*
* \param[in] action Action to check
* \param[in,out] input Action input to check
*
* \return true if input should be in graph, false otherwise
* \note This function may not only check an input, but disable it under certain
* circumstances (load or anti-colocation orderings that are not needed).
*/
static bool
should_add_input_to_graph(const pcmk_action_t *action,
pcmk__related_action_t *input)
{
if (input->graphed) {
return true;
}
if (input->flags == pcmk__ar_none) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"ordering disabled",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (!pcmk_is_set(input->action->flags, pcmk__action_runnable)
&& !ordering_can_change_actions(input)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional and input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (!pcmk_is_set(input->action->flags, pcmk__action_runnable)
&& pcmk_is_set(input->flags, pcmk__ar_min_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"minimum number of instances required but input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (pcmk_is_set(input->flags, pcmk__ar_unmigratable_then_blocks)
&& !pcmk_is_set(input->action->flags, pcmk__action_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"input blocked if 'then' unmigratable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (pcmk_is_set(input->flags, pcmk__ar_if_first_unmigratable)
&& pcmk_is_set(input->action->flags, pcmk__action_migratable)) {
crm_trace("Ignoring %s (%d) input %s (%d): ordering applies "
"only if input is unmigratable, but it is migratable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if ((input->flags == pcmk__ar_ordered)
&& pcmk_is_set(input->action->flags, pcmk__action_migratable)
&& pcmk__ends_with(input->action->uuid, "_stop_0")) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional but stop in migration",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (input->flags == pcmk__ar_if_on_same_node_or_target) {
pcmk_node_t *input_node = input->action->node;
if ((action->rsc != NULL)
&& pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO,
pcmk__str_none)) {
pcmk_node_t *assigned = action->rsc->private->assigned_node;
/* For load_stopped -> migrate_to orderings, we care about where
* the resource has been assigned, not where migrate_to will be
* executed.
*/
if (!pcmk__same_node(input_node, assigned)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"migration target %s is not same as input node %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
(assigned? assigned->private->name : "<none>"),
(input_node? input_node->private->name : "<none>"));
input->flags = pcmk__ar_none;
return false;
}
} else if (!pcmk__same_node(input_node, action->node)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"not on same node (%s vs %s)",
action->uuid, action->id,
input->action->uuid, input->action->id,
(action->node? action->node->private->name : "<none>"),
(input_node? input_node->private->name : "<none>"));
input->flags = pcmk__ar_none;
return false;
} else if (pcmk_is_set(input->action->flags, pcmk__action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"ordering optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->flags = pcmk__ar_none;
return false;
}
} else if (input->flags == pcmk__ar_if_required_on_same_node) {
if (input->action->node && action->node
&& !pcmk__same_node(input->action->node, action->node)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"not on same node (%s vs %s)",
action->uuid, action->id,
input->action->uuid, input->action->id,
pcmk__node_name(action->node),
pcmk__node_name(input->action->node));
input->flags = pcmk__ar_none;
return false;
} else if (pcmk_is_set(input->action->flags, pcmk__action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->flags = pcmk__ar_none;
return false;
}
} else if (input->action->rsc
&& input->action->rsc != action->rsc
&& pcmk_is_set(input->action->rsc->flags, pcmk__rsc_failed)
&& !pcmk_is_set(input->action->rsc->flags, pcmk__rsc_managed)
&& pcmk__ends_with(input->action->uuid, "_stop_0")
&& pcmk__is_clone(action->rsc)) {
crm_warn("Ignoring requirement that %s complete before %s:"
" unmanaged failed resources cannot prevent clone shutdown",
input->action->uuid, action->uuid);
return false;
} else if (pcmk_is_set(input->action->flags, pcmk__action_optional)
&& !pcmk_any_flags_set(input->action->flags,
pcmk__action_always_in_graph
|pcmk__action_added_to_graph)
&& !should_add_action_to_graph(input->action)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
}
crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x",
action->uuid, action->id, action_type_str(input->action->flags),
input->action->uuid, input->action->id,
action_node_str(input->action),
action_runnable_str(input->action->flags),
action_optional_str(input->action->flags), input->flags);
return true;
}
/*!
* \internal
* \brief Check whether an ordering creates an ordering loop
*
* \param[in] init_action "First" action in ordering
* \param[in] action Callers should always set this the same as
* \p init_action (this function may use a different
* value for recursive calls)
* \param[in,out] input Action wrapper for "then" action in ordering
*
* \return true if the ordering creates a loop, otherwise false
*/
bool
pcmk__graph_has_loop(const pcmk_action_t *init_action,
const pcmk_action_t *action, pcmk__related_action_t *input)
{
bool has_loop = false;
if (pcmk_is_set(input->action->flags, pcmk__action_detect_loop)) {
crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)",
input->action->uuid,
input->action->node? input->action->node->private->name : "",
action->uuid,
action->node? action->node->private->name : "",
input->flags);
return false;
}
// Don't need to check inputs that won't be used
if (!should_add_input_to_graph(action, input)) {
return false;
}
if (input->action == init_action) {
crm_debug("Input loop found in %s@%s ->...-> %s@%s",
action->uuid,
action->node? action->node->private->name : "",
init_action->uuid,
init_action->node? init_action->node->private->name : "");
return true;
}
pcmk__set_action_flags(input->action, pcmk__action_detect_loop);
crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)"
"for graph loop with %s@%s ",
action->uuid,
action->node? action->node->private->name : "",
input->action->uuid,
input->action->node? input->action->node->private->name : "",
input->flags,
init_action->uuid,
init_action->node? init_action->node->private->name : "");
// Recursively check input itself for loops
for (GList *iter = input->action->actions_before;
iter != NULL; iter = iter->next) {
if (pcmk__graph_has_loop(init_action, input->action,
(pcmk__related_action_t *) iter->data)) {
// Recursive call already logged a debug message
has_loop = true;
break;
}
}
pcmk__clear_action_flags(input->action, pcmk__action_detect_loop);
if (!has_loop) {
crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)",
input->action->uuid,
input->action->node? input->action->node->private->name : "",
action->uuid,
action->node? action->node->private->name : "",
input->flags);
}
return has_loop;
}
/*!
* \internal
* \brief Create a synapse XML element for a transition graph
*
* \param[in] action Action that synapse is for
* \param[in,out] scheduler Scheduler data containing graph
*
* \return Newly added XML element for new graph synapse
*/
static xmlNode *
create_graph_synapse(const pcmk_action_t *action, pcmk_scheduler_t *scheduler)
{
int synapse_priority = 0;
xmlNode *syn = pcmk__xe_create(scheduler->graph, "synapse");
crm_xml_add_int(syn, PCMK_XA_ID, scheduler->num_synapse);
scheduler->num_synapse++;
if (action->rsc != NULL) {
synapse_priority = action->rsc->private->priority;
}
if (action->priority > synapse_priority) {
synapse_priority = action->priority;
}
if (synapse_priority > 0) {
crm_xml_add_int(syn, PCMK__XA_PRIORITY, synapse_priority);
}
return syn;
}
/*!
* \internal
* \brief Add an action to the transition graph XML if appropriate
*
* \param[in,out] data Action to possibly add
* \param[in,out] user_data Scheduler data
*
* \note This will de-duplicate the action inputs, meaning that the
* pcmk__related_action_t:type flags can no longer be relied on to retain
* their original settings. That means this MUST be called after
* pcmk__apply_orderings() is complete, and nothing after this should rely
* on those type flags. (For example, some code looks for type equal to
* some flag rather than whether the flag is set, and some code looks for
* particular combinations of flags -- such code must be done before
* pcmk__create_graph().)
*/
static void
add_action_to_graph(gpointer data, gpointer user_data)
{
pcmk_action_t *action = (pcmk_action_t *) data;
pcmk_scheduler_t *scheduler = (pcmk_scheduler_t *) user_data;
xmlNode *syn = NULL;
xmlNode *set = NULL;
xmlNode *in = NULL;
/* If we haven't already, de-duplicate inputs (even if we won't be adding
* the action to the graph, so that crm_simulate's dot graphs don't have
* duplicates).
*/
if (!pcmk_is_set(action->flags, pcmk__action_inputs_deduplicated)) {
pcmk__deduplicate_action_inputs(action);
pcmk__set_action_flags(action, pcmk__action_inputs_deduplicated);
}
if (pcmk_is_set(action->flags, pcmk__action_added_to_graph)
|| !should_add_action_to_graph(action)) {
return; // Already added, or shouldn't be
}
pcmk__set_action_flags(action, pcmk__action_added_to_graph);
crm_trace("Adding action %d (%s%s%s) to graph",
action->id, action->uuid,
((action->node == NULL)? "" : " on "),
((action->node == NULL)? "" : action->node->private->name));
syn = create_graph_synapse(action, scheduler);
set = pcmk__xe_create(syn, "action_set");
in = pcmk__xe_create(syn, "inputs");
create_graph_action(set, action, false, scheduler);
for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) {
pcmk__related_action_t *input = lpc->data;
if (should_add_input_to_graph(action, input)) {
xmlNode *input_xml = pcmk__xe_create(in, "trigger");
input->graphed = true;
create_graph_action(input_xml, input->action, true, scheduler);
}
}
}
static int transition_id = -1;
/*!
* \internal
* \brief Log a message after calculating a transition
*
* \param[in] scheduler Scheduler data
* \param[in] filename Where transition input is stored
*/
void
pcmk__log_transition_summary(const pcmk_scheduler_t *scheduler,
const char *filename)
{
- if (was_processing_error || crm_config_error) {
+ if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_error)
+ || crm_config_error) {
crm_err("Calculated transition %d (with errors)%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
} else if (was_processing_warning || crm_config_warning) {
crm_warn("Calculated transition %d (with warnings)%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
} else {
crm_notice("Calculated transition %d%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
}
if (crm_config_error) {
crm_notice("Configuration errors found during scheduler processing,"
" please run \"crm_verify -L\" to identify issues");
}
}
/*!
* \internal
* \brief Add a resource's actions to the transition graph
*
* \param[in,out] rsc Resource whose actions should be added
*/
void
pcmk__add_rsc_actions_to_graph(pcmk_resource_t *rsc)
{
GList *iter = NULL;
CRM_ASSERT(rsc != NULL);
pcmk__rsc_trace(rsc, "Adding actions for %s to graph", rsc->id);
// First add the resource's own actions
g_list_foreach(rsc->private->actions, add_action_to_graph,
rsc->private->scheduler);
// Then recursively add its children's actions (appropriate to variant)
for (iter = rsc->private->children; iter != NULL; iter = iter->next) {
pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data;
child_rsc->private->cmds->add_actions_to_graph(child_rsc);
}
}
/*!
* \internal
* \brief Create a transition graph with all cluster actions needed
*
* \param[in,out] scheduler Scheduler data
*/
void
pcmk__create_graph(pcmk_scheduler_t *scheduler)
{
GList *iter = NULL;
const char *value = NULL;
long long limit = 0LL;
GHashTable *config_hash = scheduler->config_hash;
transition_id++;
crm_trace("Creating transition graph %d", transition_id);
scheduler->graph = pcmk__xe_create(NULL, PCMK__XE_TRANSITION_GRAPH);
value = pcmk__cluster_option(config_hash, PCMK_OPT_CLUSTER_DELAY);
crm_xml_add(scheduler->graph, PCMK_OPT_CLUSTER_DELAY, value);
value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT);
crm_xml_add(scheduler->graph, PCMK_OPT_STONITH_TIMEOUT, value);
crm_xml_add(scheduler->graph, "failed-stop-offset", "INFINITY");
if (pcmk_is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) {
crm_xml_add(scheduler->graph, "failed-start-offset", "INFINITY");
} else {
crm_xml_add(scheduler->graph, "failed-start-offset", "1");
}
value = pcmk__cluster_option(config_hash, PCMK_OPT_BATCH_LIMIT);
crm_xml_add(scheduler->graph, PCMK_OPT_BATCH_LIMIT, value);
crm_xml_add_int(scheduler->graph, "transition_id", transition_id);
value = pcmk__cluster_option(config_hash, PCMK_OPT_MIGRATION_LIMIT);
if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) {
crm_xml_add(scheduler->graph, PCMK_OPT_MIGRATION_LIMIT, value);
}
if (scheduler->recheck_by > 0) {
char *recheck_epoch = NULL;
recheck_epoch = crm_strdup_printf("%llu",
(long long) scheduler->recheck_by);
crm_xml_add(scheduler->graph, "recheck-by", recheck_epoch);
free(recheck_epoch);
}
/* The following code will de-duplicate action inputs, so nothing past this
* should rely on the action input type flags retaining their original
* values.
*/
// Add resource actions to graph
for (iter = scheduler->resources; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
pcmk__rsc_trace(rsc, "Processing actions for %s", rsc->id);
rsc->private->cmds->add_actions_to_graph(rsc);
}
// Add pseudo-action for list of nodes with maintenance state update
add_maintenance_update(scheduler);
// Add non-resource (node) actions
for (iter = scheduler->actions; iter != NULL; iter = iter->next) {
pcmk_action_t *action = (pcmk_action_t *) iter->data;
if ((action->rsc != NULL)
&& (action->node != NULL)
&& action->node->details->shutdown
&& !pcmk_is_set(action->rsc->flags, pcmk__rsc_maintenance)
&& !pcmk_any_flags_set(action->flags,
pcmk__action_optional|pcmk__action_runnable)
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) {
/* Eventually we should just ignore the 'fence' case, but for now
* it's the best way to detect (in CTS) when CIB resource updates
* are being lost.
*/
if (pcmk_is_set(scheduler->flags, pcmk__sched_quorate)
|| (scheduler->no_quorum_policy == pcmk_no_quorum_ignore)) {
const bool managed = pcmk_is_set(action->rsc->flags,
pcmk__rsc_managed);
const bool failed = pcmk_is_set(action->rsc->flags,
pcmk__rsc_failed);
crm_crit("Cannot %s %s because of %s:%s%s (%s)",
action->node->details->unclean? "fence" : "shut down",
pcmk__node_name(action->node), action->rsc->id,
(managed? " blocked" : " unmanaged"),
(failed? " failed" : ""), action->uuid);
}
}
add_action_to_graph((gpointer) action, (gpointer) scheduler);
}
crm_log_xml_trace(scheduler->graph, "graph");
}
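Putting the pieces together, a hedged end-to-end sketch of how the new flag flows from error reporting to the transition summary (the driver function is hypothetical; pcmk__create_graph() and pcmk__log_transition_summary() are the entry points shown above):
/* Illustrative flow only -- not part of the patch.
 * During scheduling, pcmk__sched_err() marks the scheduler data with
 * pcmk__sched_processing_error instead of setting a global; afterwards,
 * pcmk__log_transition_summary() checks that flag to decide whether the
 * transition is logged at error severity.
 */
static void
run_and_summarize(pcmk_scheduler_t *scheduler, const char *filename)
{
    pcmk__create_graph(scheduler);
    pcmk__log_transition_summary(scheduler, filename);
}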