Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
index e8509c0c88..49c09f653a 100644
--- a/daemons/controld/controld_te_events.c
+++ b/daemons/controld/controld_te_events.c
@@ -1,613 +1,613 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
#include <crm/common/attrs_internal.h>
#include <crm/common/ipc_attrd_internal.h>
/*!
* \internal
* \brief Action numbers of outside events processed in current update diff
*
* This table is to be used as a set. It should be empty when the transitioner
* begins processing a CIB update diff. It ensures that if there are multiple
* events (for example, "_last_0" and "_last_failure_0") for the same action,
* only one of them updates the failcount. Events that originate outside the
* cluster can't be confirmed, since they're not in the transition graph.
*/
static GHashTable *outside_events = NULL;
/*!
* \internal
* \brief Empty the hash table containing action numbers of outside events
*/
void
controld_remove_all_outside_events(void)
{
if (outside_events != NULL) {
g_hash_table_remove_all(outside_events);
}
}
/*!
* \internal
* \brief Destroy the hash table containing action numbers of outside events
*/
void
controld_destroy_outside_events_table(void)
{
if (outside_events != NULL) {
g_hash_table_destroy(outside_events);
outside_events = NULL;
}
}
/*!
* \internal
* \brief Add an outside event's action number to a set
*
* \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
* event was not already in the set, or \p pcmk_rc_already otherwise.
*/
static int
record_outside_event(gint action_num)
{
if (outside_events == NULL) {
outside_events = g_hash_table_new(NULL, NULL);
}
if (g_hash_table_add(outside_events, GINT_TO_POINTER(action_num))) {
return pcmk_rc_ok;
}
return pcmk_rc_already;
}
gboolean
fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node)
{
const char *target_uuid = NULL;
const char *router = NULL;
const char *router_uuid = NULL;
xmlNode *last_action = NULL;
GList *gIter = NULL;
GList *gIter2 = NULL;
if (graph == NULL || graph->complete) {
return FALSE;
}
gIter = graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
/* We've already been here */
continue;
}
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
if ((action->type == pcmk__pseudo_graph_action)
|| pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
} else if (action->type == pcmk__cluster_graph_action) {
const char *task = crm_element_value(action->xml,
PCMK_XA_OPERATION);
if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
continue;
}
}
target_uuid = crm_element_value(action->xml,
PCMK__META_ON_NODE_UUID);
router = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if (router) {
const crm_node_t *node =
pcmk__get_node(0, router, NULL,
pcmk__node_search_cluster_member);
if (node) {
router_uuid = node->uuid;
}
}
if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
last_action = action->xml;
stop_te_timer(action);
pcmk__update_graph(graph, action);
if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
crm_notice("Action %d (%s) was pending on %s (offline)",
action->id,
crm_element_value(action->xml,
PCMK__XA_OPERATION_KEY),
down_node);
} else {
crm_info("Action %d (%s) is scheduled for %s (offline)",
action->id,
crm_element_value(action->xml, PCMK__XA_OPERATION_KEY),
down_node);
}
}
}
}
if (last_action != NULL) {
crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Node failure", last_action);
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Update failure-related node attributes if warranted
*
* \param[in] event XML describing operation that (maybe) failed
* \param[in] event_node_uuid Node that event occurred on
* \param[in] rc Actual operation return code
* \param[in] target_rc Expected operation return code
* \param[in] do_update If TRUE, do update regardless of operation type
* \param[in] ignore_failures If TRUE, update last failure but not fail count
*
* \return TRUE if this was not a direct nack, success or lrm status refresh
*/
static gboolean
update_failcount(const xmlNode *event, const char *event_node_uuid, int rc,
int target_rc, gboolean do_update, gboolean ignore_failures)
{
guint interval_ms = 0;
char *task = NULL;
char *rsc_id = NULL;
const char *value = NULL;
const char *id = crm_element_value(event, PCMK__XA_OPERATION_KEY);
- const char *on_uname = crm_peer_uname(event_node_uuid);
+ const char *on_uname = pcmk__node_name_from_uuid(event_node_uuid);
const char *origin = crm_element_value(event, PCMK_XA_CRM_DEBUG_ORIGIN);
// Nothing needs to be done for success or status refresh
if (rc == target_rc) {
return FALSE;
} else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
id, rc, on_uname);
return FALSE;
}
/* Sanity check */
CRM_CHECK(on_uname != NULL, return TRUE);
CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
crm_err("Couldn't parse: %s", pcmk__xe_id(event)); goto bail);
/* Decide whether update is necessary and what value to use */
if ((interval_ms > 0)
|| pcmk__str_eq(task, PCMK_ACTION_PROMOTE, pcmk__str_none)
|| pcmk__str_eq(task, PCMK_ACTION_DEMOTE, pcmk__str_none)) {
do_update = TRUE;
} else if (pcmk__str_eq(task, PCMK_ACTION_START, pcmk__str_none)) {
do_update = TRUE;
value = pcmk__s(controld_globals.transition_graph->failed_start_offset,
PCMK_VALUE_INFINITY);
} else if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_none)) {
do_update = TRUE;
value = pcmk__s(controld_globals.transition_graph->failed_stop_offset,
PCMK_VALUE_INFINITY);
}
if (do_update) {
pcmk__attrd_query_pair_t *fail_pair = NULL;
pcmk__attrd_query_pair_t *last_pair = NULL;
char *fail_name = NULL;
char *last_name = NULL;
GList *attrs = NULL;
uint32_t opts = pcmk__node_attr_none;
char *now = pcmk__ttoa(time(NULL));
// Fail count will be either incremented or set to infinity
if (!pcmk_str_is_infinity(value)) {
value = PCMK_XA_VALUE "++";
}
if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
opts |= pcmk__node_attr_remote;
}
crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
(ignore_failures? "last failure" : "failcount"),
rsc_id, on_uname, task, rc, value, now);
/* Update the fail count, if we're not ignoring failures */
if (!ignore_failures) {
fail_pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));
fail_name = pcmk__failcount_name(rsc_id, task, interval_ms);
fail_pair->name = fail_name;
fail_pair->value = value;
fail_pair->node = on_uname;
attrs = g_list_prepend(attrs, fail_pair);
}
/* Update the last failure time (even if we're ignoring failures,
* so that failure can still be detected and shown, e.g. by crm_mon)
*/
last_pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));
last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
last_pair->name = last_name;
last_pair->value = now;
last_pair->node = on_uname;
attrs = g_list_prepend(attrs, last_pair);
update_attrd_list(attrs, opts);
free(fail_name);
free(fail_pair);
free(last_name);
free(last_pair);
g_list_free(attrs);
free(now);
}
bail:
free(rsc_id);
free(task);
return TRUE;
}
pcmk__graph_action_t *
controld_get_action(int id)
{
for (GList *item = controld_globals.transition_graph->synapses;
item != NULL; item = item->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data;
for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data;
if (action->id == id) {
return action;
}
}
}
return NULL;
}
pcmk__graph_action_t *
get_cancel_action(const char *id, const char *node)
{
GList *gIter = NULL;
GList *gIter2 = NULL;
gIter = controld_globals.transition_graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
const char *task = NULL;
const char *target = NULL;
pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
task = crm_element_value(action->xml, PCMK_XA_OPERATION);
if (!pcmk__str_eq(PCMK_ACTION_CANCEL, task, pcmk__str_casei)) {
continue;
}
task = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
crm_trace("Wrong key %s for %s on %s", task, id, node);
continue;
}
target = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
crm_trace("Wrong node %s for %s on %s", target, id, node);
continue;
}
crm_trace("Found %s on %s", id, node);
return action;
}
}
return NULL;
}
bool
confirm_cancel_action(const char *id, const char *node_id)
{
const char *op_key = NULL;
const char *node_name = NULL;
pcmk__graph_action_t *cancel = get_cancel_action(id, node_id);
if (cancel == NULL) {
return FALSE;
}
op_key = crm_element_value(cancel->xml, PCMK__XA_OPERATION_KEY);
node_name = crm_element_value(cancel->xml, PCMK__META_ON_NODE);
stop_te_timer(cancel);
te_action_confirmed(cancel, controld_globals.transition_graph);
crm_info("Cancellation of %s on %s confirmed (action %d)",
op_key, node_name, cancel->id);
return TRUE;
}
/* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
#define XPATH_DOWNED "//" PCMK__XE_DOWNED \
"/" PCMK_XE_NODE "[@" PCMK_XA_ID "='%s']"
/*!
* \brief Find a transition event that would have made a specified node down
*
* \param[in] target UUID of node to match
*
* \return Matching event if found, NULL otherwise
*/
pcmk__graph_action_t *
match_down_event(const char *target)
{
pcmk__graph_action_t *match = NULL;
xmlXPathObjectPtr xpath_ret = NULL;
GList *gIter, *gIter2;
char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
for (gIter = controld_globals.transition_graph->synapses;
gIter != NULL && match == NULL;
gIter = gIter->next) {
for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions;
gIter2 != NULL && match == NULL;
gIter2 = gIter2->next) {
match = (pcmk__graph_action_t *) gIter2->data;
if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
xpath_ret = xpath_search(match->xml, xpath);
if (numXpathResults(xpath_ret) < 1) {
match = NULL;
}
freeXpathObject(xpath_ret);
} else {
// Only actions that were actually started can match
match = NULL;
}
}
}
free(xpath);
if (match != NULL) {
crm_debug("Shutdown action %d (%s) found for node %s", match->id,
crm_element_value(match->xml, PCMK__XA_OPERATION_KEY),
target);
} else {
crm_debug("No reason to expect node %s to be down", target);
}
return match;
}
void
process_graph_event(xmlNode *event, const char *event_node)
{
int rc = -1; // Actual result
int target_rc = -1; // Expected result
int status = -1; // Executor status
int callid = -1; // Executor call ID
int transition_num = -1; // Transition number
int action_num = -1; // Action number within transition
char *update_te_uuid = NULL;
bool ignore_failures = FALSE;
const char *id = NULL;
const char *desc = NULL;
const char *magic = NULL;
const char *uname = NULL;
CRM_ASSERT(event != NULL);
/*
<lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/>
*/
magic = crm_element_value(event, PCMK__XA_TRANSITION_KEY);
if (magic == NULL) {
/* non-change */
return;
}
crm_element_value_int(event, PCMK__XA_OP_STATUS, &status);
if (status == PCMK_EXEC_PENDING) {
return;
}
id = crm_element_value(event, PCMK__XA_OPERATION_KEY);
crm_element_value_int(event, PCMK__XA_RC_CODE, &rc);
crm_element_value_int(event, PCMK__XA_CALL_ID, &callid);
rc = pcmk__effective_rc(rc);
if (decode_transition_key(magic, &update_te_uuid, &transition_num,
&action_num, &target_rc) == FALSE) {
// decode_transition_key() already logged the bad key
crm_err("Can't process action %s result: Incompatible versions? "
CRM_XS " call-id=%d", id, callid);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Bad event", event);
return;
}
if (transition_num == -1) {
// E.g. crm_resource --fail
if (record_outside_event(action_num) != pcmk_rc_ok) {
crm_debug("Outside event with transition key '%s' has already been "
"processed", magic);
goto bail;
}
desc = "initiated outside of the cluster";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Unexpected event", event);
} else if ((action_num < 0)
|| !pcmk__str_eq(update_te_uuid, controld_globals.te_uuid,
pcmk__str_none)) {
desc = "initiated by a different DC";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Foreign event", event);
} else if ((controld_globals.transition_graph->id != transition_num)
|| controld_globals.transition_graph->complete) {
// Action is not from currently active transition
guint interval_ms = 0;
if (parse_op_key(id, NULL, NULL, &interval_ms)
&& (interval_ms != 0)) {
/* Recurring actions have the transition number they were first
* scheduled in.
*/
if (status == PCMK_EXEC_CANCELLED) {
confirm_cancel_action(id, get_node_id(event));
goto bail;
}
desc = "arrived after initial scheduling";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Change in recurring result", event);
} else if (controld_globals.transition_graph->id != transition_num) {
desc = "arrived really late";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Old event", event);
} else {
desc = "arrived late";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Inactive graph", event);
}
} else {
// Event is result of an action from currently active transition
pcmk__graph_action_t *action = controld_get_action(action_num);
if (action == NULL) {
// Should never happen
desc = "unknown";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Unknown event", event);
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
/* Nothing further needs to be done if the action has already been
* confirmed. This can happen e.g. when processing both an
* "xxx_last_0" or "xxx_last_failure_0" record as well as the main
* history record, which would otherwise result in incorrectly
* bumping the fail count twice.
*/
crm_log_xml_debug(event, "Event already confirmed:");
goto bail;
} else {
/* An action result needs to be confirmed.
* (This is the only case where desc == NULL.)
*/
if (pcmk__str_eq(crm_meta_value(action->params, PCMK_META_ON_FAIL),
PCMK_VALUE_IGNORE, pcmk__str_casei)) {
ignore_failures = TRUE;
} else if (rc != target_rc) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
}
stop_te_timer(action);
te_action_confirmed(action, controld_globals.transition_graph);
if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
abort_transition(action->synapse->priority + 1,
pcmk__graph_restart, "Event failed", event);
}
}
}
if (id == NULL) {
id = "unknown action";
}
uname = crm_element_value(event, PCMK__META_ON_NODE);
if (uname == NULL) {
uname = "unknown node";
}
if (status == PCMK_EXEC_INVALID) {
// We couldn't attempt the action
crm_info("Transition %d action %d (%s on %s): %s",
transition_num, action_num, id, uname,
pcmk_exec_status_str(status));
} else if (desc && update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), FALSE)) {
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(target_rc),
services_ocf_exitcode_str(rc),
target_rc, rc, callid, desc);
} else if (desc) {
crm_info("Transition %d action %d (%s on %s): %s "
CRM_XS " rc=%d target-rc=%d call-id=%d",
transition_num, action_num, id, uname,
desc, rc, target_rc, callid);
} else if (rc == target_rc) {
crm_info("Transition %d action %d (%s on %s) confirmed: %s "
CRM_XS " rc=%d call-id=%d",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(rc), rc, callid);
} else {
update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), ignore_failures);
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
CRM_XS " target-rc=%d rc=%d call-id=%d",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(target_rc),
services_ocf_exitcode_str(rc),
target_rc, rc, callid);
}
bail:
free(update_te_uuid);
}
diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
index de3ccd9607..3e7120933b 100644
--- a/daemons/controld/controld_te_utils.c
+++ b/daemons/controld/controld_te_utils.c
@@ -1,507 +1,507 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
//! Triggers transition graph processing
static crm_trigger_t *transition_trigger = NULL;
static GHashTable *node_pending_timers = NULL;
gboolean
stop_te_timer(pcmk__graph_action_t *action)
{
if (action == NULL) {
return FALSE;
}
if (action->timer != 0) {
crm_trace("Stopping action timer");
g_source_remove(action->timer);
action->timer = 0;
} else {
crm_trace("Action timer was already stopped");
return FALSE;
}
return TRUE;
}
static gboolean
te_graph_trigger(gpointer user_data)
{
if (controld_globals.transition_graph == NULL) {
crm_debug("Nothing to do");
return TRUE;
}
crm_trace("Invoking graph %d in state %s",
controld_globals.transition_graph->id,
fsa_state2string(controld_globals.fsa_state));
switch (controld_globals.fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
return TRUE;
default:
break;
}
if (!controld_globals.transition_graph->complete) {
enum pcmk__graph_status graph_rc;
int orig_limit = controld_globals.transition_graph->batch_limit;
int throttled_limit = throttle_get_total_job_limit(orig_limit);
controld_globals.transition_graph->batch_limit = throttled_limit;
graph_rc = pcmk__execute_graph(controld_globals.transition_graph);
controld_globals.transition_graph->batch_limit = orig_limit;
if (graph_rc == pcmk__graph_active) {
crm_trace("Transition not yet complete");
return TRUE;
} else if (graph_rc == pcmk__graph_pending) {
crm_trace("Transition not yet complete - no actions fired");
return TRUE;
}
if (graph_rc != pcmk__graph_complete) {
crm_warn("Transition failed: %s",
pcmk__graph_status2text(graph_rc));
pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph);
}
}
crm_debug("Transition %d is now complete",
controld_globals.transition_graph->id);
controld_globals.transition_graph->complete = true;
notify_crmd(controld_globals.transition_graph);
return TRUE;
}
/*!
* \internal
* \brief Initialize transition trigger
*/
void
controld_init_transition_trigger(void)
{
transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger,
NULL);
}
/*!
* \internal
* \brief Destroy transition trigger
*/
void
controld_destroy_transition_trigger(void)
{
mainloop_destroy_trigger(transition_trigger);
transition_trigger = NULL;
}
void
controld_trigger_graph_as(const char *fn, int line)
{
crm_trace("%s:%d - Triggered graph processing", fn, line);
mainloop_set_trigger(transition_trigger);
}
static struct abort_timer_s {
bool aborted;
guint id;
int priority;
enum pcmk__graph_next action;
const char *text;
} abort_timer = { 0, };
static gboolean
abort_timer_popped(gpointer data)
{
struct abort_timer_s *abort_timer = (struct abort_timer_s *) data;
if (AM_I_DC && (abort_timer->aborted == FALSE)) {
abort_transition(abort_timer->priority, abort_timer->action,
abort_timer->text, NULL);
}
abort_timer->id = 0;
return FALSE; // do not immediately reschedule timer
}
/*!
* \internal
* \brief Abort transition after delay, if not already aborted in that time
*
* \param[in] abort_text Must be literal string
*/
void
abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
const char *abort_text, guint delay_ms)
{
if (abort_timer.id) {
// Timer already in progress, stop and reschedule
g_source_remove(abort_timer.id);
}
abort_timer.aborted = FALSE;
abort_timer.priority = abort_priority;
abort_timer.action = abort_action;
abort_timer.text = abort_text;
abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, &abort_timer);
}
static void
free_node_pending_timer(gpointer data)
{
struct abort_timer_s *node_pending_timer = (struct abort_timer_s *) data;
if (node_pending_timer->id != 0) {
g_source_remove(node_pending_timer->id);
node_pending_timer->id = 0;
}
free(node_pending_timer);
}
static gboolean
node_pending_timer_popped(gpointer key)
{
struct abort_timer_s *node_pending_timer = NULL;
if (node_pending_timers == NULL) {
return FALSE;
}
node_pending_timer = g_hash_table_lookup(node_pending_timers, key);
if (node_pending_timer == NULL) {
return FALSE;
}
crm_warn("Node with " PCMK_XA_ID " '%s' pending timed out (%us) "
"on joining the process group",
(const char *) key, controld_globals.node_pending_timeout);
if (controld_globals.node_pending_timeout > 0) {
abort_timer_popped(node_pending_timer);
}
g_hash_table_remove(node_pending_timers, key);
return FALSE; // do not reschedule timer
}
static void
init_node_pending_timer(const crm_node_t *node, guint timeout)
{
struct abort_timer_s *node_pending_timer = NULL;
char *key = NULL;
if (node->uuid == NULL) {
return;
}
if (node_pending_timers == NULL) {
node_pending_timers = pcmk__strikey_table(free,
free_node_pending_timer);
// The timer is somehow already existing
} else if (g_hash_table_lookup(node_pending_timers, node->uuid) != NULL) {
return;
}
crm_notice("Waiting for pending %s with " PCMK_XA_ID " '%s' "
"to join the process group (timeout=%us)",
node->uname ? node->uname : "node", node->uuid,
controld_globals.node_pending_timeout);
key = pcmk__str_copy(node->uuid);
node_pending_timer = pcmk__assert_alloc(1, sizeof(struct abort_timer_s));
node_pending_timer->aborted = FALSE;
node_pending_timer->priority = PCMK_SCORE_INFINITY;
node_pending_timer->action = pcmk__graph_restart;
node_pending_timer->text = "Node pending timed out";
g_hash_table_replace(node_pending_timers, key, node_pending_timer);
node_pending_timer->id = g_timeout_add_seconds(timeout,
node_pending_timer_popped,
key);
CRM_ASSERT(node_pending_timer->id != 0);
}
static void
remove_node_pending_timer(const char *node_uuid)
{
if (node_pending_timers == NULL) {
return;
}
g_hash_table_remove(node_pending_timers, node_uuid);
}
void
controld_node_pending_timer(const crm_node_t *node)
{
long long remaining_timeout = 0;
/* If the node is not an active cluster node, is leaving the cluster, or is
* already part of CPG, or PCMK_OPT_NODE_PENDING_TIMEOUT is disabled, free
* any node pending timer for it.
*/
if (pcmk_is_set(node->flags, crm_remote_node)
|| (node->when_member <= 1) || (node->when_online > 0)
|| (controld_globals.node_pending_timeout == 0)) {
remove_node_pending_timer(node->uuid);
return;
}
// Node is a cluster member but offline in CPG
remaining_timeout = node->when_member - time(NULL)
+ controld_globals.node_pending_timeout;
/* It already passed node pending timeout somehow.
* Free any node pending timer of it.
*/
if (remaining_timeout <= 0) {
remove_node_pending_timer(node->uuid);
return;
}
init_node_pending_timer(node, remaining_timeout);
}
void
controld_free_node_pending_timers(void)
{
if (node_pending_timers == NULL) {
return;
}
g_hash_table_destroy(node_pending_timers);
node_pending_timers = NULL;
}
static const char *
abort2text(enum pcmk__graph_next abort_action)
{
switch (abort_action) {
case pcmk__graph_done: return "done";
case pcmk__graph_wait: return "stop";
case pcmk__graph_restart: return "restart";
case pcmk__graph_shutdown: return "shutdown";
}
return "unknown";
}
static bool
update_abort_priority(pcmk__graph_t *graph, int priority,
enum pcmk__graph_next action, const char *abort_reason)
{
bool change = FALSE;
if (graph == NULL) {
return change;
}
if (graph->abort_priority < priority) {
crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority);
graph->abort_priority = priority;
if (graph->abort_reason != NULL) {
crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason);
}
graph->abort_reason = abort_reason;
change = TRUE;
}
if (graph->completion_action < action) {
crm_debug("Abort action %s superseded by %s: %s",
abort2text(graph->completion_action), abort2text(action), abort_reason);
graph->completion_action = action;
change = TRUE;
}
return change;
}
void
abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action,
const char *abort_text, const xmlNode *reason,
const char *fn, int line)
{
int add[] = { 0, 0, 0 };
int del[] = { 0, 0, 0 };
int level = LOG_INFO;
const xmlNode *diff = NULL;
const xmlNode *change = NULL;
CRM_CHECK(controld_globals.transition_graph != NULL, return);
switch (controld_globals.fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
crm_info("Abort %s suppressed: state=%s (%scomplete)",
abort_text, fsa_state2string(controld_globals.fsa_state),
(controld_globals.transition_graph->complete? "" : "in"));
return;
default:
break;
}
abort_timer.aborted = TRUE;
controld_expect_sched_reply(NULL);
if (!controld_globals.transition_graph->complete
&& update_abort_priority(controld_globals.transition_graph,
abort_priority, abort_action,
abort_text)) {
level = LOG_NOTICE;
}
if (reason != NULL) {
const xmlNode *search = NULL;
for(search = reason; search; search = search->parent) {
if (pcmk__xe_is(search, PCMK_XE_DIFF)) {
diff = search;
break;
}
}
if(diff) {
xml_patch_versions(diff, add, del);
for(search = reason; search; search = search->parent) {
if (pcmk__xe_is(search, PCMK_XE_CHANGE)) {
change = search;
break;
}
}
}
}
if (reason == NULL) {
do_crm_log(level,
"Transition %d aborted: %s " CRM_XS " source=%s:%d "
"complete=%s", controld_globals.transition_graph->id,
abort_text, fn, line,
pcmk__btoa(controld_globals.transition_graph->complete));
} else if(change == NULL) {
GString *local_path = pcmk__element_xpath(reason);
CRM_ASSERT(local_path != NULL);
do_crm_log(level, "Transition %d aborted by %s.%s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
controld_globals.transition_graph->id, reason->name,
pcmk__xe_id(reason), abort_text, add[0], add[1], add[2], fn,
line, (const char *) local_path->str,
pcmk__btoa(controld_globals.transition_graph->complete));
g_string_free(local_path, TRUE);
} else {
const char *op = crm_element_value(change, PCMK_XA_OPERATION);
const char *path = crm_element_value(change, PCMK_XA_PATH);
if(change == reason) {
if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
reason = reason->children;
} else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
reason = pcmk__xe_first_child(reason, PCMK_XE_CHANGE_RESULT,
NULL, NULL);
if(reason) {
reason = reason->children;
}
}
CRM_CHECK(reason != NULL, goto done);
}
if (strcmp(op, PCMK_VALUE_DELETE) == 0) {
const char *shortpath = strrchr(path, '/');
do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
controld_globals.transition_graph->id,
(shortpath? (shortpath + 1) : path), abort_text,
add[0], add[1], add[2], fn, line, path,
pcmk__btoa(controld_globals.transition_graph->complete));
} else if (pcmk__xe_is(reason, PCMK_XE_NVPAIR)) {
do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
controld_globals.transition_graph->id,
crm_element_value(reason, PCMK_XA_ID), op,
crm_element_value(reason, PCMK_XA_NAME),
crm_element_value(reason, PCMK_XA_VALUE),
abort_text, add[0], add[1], add[2], fn, line, path,
pcmk__btoa(controld_globals.transition_graph->complete));
} else if (pcmk__xe_is(reason, PCMK__XE_LRM_RSC_OP)) {
const char *magic = crm_element_value(reason,
PCMK__XA_TRANSITION_MAGIC);
do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
controld_globals.transition_graph->id,
crm_element_value(reason, PCMK__XA_OPERATION_KEY), op,
crm_element_value(reason, PCMK__META_ON_NODE),
abort_text,
magic, add[0], add[1], add[2], fn, line,
pcmk__btoa(controld_globals.transition_graph->complete));
} else if (pcmk__str_any_of((const char *) reason->name,
PCMK__XE_NODE_STATE, PCMK_XE_NODE, NULL)) {
- const char *uname = crm_peer_uname(pcmk__xe_id(reason));
+ const char *uname = pcmk__node_name_from_uuid(pcmk__xe_id(reason));
do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
controld_globals.transition_graph->id,
reason->name, op, pcmk__s(uname, pcmk__xe_id(reason)),
abort_text, add[0], add[1], add[2], fn, line,
pcmk__btoa(controld_globals.transition_graph->complete));
} else {
const char *id = pcmk__xe_id(reason);
do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
controld_globals.transition_graph->id,
reason->name, pcmk__s(id, ""), pcmk__s(op, "change"),
abort_text, add[0], add[1], add[2], fn, line, path,
pcmk__btoa(controld_globals.transition_graph->complete));
}
}
done:
if (controld_globals.transition_graph->complete) {
if (controld_get_period_transition_timer() > 0) {
controld_stop_transition_timer();
controld_start_transition_timer();
} else {
register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
}
return;
}
trigger_graph();
}
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index a233e8471b..fc24c77310 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -1,196 +1,197 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER_INTERNAL__H
# define PCMK__CRM_CLUSTER_INTERNAL__H
# include <stdbool.h>
# include <stdint.h> // uint32_t, uint64_t
# include <glib.h> // gboolean
# include <crm/cluster.h>
enum crm_proc_flag {
/* @COMPAT When crm_node_t:processes is made internal, we can merge this
* into node flags or turn it into a boolean. Until then, in theory
* something could depend on these particular numeric values.
*/
crm_proc_none = 0x00000001,
// Cluster layers
crm_proc_cpg = 0x04000000,
};
// Used with node cache search functions
enum pcmk__node_search_flags {
//! Does not affect search
pcmk__node_search_none = 0,
//! Search for cluster nodes from membership cache
pcmk__node_search_cluster_member = (1 << 0),
//! Search for remote nodes
pcmk__node_search_remote = (1 << 1),
//! Search for cluster member nodes and remote nodes
pcmk__node_search_any = pcmk__node_search_cluster_member
|pcmk__node_search_remote,
/* @COMPAT The values before this must stay the same until we can drop
* support for enum crm_get_peer_flags
*/
//! Search for cluster nodes from CIB (as of last cache refresh)
pcmk__node_search_cluster_cib = (1 << 2),
};
/*!
* \internal
* \brief Return the process bit corresponding to the current cluster stack
*
* \return Process flag if detectable, otherwise 0
*/
static inline uint32_t
crm_get_cluster_proc(void)
{
switch (pcmk_get_cluster_layer()) {
case pcmk_cluster_layer_corosync:
return crm_proc_cpg;
default:
break;
}
return crm_proc_none;
}
/*!
* \internal
* \brief Get log-friendly string description of a Corosync return code
*
* \param[in] error Corosync return code
*
* \return Log-friendly string description corresponding to \p error
*/
static inline const char *
pcmk__cs_err_str(int error)
{
# if SUPPORT_COROSYNC
switch (error) {
case CS_OK: return "OK";
case CS_ERR_LIBRARY: return "Library error";
case CS_ERR_VERSION: return "Version error";
case CS_ERR_INIT: return "Initialization error";
case CS_ERR_TIMEOUT: return "Timeout";
case CS_ERR_TRY_AGAIN: return "Try again";
case CS_ERR_INVALID_PARAM: return "Invalid parameter";
case CS_ERR_NO_MEMORY: return "No memory";
case CS_ERR_BAD_HANDLE: return "Bad handle";
case CS_ERR_BUSY: return "Busy";
case CS_ERR_ACCESS: return "Access error";
case CS_ERR_NOT_EXIST: return "Doesn't exist";
case CS_ERR_NAME_TOO_LONG: return "Name too long";
case CS_ERR_EXIST: return "Exists";
case CS_ERR_NO_SPACE: return "No space";
case CS_ERR_INTERRUPT: return "Interrupt";
case CS_ERR_NAME_NOT_FOUND: return "Name not found";
case CS_ERR_NO_RESOURCES: return "No resources";
case CS_ERR_NOT_SUPPORTED: return "Not supported";
case CS_ERR_BAD_OPERATION: return "Bad operation";
case CS_ERR_FAILED_OPERATION: return "Failed operation";
case CS_ERR_MESSAGE_ERROR: return "Message error";
case CS_ERR_QUEUE_FULL: return "Queue full";
case CS_ERR_QUEUE_NOT_AVAILABLE: return "Queue not available";
case CS_ERR_BAD_FLAGS: return "Bad flags";
case CS_ERR_TOO_BIG: return "Too big";
case CS_ERR_NO_SECTIONS: return "No sections";
}
# endif
return "Corosync error";
}
# if SUPPORT_COROSYNC
#if 0
/* This is the new way to do it, but we still support all Corosync 2 versions,
* and this isn't always available. A better alternative here would be to check
* for support in the configure script and enable this conditionally.
*/
#define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP)
#else
#define pcmk__init_cmap(handle) cmap_initialize(handle)
#endif
char *pcmk__corosync_cluster_name(void);
bool pcmk__corosync_add_nodes(xmlNode *xml_parent);
void pcmk__cpg_confchg_cb(cpg_handle_t handle,
const struct cpg_name *group_name,
const struct cpg_address *member_list,
size_t member_list_entries,
const struct cpg_address *left_list,
size_t left_list_entries,
const struct cpg_address *joined_list,
size_t joined_list_entries);
char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id,
uint32_t pid, void *content, uint32_t *kind,
const char **from);
# endif
const char *pcmk__cluster_node_uuid(crm_node_t *node);
char *pcmk__cluster_node_name(uint32_t nodeid);
const char *pcmk__cluster_local_node_name(void);
+const char *pcmk__node_name_from_uuid(const char *uuid);
crm_node_t *crm_update_peer_proc(const char *source, crm_node_t * peer,
uint32_t flag, const char *status);
crm_node_t *pcmk__update_peer_state(const char *source, crm_node_t *node,
const char *state, uint64_t membership);
void pcmk__update_peer_expected(const char *source, crm_node_t *node,
const char *expected);
void pcmk__reap_unseen_nodes(uint64_t ring_id);
void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
gboolean),
void (*destroy) (gpointer));
enum crm_ais_msg_types pcmk__cluster_parse_msg_type(const char *text);
bool pcmk__cluster_send_message(const crm_node_t *node,
enum crm_ais_msg_types service,
const xmlNode *data);
// Membership
void pcmk__cluster_init_node_caches(void);
void pcmk__cluster_destroy_node_caches(void);
void pcmk__cluster_set_autoreap(bool enable);
void pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type,
crm_node_t *,
const void *));
bool pcmk__cluster_is_node_active(const crm_node_t *node);
unsigned int pcmk__cluster_num_active_nodes(void);
unsigned int pcmk__cluster_num_remote_nodes(void);
crm_node_t *pcmk__cluster_lookup_remote_node(const char *node_name);
void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name);
void pcmk__cluster_forget_remote_node(const char *node_name);
crm_node_t *pcmk__search_node_caches(unsigned int id, const char *uname,
uint32_t flags);
void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id);
void pcmk__refresh_node_caches_from_cib(xmlNode *cib);
crm_node_t *pcmk__get_node(unsigned int id, const char *uname,
const char *uuid, uint32_t flags);
#endif // PCMK__CRM_CLUSTER_INTERNAL__H
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index eb0e25f578..5360e11bed 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -1,566 +1,592 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <dlfcn.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/utsname.h> // uname()
#include <glib.h> // gboolean
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include "crmcluster_private.h"
CRM_TRACE_INIT_DATA(cluster);
/*!
* \internal
* \brief Get the message type equivalent of a string
*
* \param[in] text String of message type
*
* \return Message type equivalent of \p text
*/
enum crm_ais_msg_types
pcmk__cluster_parse_msg_type(const char *text)
{
int rc = 0;
int type = crm_msg_none;
CRM_CHECK(text != NULL, return crm_msg_none);
text = pcmk__message_name(text);
if (pcmk__str_eq(text, "ais", pcmk__str_none)) {
return crm_msg_ais;
}
if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_none)) {
return crm_msg_cib;
}
if (pcmk__str_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) {
return crm_msg_crmd;
}
if (pcmk__str_eq(text, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
return crm_msg_te;
}
if (pcmk__str_eq(text, CRM_SYSTEM_PENGINE, pcmk__str_none)) {
return crm_msg_pe;
}
if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_none)) {
return crm_msg_lrmd;
}
if (pcmk__str_eq(text, CRM_SYSTEM_STONITHD, pcmk__str_none)) {
return crm_msg_stonithd;
}
if (pcmk__str_eq(text, "stonith-ng", pcmk__str_none)) {
return crm_msg_stonith_ng;
}
if (pcmk__str_eq(text, "attrd", pcmk__str_none)) {
return crm_msg_attrd;
}
/* This will normally be a transient client rather than a cluster daemon.
* Set the type to the pid of the client.
*
* @TODO Check whether this is necessary and correct.
*/
rc = sscanf(text, "%d", &type);
if ((rc != 1) || (type <= crm_msg_stonith_ng)) {
// Ensure it's sane; don't falsely return a standard message type
type = crm_msg_none;
}
return type;
}
/*!
* \internal
* \brief Get a node's cluster-layer UUID, setting it if not already set
*
* \param[in,out] node Node to check
*
* \return Cluster-layer node UUID of \p node, or \c NULL if unknown
*/
const char *
pcmk__cluster_node_uuid(crm_node_t *node)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
if (node == NULL) {
return NULL;
}
if (node->uuid != NULL) {
return node->uuid;
}
switch (cluster_layer) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
node->uuid = pcmk__corosync_uuid(node);
return node->uuid;
#endif // SUPPORT_COROSYNC
default:
crm_err("Unsupported cluster layer %s",
pcmk_cluster_layer_text(cluster_layer));
return NULL;
}
}
/*!
* \internal
* \brief Connect to the cluster layer
*
* \param[in,out] cluster Initialized cluster object to connect
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_connect(pcmk_cluster_t *cluster)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
crm_notice("Connecting to %s cluster layer", cluster_layer_s);
switch (cluster_layer) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
return pcmk__corosync_connect(cluster);
#endif // SUPPORT_COROSYNC
default:
break;
}
crm_err("Failed to connect to unsupported cluster layer %s",
cluster_layer_s);
return EPROTONOSUPPORT;
}
/*!
* \brief Disconnect from the cluster layer
*
* \param[in,out] cluster Cluster object to disconnect
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_disconnect(pcmk_cluster_t *cluster)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
crm_info("Disconnecting from %s cluster layer", cluster_layer_s);
switch (cluster_layer) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
pcmk__corosync_disconnect(cluster);
pcmk__cluster_destroy_node_caches();
return pcmk_rc_ok;
#endif // SUPPORT_COROSYNC
default:
break;
}
crm_err("Failed to disconnect from unsupported cluster layer %s",
cluster_layer_s);
return EPROTONOSUPPORT;
}
/*!
* \brief Allocate a new \p pcmk_cluster_t object
*
* \return A newly allocated \p pcmk_cluster_t object (guaranteed not \c NULL)
* \note The caller is responsible for freeing the return value using
* \p pcmk_cluster_free().
*/
pcmk_cluster_t *
pcmk_cluster_new(void)
{
return (pcmk_cluster_t *) pcmk__assert_alloc(1, sizeof(pcmk_cluster_t));
}
/*!
* \brief Free a \p pcmk_cluster_t object and its dynamically allocated members
*
* \param[in,out] cluster Cluster object to free
*/
void
pcmk_cluster_free(pcmk_cluster_t *cluster)
{
if (cluster == NULL) {
return;
}
free(cluster->uuid);
free(cluster->uname);
free(cluster);
}
/*!
* \brief Set the destroy function for a cluster object
*
* \param[in,out] cluster Cluster object
* \param[in] fn Destroy function to set
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer))
{
if (cluster == NULL) {
return EINVAL;
}
cluster->destroy = fn;
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Send an XML message via the cluster messaging layer
*
* \param[in] node Cluster node to send message to
* \param[in] service Message type to use in message host info
* \param[in] data XML message to send
*
* \return \c true on success, or \c false otherwise
*/
bool
pcmk__cluster_send_message(const crm_node_t *node,
enum crm_ais_msg_types service, const xmlNode *data)
{
// @TODO Return standard Pacemaker return code
switch (pcmk_get_cluster_layer()) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
return pcmk__cpg_send_xml(data, node, service);
#endif // SUPPORT_COROSYNC
default:
break;
}
return false;
}
/*!
* \internal
* \brief Get the node name corresponding to a cluster-layer node ID
*
* Get the node name from the cluster layer if possible. Otherwise, if for the
* local node, call \c uname() and get the \c nodename member from the
* <tt>struct utsname</tt> object.
*
* \param[in] nodeid Node ID to check (or 0 for the local node)
*
* \return Node name corresponding to \p nodeid
*
* \note This will fatally exit if \c uname() fails to get the local node name
* or we run out of memory.
* \note The caller is responsible for freeing the return value using \c free().
*/
char *
pcmk__cluster_node_name(uint32_t nodeid)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
switch (cluster_layer) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
return pcmk__corosync_name(0, nodeid);
#else
break;
#endif // SUPPORT_COROSYNC
default:
crm_err("Unsupported cluster layer: %s", cluster_layer_s);
break;
}
if (nodeid == 0) {
struct utsname hostinfo;
crm_notice("Could not get local node name from %s cluster layer, "
"defaulting to local hostname",
cluster_layer_s);
if (uname(&hostinfo) < 0) {
// @TODO Maybe let the caller decide what to do
crm_err("Failed to get the local hostname");
crm_exit(CRM_EX_FATAL);
}
return pcmk__str_copy(hostinfo.nodename);
}
crm_notice("Could not obtain a node name for node with "
PCMK_XA_ID "=" PRIu32,
nodeid);
return NULL;
}
/*!
* \internal
* \brief Get the local node's cluster-layer node name
*
* If getting the node name from the cluster layer is impossible, call
* \c uname() and get the \c nodename member from the <tt>struct utsname</tt>
* object.
*
* \return Local node's name
*
* \note This will fatally exit if \c uname() fails to get the local node name
* or we run out of memory.
*/
const char *
pcmk__cluster_local_node_name(void)
{
// @TODO Refactor to avoid trivially leaking name at exit
static char *name = NULL;
if (name == NULL) {
name = pcmk__cluster_node_name(0);
}
return name;
}
/*!
- * \brief Get the node name corresponding to a node UUID
+ * \internal
+ * \brief Get the node name corresonding to a node UUID
*
- * \param[in] uuid UUID of desired node
+ * Look for the UUID in both the remote node cache and the cluster member cache.
+ * For a Corosync cluster, if no cache entry is found, treat the UUID as a
+ * cluster-layer ID and try again.
*
- * \return name of desired node
+ * \param[in] uuid UUID to search for
*
- * \note This relies on the remote peer cache being populated with all
- * remote nodes in the cluster, so callers should maintain that cache.
+ * \return Node name corresponding to \p uuid if found, or \c NULL otherwise
*/
const char *
-crm_peer_uname(const char *uuid)
+pcmk__node_name_from_uuid(const char *uuid)
{
+ /* @TODO There are too many functions in libcrmcluster that look up a node
+ * from the node caches (possibly creating a cache entry if none exists).
+ * There are at least the following:
+ * * pcmk__cluster_lookup_remote_node()
+ * * pcmk__get_node()
+ * * pcmk__node_name_from_uuid()
+ * * pcmk__search_node_caches()
+ *
+ * There's a lot of duplication among them, but they all do slightly
+ * different things. We should try to clean them up and consolidate them to
+ * the extent possible, likely with new helper functions.
+ */
GHashTableIter iter;
crm_node_t *node = NULL;
CRM_CHECK(uuid != NULL, return NULL);
- /* remote nodes have the same uname and uuid */
+ // Remote nodes have the same uname and uuid
if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
return uuid;
}
- /* avoid blocking calls where possible */
+ // Avoid blocking calls where possible
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
- if (node->uname != NULL) {
- return node->uname;
- }
- break;
+ return node->uname;
}
}
- node = NULL;
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
- long long id;
+ long long id = 0;
if ((pcmk__scan_ll(uuid, &id, 0LL) != pcmk_rc_ok)
|| (id < 1LL) || (id > UINT32_MAX)) {
+
crm_err("Invalid Corosync node ID '%s'", uuid);
return NULL;
}
node = pcmk__search_node_caches((uint32_t) id, NULL,
pcmk__node_search_cluster_member);
if (node != NULL) {
crm_info("Setting uuid for node %s[%u] to %s",
node->uname, node->id, uuid);
- node->uuid = strdup(uuid);
+ node->uuid = pcmk__str_copy(uuid);
return node->uname;
}
- return NULL;
}
return NULL;
}
+/*!
+ * \brief Get the node name corresponding to a node UUID
+ *
+ * \param[in] uuid UUID of desired node
+ *
+ * \return name of desired node
+ *
+ * \note This relies on the remote peer cache being populated with all
+ * remote nodes in the cluster, so callers should maintain that cache.
+ */
+const char *
+crm_peer_uname(const char *uuid)
+{
+ return pcmk__node_name_from_uuid(uuid);
+}
+
/*!
* \brief Get a log-friendly string equivalent of a cluster layer
*
* \param[in] layer Cluster layer
*
* \return Log-friendly string corresponding to \p layer
*/
const char *
pcmk_cluster_layer_text(enum pcmk_cluster_layer layer)
{
switch (layer) {
case pcmk_cluster_layer_corosync:
return "corosync";
case pcmk_cluster_layer_unknown:
return "unknown";
case pcmk_cluster_layer_invalid:
return "invalid";
default:
crm_err("Invalid cluster layer: %d", layer);
return "invalid";
}
}
/*!
* \brief Get and validate the local cluster layer
*
* If a cluster layer is not configured via the \c PCMK__ENV_CLUSTER_TYPE local
* option, this will try to detect an active cluster from among the supported
* cluster layers.
*
* \return Local cluster layer
*
* \note This will fatally exit if the configured cluster layer is invalid.
*/
enum pcmk_cluster_layer
pcmk_get_cluster_layer(void)
{
static enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown;
const char *cluster = NULL;
// Cluster layer is stable once set
if (cluster_layer != pcmk_cluster_layer_unknown) {
return cluster_layer;
}
cluster = pcmk__env_option(PCMK__ENV_CLUSTER_TYPE);
if (cluster != NULL) {
crm_info("Verifying configured cluster layer '%s'", cluster);
cluster_layer = pcmk_cluster_layer_invalid;
#if SUPPORT_COROSYNC
if (pcmk__str_eq(cluster, PCMK_VALUE_COROSYNC, pcmk__str_casei)) {
cluster_layer = pcmk_cluster_layer_corosync;
}
#endif // SUPPORT_COROSYNC
if (cluster_layer == pcmk_cluster_layer_invalid) {
crm_notice("This installation does not support the '%s' cluster "
"infrastructure: terminating",
cluster);
crm_exit(CRM_EX_FATAL);
}
crm_info("Assuming an active '%s' cluster", cluster);
} else {
// Nothing configured, so test supported cluster layers
#if SUPPORT_COROSYNC
crm_debug("Testing with Corosync");
if (pcmk__corosync_is_active()) {
cluster_layer = pcmk_cluster_layer_corosync;
}
#endif // SUPPORT_COROSYNC
if (cluster_layer == pcmk_cluster_layer_unknown) {
crm_notice("Could not determine the current cluster layer");
} else {
crm_info("Detected an active '%s' cluster",
pcmk_cluster_layer_text(cluster_layer));
}
}
return cluster_layer;
}
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/cluster/compat.h>
void
set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
{
crm_xml_add(xml, attr, pcmk__cluster_node_uuid(node));
}
gboolean
crm_cluster_connect(pcmk_cluster_t *cluster)
{
return pcmk_cluster_connect(cluster) == pcmk_rc_ok;
}
void
crm_cluster_disconnect(pcmk_cluster_t *cluster)
{
pcmk_cluster_disconnect(cluster);
}
const char *
name_for_cluster_type(enum cluster_type_e type)
{
switch (type) {
case pcmk_cluster_corosync:
return "corosync";
case pcmk_cluster_unknown:
return "unknown";
case pcmk_cluster_invalid:
return "invalid";
}
crm_err("Invalid cluster type: %d", type);
return "invalid";
}
enum cluster_type_e
get_cluster_type(void)
{
return (enum cluster_type_e) pcmk_get_cluster_layer();
}
gboolean
is_corosync_cluster(void)
{
return pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync;
}
gboolean
send_cluster_message(const crm_node_t *node, enum crm_ais_msg_types service,
const xmlNode *data, gboolean ordered)
{
return pcmk__cluster_send_message(node, service, data);
}
const char *
crm_peer_uuid(crm_node_t *peer)
{
return pcmk__cluster_node_uuid(peer);
}
char *
get_node_name(uint32_t nodeid)
{
return pcmk__cluster_node_name(nodeid);
}
const char *
get_local_node_name(void)
{
return pcmk__cluster_local_node_name();
}
// LCOV_EXCL_STOP
// End deprecated API

File Metadata

Mime Type
text/x-diff
Expires
Mon, Jul 21, 3:00 AM (1 d, 11 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2082978
Default Alt Text
(63 KB)

Event Timeline