Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 7b7a2f05b7..d352d65cfe 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,1051 +1,1052 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <pacemaker-controld.h>
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
/*
* stonith failure counting
*
* We don't want to get stuck in a permanent fencing loop. Keep track of the
* number of fencing failures for each target node, and the most we'll restart a
* transition for.
*/
struct st_fail_rec {
int count;
};
static bool fence_reaction_panic = FALSE;
static unsigned long int stonith_max_attempts = 10;
static GHashTable *stonith_failures = NULL;
void
update_stonith_max_attempts(const char *value)
{
stonith_max_attempts = char2score(value);
if (stonith_max_attempts < 1UL) {
stonith_max_attempts = 10UL;
}
}
void
set_fence_reaction(const char *reaction_s)
{
if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
fence_reaction_panic = TRUE;
} else {
if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
crm_warn("Invalid value '%s' for %s, using 'stop'",
reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
}
fence_reaction_panic = FALSE;
}
}
static gboolean
too_many_st_failures(const char *target)
{
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *value = NULL;
if (stonith_failures == NULL) {
return FALSE;
}
if (target == NULL) {
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
if (value->count >= stonith_max_attempts) {
target = (const char*)key;
goto too_many;
}
}
} else {
value = g_hash_table_lookup(stonith_failures, target);
if ((value != NULL) && (value->count >= stonith_max_attempts)) {
goto too_many;
}
}
return FALSE;
too_many:
crm_warn("Too many failures (%d) to fence %s, giving up",
value->count, target);
return TRUE;
}
/*!
* \internal
* \brief Reset a stonith fail count
*
* \param[in] target Name of node to reset, or NULL for all
*/
void
st_fail_count_reset(const char *target)
{
if (stonith_failures == NULL) {
return;
}
if (target) {
struct st_fail_rec *rec = NULL;
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count = 0;
}
} else {
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *rec = NULL;
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rec)) {
rec->count = 0;
}
}
}
static void
st_fail_count_increment(const char *target)
{
struct st_fail_rec *rec = NULL;
if (stonith_failures == NULL) {
stonith_failures = pcmk__strkey_table(free, free);
}
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count++;
} else {
rec = malloc(sizeof(struct st_fail_rec));
if(rec == NULL) {
return;
}
rec->count = 1;
g_hash_table_insert(stonith_failures, strdup(target), rec);
}
}
/* end stonith fail count functions */
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
if (rc < pcmk_ok) {
crm_err("Fencing update %d for %s: failed - %s (%d)",
call_id, (char *)user_data, pcmk_strerror(rc), rc);
crm_log_xml_warn(msg, "Failed update");
abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
} else {
crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
}
}
static void
send_stonith_update(crm_action_t *action, const char *target, const char *uuid)
{
int rc = pcmk_ok;
crm_node_t *peer = NULL;
/* We (usually) rely on the membership layer to do node_update_cluster,
* and the peer status callback to do node_update_peer, because the node
* might have already rejoined before we get the stonith result here.
*/
int flags = node_update_join | node_update_expected;
/* zero out the node-status & remove all LRM status info */
xmlNode *node_state = NULL;
CRM_CHECK(target != NULL, return);
CRM_CHECK(uuid != NULL, return);
/* Make sure the membership and join caches are accurate */
peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
/* Usually, we rely on the membership layer to update the cluster state
* in the CIB. However, if the node has never been seen, do it here, so
* the node is not considered unclean.
*/
flags |= node_update_cluster;
}
if (peer->uuid == NULL) {
crm_info("Recording uuid '%s' for node '%s'", uuid, target);
peer->uuid = strdup(uuid);
}
crmd_peer_down(peer, TRUE);
/* Generate a node state update for the CIB */
node_state = create_node_state_update(peer, flags, NULL, __func__);
/* we have to mark whether or not remote nodes have already been fenced */
if (peer->flags & crm_remote_node) {
char *now_s = pcmk__ttoa(time(NULL));
crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
free(now_s);
}
/* Force our known ID */
crm_xml_add(node_state, XML_ATTR_UUID, uuid);
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
cib_quorum_override | cib_scope_local | cib_can_create);
/* Delay processing the trigger until the update completes */
crm_debug("Sending fencing update %d for %s", rc, target);
fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
/* Make sure it sticks */
/* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */
controld_delete_node_state(peer->uname, controld_section_all,
cib_scope_local);
free_xml(node_state);
return;
}
/*!
* \internal
* \brief Abort transition due to stonith failure
*
* \param[in] abort_action Whether to restart or stop transition
* \param[in] target Don't restart if this (NULL for any) has too many failures
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
*/
static void
abort_for_stonith_failure(enum transition_action abort_action,
const char *target, xmlNode *reason)
{
/* If stonith repeatedly fails, we eventually give up on starting a new
* transition for that reason.
*/
if ((abort_action != tg_stop) && too_many_st_failures(target)) {
abort_action = tg_stop;
}
abort_transition(INFINITY, abort_action, "Stonith failed", reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GList *stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target) {
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GList *iter = stonith_cleanup_list;
while (iter != NULL) {
GList *tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup()
{
if (stonith_cleanup_list) {
GList *iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup()
{
GList *iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_node_t *target_node = crm_get_peer(0, target);
const char *uuid = crm_peer_uuid(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
send_stonith_update(NULL, target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
static stonith_t *stonith_api = NULL;
static crm_trigger_t *stonith_reconnect = NULL;
static char *te_client_id = NULL;
static gboolean
fail_incompletable_stonith(crm_graph_t *graph)
{
GList *lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GList *lpc2 = NULL;
synapse_t *synapse = (synapse_t *) lpc->data;
if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
crm_action_t *action = (crm_action_t *) lpc2->data;
- if (action->type != action_type_crm || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ if ((action->type != pcmk__cluster_graph_action)
+ || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
crm__set_graph_action_flags(action, pcmk__graph_action_failed);
last_action = action->xml;
pcmk__update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(tg_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
te_cleanup_stonith_history_sync(st, FALSE);
if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencing daemon disconnected");
}
if (stonith_api) {
/* the client API won't properly reconnect notifications
* if they are still in the table - so remove them
*/
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(st);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (AM_I_DC) {
fail_incompletable_stonith(transition_graph);
trigger_graph();
}
}
/*!
* \internal
* \brief Handle an event notification from the fencing API
*
* \param[in] st Fencing API connection
* \param[in] event Fencing API event notification
*/
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
bool succeeded = true;
const char *executioner = "the cluster";
const char *client = "a client";
const char *reason = NULL;
int exec_status;
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (event == NULL) {
crm_err("Notify data not found");
return;
}
if (event->executioner != NULL) {
executioner = event->executioner;
}
if (event->client_origin != NULL) {
client = event->client_origin;
}
exec_status = stonith__event_execution_status(event);
if ((stonith__event_exit_status(event) != CRM_EX_OK)
|| (exec_status != PCMK_EXEC_DONE)) {
succeeded = false;
if (exec_status == PCMK_EXEC_DONE) {
exec_status = PCMK_EXEC_ERROR;
}
}
reason = stonith__event_exit_reason(event);
crmd_alert_fencing_op(event);
if (pcmk__str_eq("on", event->action, pcmk__str_none)) {
// Unfencing doesn't need special handling, just a log message
if (succeeded) {
crm_notice("%s was unfenced by %s at the request of %s@%s",
event->target, executioner, client, event->origin);
} else {
crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
event->target, executioner,
pcmk_exec_status_str(exec_status),
((reason == NULL)? "" : ": "),
((reason == NULL)? "" : reason),
stonith__event_exit_status(event));
}
return;
}
if (succeeded
&& pcmk__str_eq(event->target, fsa_our_uname, pcmk__str_casei)) {
/* We were notified of our own fencing. Most likely, either fencing was
* misconfigured, or fabric fencing that doesn't cut cluster
* communication is in use.
*
* Either way, shutting down the local host is a good idea, to require
* administrator intervention. Also, other nodes would otherwise likely
* set our status to lost because of the fencing callback and discard
* our subsequent election votes as "not part of our cluster".
*/
crm_crit("We were allegedly just fenced by %s for %s!",
executioner, event->origin); // Dumps blackbox if enabled
if (fence_reaction_panic) {
pcmk__panic(__func__);
} else {
crm_exit(CRM_EX_FATAL);
}
return; // Should never get here
}
/* Update the count of fencing failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC) {
if (succeeded) {
st_fail_count_reset(event->target);
} else {
st_fail_count_increment(event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
"%s%s%s%s " CRM_XS " event=%s",
event->target, (succeeded? "" : " not"),
event->action, executioner, client, event->origin,
(succeeded? "OK" : pcmk_exec_status_str(exec_status)),
((reason == NULL)? "" : " ("),
((reason == NULL)? "" : reason),
((reason == NULL)? "" : ")"),
event->id);
if (succeeded) {
crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
CRM_GET_PEER_ANY);
const char *uuid = NULL;
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
if (AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
* on the scheduler creating fence pseudo-events for the guests.
*/
if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
/* Abort the current transition if it wasn't the cluster that
* initiated fencing.
*/
crm_info("External fencing operation from %s fenced %s",
client, event->target);
abort_transition(INFINITY, tg_restart,
"External Fencing Operation", NULL);
}
/* Assume it was our leader if we don't currently have one */
} else if (pcmk__str_eq(fsa_our_dc, event->target,
pcmk__str_null_matches|pcmk__str_casei)
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
crm_notice("Fencing target %s %s our leader",
event->target, (fsa_our_dc? "was" : "may have been"));
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (pcmk__str_eq(event->executioner, fsa_our_uname,
pcmk__str_casei)) {
send_stonith_update(NULL, event->target, uuid);
}
add_stonith_cleanup(event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (pcmk_is_set(peer->flags, crm_remote_node)) {
remote_ra_fail(event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
* \brief Connect to fencer
*
* \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
*
* \return TRUE
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
*/
static gboolean
te_connect_stonith(gpointer user_data)
{
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return TRUE;
}
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
return TRUE;
}
if (user_data == NULL) {
// Blocking (retry failures now until successful)
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 30 attempts: %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (rc != pcmk_ok) {
if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_notice("Fencer connection failed (will retry): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
return TRUE;
}
}
if (rc == pcmk_ok) {
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_FENCE,
handle_fence_notification);
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_HISTORY_SYNCED,
tengine_stonith_history_synced);
te_trigger_stonith_history_sync(TRUE);
crm_notice("Fencer successfully connected");
}
return TRUE;
}
/*!
\internal
\brief Schedule fencer connection attempt in main loop
*/
void
controld_trigger_fencer_connect()
{
if (stonith_reconnect == NULL) {
stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
te_connect_stonith,
GINT_TO_POINTER(TRUE));
}
controld_set_fsa_input_flags(R_ST_REQUIRED);
mainloop_set_trigger(stonith_reconnect);
}
void
controld_disconnect_fencer(bool destroy)
{
if (stonith_api) {
// Prevent fencer connection from coming up again
controld_clear_fsa_input_flags(R_ST_REQUIRED);
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(stonith_api);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (destroy) {
if (stonith_api) {
stonith_api->cmds->free(stonith_api);
stonith_api = NULL;
}
if (stonith_reconnect) {
mainloop_destroy_trigger(stonith_reconnect);
stonith_reconnect = NULL;
}
if (te_client_id) {
free(te_client_id);
te_client_id = NULL;
}
}
}
static gboolean
do_stonith_history_sync(gpointer user_data)
{
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
stonith_history_t *history = NULL;
te_cleanup_stonith_history_sync(stonith_api, FALSE);
stonith_api->cmds->history(stonith_api,
st_opt_sync_call | st_opt_broadcast,
NULL, &history, 5);
stonith_history_free(history);
return TRUE;
} else {
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
return FALSE;
}
}
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
char *uuid = NULL;
int stonith_id = -1;
int transition_id = -1;
crm_action_t *action = NULL;
const char *target = NULL;
if ((data == NULL) || (data->userdata == NULL)) {
crm_err("Ignoring fence operation %d result: "
"No transition key given (bug?)",
((data == NULL)? -1 : data->call_id));
return;
}
if (!AM_I_DC) {
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
reason = pcmk_exec_status_str(stonith__execution_status(data));
}
crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
data->call_id, stonith__exit_status(data), reason,
(const char *) data->userdata);
return;
}
CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
&stonith_id, NULL),
goto bail);
if (transition_graph->complete || (stonith_id < 0)
|| !pcmk__str_eq(uuid, te_uuid, pcmk__str_none)
|| (transition_graph->id != transition_id)) {
crm_info("Ignoring fence operation %d result: "
"Not from current transition " CRM_XS
" complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
data->call_id, pcmk__btoa(transition_graph->complete),
stonith_id, uuid, te_uuid, transition_id, transition_graph->id);
goto bail;
}
action = controld_get_action(stonith_id);
if (action == NULL) {
crm_err("Ignoring fence operation %d result: "
"Action %d not found in transition graph (bug?) "
CRM_XS " uuid=%s transition=%d",
data->call_id, stonith_id, uuid, transition_id);
goto bail;
}
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if (target == NULL) {
crm_err("Ignoring fence operation %d result: No target given (bug?)",
data->call_id);
goto bail;
}
stop_te_timer(action->timer);
if (stonith__exit_status(data) == CRM_EX_OK) {
const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *op = crm_meta_value(action->params, "stonith_action");
crm_info("Fence operation %d for %s succeeded", data->call_id, target);
if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
te_action_confirmed(action, NULL);
if (pcmk__str_eq("on", op, pcmk__str_casei)) {
const char *value = NULL;
char *now = pcmk__ttoa(time(NULL));
gboolean is_remote_node = FALSE;
/* This check is not 100% reliable, since this node is not
* guaranteed to have the remote node cached. However, it
* doesn't have to be reliable, since the attribute manager can
* learn a node's "remoteness" by other means sooner or later.
* This allows it to learn more quickly if this node does have
* the information.
*/
if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
is_remote_node = TRUE;
}
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
is_remote_node);
free(now);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
is_remote_node);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
is_remote_node);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
send_stonith_update(action, target, uuid);
crm__set_graph_action_flags(action, pcmk__graph_action_sent_update);
}
}
st_fail_count_reset(target);
} else {
enum transition_action abort_action = tg_restart;
int status = stonith__execution_status(data);
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
if (status == PCMK_EXEC_DONE) {
reason = "Agent returned error";
} else {
reason = pcmk_exec_status_str(status);
}
}
crm__set_graph_action_flags(action, pcmk__graph_action_failed);
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
crm_warn("Fence operation %d for %s failed: %s "
"(aborting transition and giving up for now)",
data->call_id, target, reason);
abort_action = tg_stop;
} else {
crm_notice("Fence operation %d for %s failed: %s "
"(aborting transition)", data->call_id, target, reason);
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in
* handle_fence_notification().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
pcmk__update_graph(transition_graph, action);
trigger_graph();
bail:
free(data->userdata);
free(uuid);
return;
}
static int
fence_with_delay(const char *target, const char *type, const char *delay)
{
uint32_t options = st_opt_none; // Group of enum stonith_call_options
int timeout_sec = (int) (transition_graph->stonith_timeout / 1000);
int delay_i;
if (crmd_join_phase_count(crm_join_confirmed) == 1) {
stonith__set_call_options(options, target, st_opt_allow_suicide);
}
pcmk__scan_min_int(delay, &delay_i, 0);
return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
type, timeout_sec, 0, delay_i);
}
gboolean
te_fence_node(crm_graph_t *graph, crm_action_t *action)
{
int rc = 0;
const char *id = NULL;
const char *uuid = NULL;
const char *target = NULL;
const char *type = NULL;
char *transition_key = NULL;
const char *priority_delay = NULL;
gboolean invalid_action = FALSE;
id = ID(action->xml);
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
type = crm_meta_value(action->params, "stonith_action");
CRM_CHECK(id != NULL, invalid_action = TRUE);
CRM_CHECK(uuid != NULL, invalid_action = TRUE);
CRM_CHECK(type != NULL, invalid_action = TRUE);
CRM_CHECK(target != NULL, invalid_action = TRUE);
if (invalid_action) {
crm_log_xml_warn(action->xml, "BadAction");
return FALSE;
}
priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
crm_notice("Requesting fencing (%s) of node %s "
CRM_XS " action=%s timeout=%u%s%s",
type, target, id, transition_graph->stonith_timeout,
priority_delay ? " priority_delay=" : "",
priority_delay ? priority_delay : "");
/* Passing NULL means block until we can connect... */
te_connect_stonith(NULL);
rc = fence_with_delay(target, type, priority_delay);
transition_key = pcmk__transition_key(transition_graph->id, action->id, 0,
te_uuid),
stonith_api->cmds->register_callback(stonith_api, rc,
(int) (transition_graph->stonith_timeout / 1000),
st_opt_timeout_updates, transition_key,
"tengine_stonith_callback", tengine_stonith_callback);
return TRUE;
}
bool
controld_verify_stonith_watchdog_timeout(const char *value)
{
gboolean rv = TRUE;
if (stonith_api && (stonith_api->state != stonith_disconnected) &&
stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
fsa_our_uname)) {
rv = pcmk__valid_sbd_timeout(value);
}
return rv;
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
static crm_trigger_t *stonith_history_sync_trigger = NULL;
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
void
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
{
if (free_timers) {
mainloop_timer_del(stonith_history_sync_timer_short);
stonith_history_sync_timer_short = NULL;
mainloop_timer_del(stonith_history_sync_timer_long);
stonith_history_sync_timer_long = NULL;
} else {
mainloop_timer_stop(stonith_history_sync_timer_short);
mainloop_timer_stop(stonith_history_sync_timer_long);
}
if (st) {
st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
}
}
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
{
te_cleanup_stonith_history_sync(st, FALSE);
crm_debug("Fence-history synced - cancel all timers");
}
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
mainloop_set_trigger(stonith_history_sync_trigger);
return FALSE;
}
void
te_trigger_stonith_history_sync(bool long_timeout)
{
/* trigger a sync in 5s to give more nodes the
* chance to show up so that we don't create
* unnecessary stonith-history-sync traffic
*
* the long timeout of 30s is there as a fallback
* so that after a successful connection to fenced
* we will wait for 30s for the DC to trigger a
* history-sync
* if this doesn't happen we trigger a sync locally
* (e.g. fenced segfaults and is restarted by pacemakerd)
*/
/* as we are finally checking the stonith-connection
* in do_stonith_history_sync we should be fine
* leaving stonith_history_sync_time & stonith_history_sync_trigger
* around
*/
if (stonith_history_sync_trigger == NULL) {
stonith_history_sync_trigger =
mainloop_add_trigger(G_PRIORITY_LOW,
do_stonith_history_sync, NULL);
}
if (long_timeout) {
if(stonith_history_sync_timer_long == NULL) {
stonith_history_sync_timer_long =
mainloop_timer_add("history_sync_long", 30000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
mainloop_timer_start(stonith_history_sync_timer_long);
} else {
if(stonith_history_sync_timer_short == NULL) {
stonith_history_sync_timer_short =
mainloop_timer_add("history_sync_short", 5000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
mainloop_timer_start(stonith_history_sync_timer_short);
}
}
/* end stonith history synchronization functions */
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index d854cbeab2..f5445bdcdb 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -1,688 +1,688 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
char *te_uuid = NULL;
GHashTable *te_targets = NULL;
void send_rsc_command(crm_action_t * action);
static void te_update_job_count(crm_action_t * action, int offset);
static void
te_start_action_timer(crm_graph_t * graph, crm_action_t * action)
{
action->timer = calloc(1, sizeof(crm_action_timer_t));
action->timer->timeout = action->timeout;
action->timer->action = action;
action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay,
action_timer_callback, (void *)action->timer);
CRM_ASSERT(action->timer->source_id != 0);
}
static gboolean
te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
{
const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
/* send to peers as well? */
if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *cmd = NULL;
if (pcmk__str_eq(fsa_our_uname, node->uname, pcmk__str_casei)) {
continue;
}
cmd = create_request(task, pseudo->xml, node->uname,
CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
free_xml(cmd);
}
remote_ra_process_maintenance_nodes(pseudo->xml);
} else {
/* Check action for Pacemaker Remote node side effects */
remote_ra_process_pseudo(pseudo->xml);
}
crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
te_action_confirmed(pseudo, graph);
return TRUE;
}
static int
get_target_rc(crm_action_t * action)
{
int exit_status;
pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC),
&exit_status, 0);
return exit_status;
}
static gboolean
te_crm_command(crm_graph_t * graph, crm_action_t * action)
{
char *counter = NULL;
xmlNode *cmd = NULL;
gboolean is_local = FALSE;
const char *id = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
id = ID(action->xml);
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (!router_node) {
router_node = on_node;
if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
router_node = fsa_our_uname;
}
}
}
if (pcmk__str_empty(on_node)) {
crm_err("Corrupted command (id=%s) %s: no node",
pcmk__s(id, "<null>"), pcmk__s(task, "without task"));
return FALSE;
}
if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_info("Executing crm-event (%s)%s%s: %s on %s",
pcmk__s(id, "<null>"), (is_local? " locally" : ""),
(no_wait? " without waiting" : ""),
pcmk__s(task, "unspecified task"), on_node);
if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
/* defer until everything else completes */
crm_info("crm-event (%s) is a local shutdown", pcmk__s(id, "<null>"));
graph->completion_action = tg_shutdown;
graph->abort_reason = "local shutdown";
te_action_confirmed(action, graph);
return TRUE;
} else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
crm_node_t *peer = crm_get_peer(0, router_node);
pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
}
cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
counter = pcmk__transition_key(transition_graph->id, action->id,
get_target_rc(action), te_uuid);
crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
free(counter);
free_xml(cmd);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return FALSE;
} else if (no_wait) {
te_action_confirmed(action, graph);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_start_action_timer(graph, action);
}
return TRUE;
}
/*!
* \internal
* \brief Synthesize an executor event for a resource action timeout
*
* \param[in] action Resource action that timed out
* \param[in] target_rc Expected result of action that timed out
*
* Synthesize an executor event for a resource action timeout. (If the executor
* gets a timeout while waiting for a resource action to complete, that will be
* reported via the usual callback. This timeout means we didn't hear from the
* executor itself or the controller that relayed the action to the executor.)
*
* \return Newly created executor event for result of \p action
* \note The caller is responsible for freeing the return value using
* lrmd_free_event().
*/
static lrmd_event_data_t *
synthesize_timeout_event(crm_action_t *action, int target_rc)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *reason = NULL;
char *dynamic_reason = NULL;
if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
reason = "Local executor did not return result in time";
} else {
const char *router_node = NULL;
router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (router_node == NULL) {
router_node = target;
}
dynamic_reason = crm_strdup_printf("Controller on %s did not return "
"result in time", router_node);
reason = dynamic_reason;
}
op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
PCMK_OCF_UNKNOWN_ERROR, reason);
op->call_id = -1;
op->user_data = pcmk__transition_key(transition_graph->id, action->id,
target_rc, te_uuid);
free(dynamic_reason);
return op;
}
static void
controld_record_action_event(crm_action_t *action, lrmd_event_data_t *op)
{
xmlNode *state = NULL;
xmlNode *rsc = NULL;
xmlNode *action_rsc = NULL;
int rc = pcmk_ok;
const char *rsc_id = NULL;
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
int call_options = cib_quorum_override | cib_scope_local;
int target_rc = get_target_rc(action);
action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
if (action_rsc == NULL) {
return;
}
rsc_id = ID(action_rsc);
CRM_CHECK(rsc_id != NULL,
crm_log_xml_err(action->xml, "Bad:action"); return);
/*
update the CIB
<node_state id="hadev">
<lrm>
<lrm_resources>
<lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
*/
state = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(state, XML_ATTR_UUID, target_uuid);
crm_xml_add(state, XML_ATTR_UNAME, target);
rsc = create_xml_node(state, XML_CIB_TAG_LRM);
crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
__func__);
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
free_xml(state);
crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
rc, action->id, task_uuid, target);
crm__set_graph_action_flags(action, pcmk__graph_action_sent_update);
}
void
controld_record_action_timeout(crm_action_t *action)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
int target_rc = get_target_rc(action);
crm_warn("%s %d: %s on %s timed out",
crm_element_name(action->xml), action->id, task_uuid, target);
op = synthesize_timeout_event(action, target_rc);
controld_record_action_event(action, op);
lrmd_free_event(op);
}
static gboolean
te_rsc_command(crm_graph_t * graph, crm_action_t * action)
{
/* never overwrite stop actions in the CIB with
* anything other than completed results
*
* Writing pending stops makes it look like the
* resource is running again
*/
xmlNode *cmd = NULL;
xmlNode *rsc_op = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
gboolean is_local = FALSE;
char *counter = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
const char *task_uuid = NULL;
CRM_ASSERT(action != NULL);
CRM_ASSERT(action->xml != NULL);
crm__clear_graph_action_flags(action, pcmk__graph_action_executed);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if (pcmk__str_empty(on_node)) {
crm_err("Corrupted command(id=%s) %s: no node",
ID(action->xml), pcmk__s(task, "without task"));
return FALSE;
}
rsc_op = action->xml;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
if (!router_node) {
router_node = on_node;
}
counter = pcmk__transition_key(transition_graph->id, action->id,
get_target_rc(action), te_uuid);
crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
task, task_uuid, (is_local? " locally" : ""), on_node,
(no_wait? " without waiting" : ""), action->id);
cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
if (is_local) {
/* shortcut local resource commands */
ha_msg_input_t data = {
.msg = cmd,
.xml = rsc_op,
};
fsa_data_t msg = {
.id = 0,
.data = &data,
.data_type = fsa_dt_ha_msg,
.fsa_input = I_NULL,
.fsa_cause = C_FSA_INTERNAL,
.actions = A_LRM_INVOKE,
.origin = __func__,
};
do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);
} else {
rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
}
free(counter);
free_xml(cmd);
crm__set_graph_action_flags(action, pcmk__graph_action_executed);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return FALSE;
} else if (no_wait) {
crm_info("Action %d confirmed - no wait", action->id);
crm__set_graph_action_flags(action, pcmk__graph_action_confirmed); /* Just mark confirmed.
* Don't bump the job count only to immediately decrement it
*/
pcmk__update_graph(transition_graph, action);
trigger_graph();
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
action->id, task, task_uuid, on_node, action->timeout);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_update_job_count(action, 1);
te_start_action_timer(graph, action);
}
return TRUE;
}
struct te_peer_s
{
char *name;
int jobs;
int migrate_jobs;
};
static void te_peer_free(gpointer p)
{
struct te_peer_s *peer = p;
free(peer->name);
free(peer);
}
void te_reset_job_counts(void)
{
GHashTableIter iter;
struct te_peer_s *peer = NULL;
if(te_targets == NULL) {
te_targets = pcmk__strkey_table(NULL, te_peer_free);
}
g_hash_table_iter_init(&iter, te_targets);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
peer->jobs = 0;
peer->migrate_jobs = 0;
}
}
static void
te_update_job_count_on(const char *target, int offset, bool migrate)
{
struct te_peer_s *r = NULL;
if(target == NULL || te_targets == NULL) {
return;
}
r = g_hash_table_lookup(te_targets, target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
r->jobs += offset;
if(migrate) {
r->migrate_jobs += offset;
}
crm_trace("jobs[%s] = %d", target, r->jobs);
}
static void
te_update_job_count(crm_action_t * action, int offset)
{
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
- if (action->type != action_type_rsc || target == NULL) {
+ if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
/* No limit on these */
return;
}
/* if we have a router node, this means the action is performing
* on a remote node. For now, we count all actions occurring on a
* remote node against the job list on the cluster node hosting
* the connection resources */
target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
CRMD_ACTION_MIGRATED, NULL)) {
const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
te_update_job_count_on(t1, offset, TRUE);
te_update_job_count_on(t2, offset, TRUE);
return;
} else if (target == NULL) {
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
}
te_update_job_count_on(target, offset, FALSE);
}
static gboolean
te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target)
{
int limit = 0;
struct te_peer_s *r = NULL;
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
if(target == NULL) {
/* No limit on these */
return TRUE;
} else if(te_targets == NULL) {
return FALSE;
}
r = g_hash_table_lookup(te_targets, target);
limit = throttle_get_job_limit(target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
if(limit <= r->jobs) {
crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
target, limit, r->jobs, id);
return FALSE;
} else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
target, graph->migration_limit, r->migrate_jobs, id);
return FALSE;
}
}
crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
return TRUE;
}
static gboolean
te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
{
const char *target = NULL;
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
- if (action->type != action_type_rsc) {
+ if (action->type != pcmk__rsc_graph_action) {
/* No limit on these */
return TRUE;
}
/* if we have a router node, this means the action is performing
* on a remote node. For now, we count all actions occurring on a
* remote node against the job list on the cluster node hosting
* the connection resources */
target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
CRMD_ACTION_MIGRATED, NULL)) {
target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
if(te_should_perform_action_on(graph, action, target) == FALSE) {
return FALSE;
}
target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
} else if (target == NULL) {
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
}
return te_should_perform_action_on(graph, action, target);
}
/*!
* \brief Confirm a graph action (and optionally update graph)
*
* \param[in] action Action to confirm
* \param[in] graph Update and trigger this graph (if non-NULL)
*/
void
te_action_confirmed(crm_action_t *action, crm_graph_t *graph)
{
if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
- if ((action->type == action_type_rsc)
+ if ((action->type == pcmk__rsc_graph_action)
&& (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
te_update_job_count(action, -1);
}
crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
}
if (graph) {
pcmk__update_graph(graph, action);
trigger_graph();
}
}
crm_graph_functions_t te_graph_fns = {
te_pseudo_action,
te_rsc_command,
te_crm_command,
te_fence_node,
te_should_perform_action,
};
void
notify_crmd(crm_graph_t * graph)
{
const char *type = "unknown";
enum crmd_fsa_input event = I_NULL;
crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));
CRM_CHECK(graph->complete, graph->complete = TRUE);
switch (graph->completion_action) {
case tg_stop:
type = "stop";
if (fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case tg_done:
type = "done";
if (fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case tg_restart:
type = "restart";
if (fsa_state == S_TRANSITION_ENGINE) {
if (transition_timer->period_ms > 0) {
controld_stop_timer(transition_timer);
controld_start_timer(transition_timer);
} else {
event = I_PE_CALC;
}
} else if (fsa_state == S_POLICY_ENGINE) {
controld_set_fsa_action_flags(A_PE_INVOKE);
trigger_fsa();
}
break;
case tg_shutdown:
type = "shutdown";
if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
event = I_STOP;
} else {
crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
event = I_TERMINATE;
}
}
crm_debug("Transition %d status: %s - %s", graph->id, type,
pcmk__s(graph->abort_reason, "unspecified reason"));
graph->abort_reason = NULL;
graph->completion_action = tg_done;
controld_clear_fsa_input_flags(R_IN_TRANSITION);
if (event != I_NULL) {
register_fsa_input(C_FSA_INTERNAL, event, NULL);
} else if (fsa_source) {
mainloop_set_trigger(fsa_source);
}
}
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
index fcf2dfd8bd..ad450d83df 100644
--- a/daemons/controld/controld_te_callbacks.c
+++ b/daemons/controld/controld_te_callbacks.c
@@ -1,677 +1,677 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/msg_xml.h>
#include <crm/cluster.h> /* For ONLINESTATUS etc */
#include <pacemaker-controld.h>
void te_update_confirm(const char *event, xmlNode * msg);
extern char *te_uuid;
gboolean shuttingdown = FALSE;
crm_graph_t *transition_graph;
crm_trigger_t *transition_trigger = NULL;
/* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
#define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
// An explicit shutdown-lock of 0 means the lock has been cleared
static bool
shutdown_lock_cleared(xmlNode *lrm_resource)
{
time_t shutdown_lock = 0;
return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
&shutdown_lock) == pcmk_ok)
&& (shutdown_lock == 0);
}
static void
te_update_diff_v1(const char *event, xmlNode *diff)
{
int lpc, max;
xmlXPathObject *xpathObj = NULL;
CRM_CHECK(diff != NULL, return);
xml_log_patchset(LOG_TRACE, __func__, diff);
if (cib_config_changed(NULL, NULL, &diff)) {
abort_transition(INFINITY, tg_restart, "Non-status change", diff);
goto bail; /* configuration changed */
}
/* Tickets Attributes - Added/Updated */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS);
if (numXpathResults(xpathObj) > 0) {
xmlNode *aborted = getXpathResult(xpathObj, 0);
abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted);
goto bail;
}
freeXpathObject(xpathObj);
/* Tickets Attributes - Removed */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS);
if (numXpathResults(xpathObj) > 0) {
xmlNode *aborted = getXpathResult(xpathObj, 0);
abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted);
goto bail;
}
freeXpathObject(xpathObj);
/* Transient Attributes - Removed */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//"
XML_TAG_TRANSIENT_NODEATTRS);
if (numXpathResults(xpathObj) > 0) {
xmlNode *aborted = getXpathResult(xpathObj, 0);
abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted);
goto bail;
}
freeXpathObject(xpathObj);
// Check for lrm_resource entries
xpathObj = xpath_search(diff,
"//" F_CIB_UPDATE_RESULT
"//" XML_TAG_DIFF_ADDED
"//" XML_LRM_TAG_RESOURCE);
max = numXpathResults(xpathObj);
/*
* Updates by, or in response to, graph actions will never affect more than
* one resource at a time, so such updates indicate an LRM refresh. In that
* case, start a new transition rather than check each result individually,
* which can result in _huge_ speedups in large clusters.
*
* Unfortunately, we can only do so when there are no pending actions.
* Otherwise, we could mistakenly throw away those results here, and
* the cluster will stall waiting for them and time out the operation.
*/
if ((transition_graph->pending == 0) && (max > 1)) {
crm_debug("Ignoring resource operation updates due to history refresh of %d resources",
max);
crm_log_xml_trace(diff, "lrm-refresh");
abort_transition(INFINITY, tg_restart, "History refresh", NULL);
goto bail;
}
if (max == 1) {
xmlNode *lrm_resource = getXpathResult(xpathObj, 0);
if (shutdown_lock_cleared(lrm_resource)) {
// @TODO would be more efficient to abort once after transition done
abort_transition(INFINITY, tg_restart, "Shutdown lock cleared",
lrm_resource);
// Still process results, so we stop timers and update failcounts
}
}
freeXpathObject(xpathObj);
/* Process operation updates */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP);
max = numXpathResults(xpathObj);
if (max > 0) {
int lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
const char *node = get_node_id(rsc_op);
process_graph_event(rsc_op, node);
}
}
freeXpathObject(xpathObj);
/* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */
xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP);
max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
int path_max = 0;
const char *op_id = NULL;
char *rsc_op_xpath = NULL;
xmlXPathObject *op_match = NULL;
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match == NULL) { continue; };
op_id = ID(match);
path_max = strlen(RSC_OP_TEMPLATE) + strlen(op_id) + 1;
rsc_op_xpath = calloc(1, path_max);
snprintf(rsc_op_xpath, path_max, RSC_OP_TEMPLATE, op_id);
op_match = xpath_search(diff, rsc_op_xpath);
if (numXpathResults(op_match) == 0) {
/* Prevent false positives by matching cancelations too */
const char *node = get_node_id(match);
crm_action_t *cancelled = get_cancel_action(op_id, node);
if (cancelled == NULL) {
crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id,
node);
abort_transition(INFINITY, tg_restart, "Resource op removal", match);
freeXpathObject(op_match);
free(rsc_op_xpath);
goto bail;
} else {
crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d",
op_id, node, cancelled->id);
}
}
freeXpathObject(op_match);
free(rsc_op_xpath);
}
bail:
freeXpathObject(xpathObj);
}
static void
process_lrm_resource_diff(xmlNode *lrm_resource, const char *node)
{
for (xmlNode *rsc_op = pcmk__xml_first_child(lrm_resource); rsc_op != NULL;
rsc_op = pcmk__xml_next(rsc_op)) {
process_graph_event(rsc_op, node);
}
if (shutdown_lock_cleared(lrm_resource)) {
// @TODO would be more efficient to abort once after transition done
abort_transition(INFINITY, tg_restart, "Shutdown lock cleared",
lrm_resource);
}
}
static void
process_resource_updates(const char *node, xmlNode *xml, xmlNode *change,
const char *op, const char *xpath)
{
xmlNode *rsc = NULL;
if (xml == NULL) {
return;
}
if (strcmp(TYPE(xml), XML_CIB_TAG_LRM) == 0) {
xml = first_named_child(xml, XML_LRM_TAG_RESOURCES);
CRM_CHECK(xml != NULL, return);
}
CRM_CHECK(strcmp(TYPE(xml), XML_LRM_TAG_RESOURCES) == 0, return);
/*
* Updates by, or in response to, TE actions will never contain updates
* for more than one resource at a time, so such updates indicate an
* LRM refresh.
*
* In that case, start a new transition rather than check each result
* individually, which can result in _huge_ speedups in large clusters.
*
* Unfortunately, we can only do so when there are no pending actions.
* Otherwise, we could mistakenly throw away those results here, and
* the cluster will stall waiting for them and time out the operation.
*/
if ((transition_graph->pending == 0)
&& xml->children && xml->children->next) {
crm_log_xml_trace(change, "lrm-refresh");
abort_transition(INFINITY, tg_restart, "History refresh", NULL);
return;
}
for (rsc = pcmk__xml_first_child(xml); rsc != NULL;
rsc = pcmk__xml_next(rsc)) {
crm_trace("Processing %s", ID(rsc));
process_lrm_resource_diff(rsc, node);
}
}
static char *extract_node_uuid(const char *xpath)
{
char *mutable_path = strdup(xpath);
char *node_uuid = NULL;
char *search = NULL;
char *match = NULL;
match = strstr(mutable_path, "node_state[@id=\'");
if (match == NULL) {
free(mutable_path);
return NULL;
}
match += strlen("node_state[@id=\'");
search = strchr(match, '\'');
if (search == NULL) {
free(mutable_path);
return NULL;
}
search[0] = 0;
node_uuid = strdup(match);
free(mutable_path);
return node_uuid;
}
static void
abort_unless_down(const char *xpath, const char *op, xmlNode *change,
const char *reason)
{
char *node_uuid = NULL;
crm_action_t *down = NULL;
if(!pcmk__str_eq(op, "delete", pcmk__str_casei)) {
abort_transition(INFINITY, tg_restart, reason, change);
return;
}
node_uuid = extract_node_uuid(xpath);
if(node_uuid == NULL) {
crm_err("Could not extract node ID from %s", xpath);
abort_transition(INFINITY, tg_restart, reason, change);
return;
}
down = match_down_event(node_uuid);
if (down == NULL) {
crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath);
abort_transition(INFINITY, tg_restart, reason, change);
} else {
crm_trace("Expecting changes to %s (%s)", node_uuid, xpath);
}
free(node_uuid);
}
static void
process_op_deletion(const char *xpath, xmlNode *change)
{
char *mutable_key = strdup(xpath);
char *key;
char *node_uuid;
// Extract the part of xpath between last pair of single quotes
key = strrchr(mutable_key, '\'');
if (key != NULL) {
*key = '\0';
key = strrchr(mutable_key, '\'');
}
if (key == NULL) {
crm_warn("Ignoring malformed CIB update (resource deletion of %s)",
xpath);
free(mutable_key);
return;
}
++key;
node_uuid = extract_node_uuid(xpath);
if (confirm_cancel_action(key, node_uuid) == FALSE) {
abort_transition(INFINITY, tg_restart, "Resource operation removal",
change);
}
free(mutable_key);
free(node_uuid);
}
static void
process_delete_diff(const char *xpath, const char *op, xmlNode *change)
{
if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) {
process_op_deletion(xpath, change);
} else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) {
abort_unless_down(xpath, op, change, "Resource state removal");
} else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) {
abort_unless_down(xpath, op, change, "Node state removal");
} else {
crm_trace("Ignoring delete of %s", xpath);
}
}
static void
process_node_state_diff(xmlNode *state, xmlNode *change, const char *op,
const char *xpath)
{
xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);
process_resource_updates(ID(state), lrm, change, op, xpath);
}
static void
process_status_diff(xmlNode *status, xmlNode *change, const char *op,
const char *xpath)
{
for (xmlNode *state = pcmk__xml_first_child(status); state != NULL;
state = pcmk__xml_next(state)) {
process_node_state_diff(state, change, op, xpath);
}
}
static void
process_cib_diff(xmlNode *cib, xmlNode *change, const char *op,
const char *xpath)
{
xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS);
xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION);
if (status) {
process_status_diff(status, change, op, xpath);
}
if (config) {
abort_transition(INFINITY, tg_restart,
"Non-status-only change", change);
}
}
static void
te_update_diff_v2(xmlNode *diff)
{
crm_log_xml_trace(diff, "Patch:Raw");
for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL;
change = pcmk__xml_next(change)) {
xmlNode *match = NULL;
const char *name = NULL;
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
// Possible ops: create, modify, delete, move
const char *op = crm_element_value(change, XML_DIFF_OP);
// Ignore uninteresting updates
if (op == NULL) {
continue;
} else if (xpath == NULL) {
crm_trace("Ignoring %s change for version field", op);
continue;
} else if (strcmp(op, "move") == 0) {
crm_trace("Ignoring move change at %s", xpath);
continue;
}
// Find the result of create/modify ops
if (strcmp(op, "create") == 0) {
match = change->children;
} else if (strcmp(op, "modify") == 0) {
match = first_named_child(change, XML_DIFF_RESULT);
if(match) {
match = match->children;
}
} else if (strcmp(op, "delete") != 0) {
crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)",
op, xpath);
continue;
}
if (match) {
if (match->type == XML_COMMENT_NODE) {
crm_trace("Ignoring %s operation for comment at %s", op, xpath);
continue;
}
name = (const char *)match->name;
}
crm_trace("Handling %s operation for %s%s%s",
op, (xpath? xpath : "CIB"),
(name? " matched by " : ""), (name? name : ""));
if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) {
abort_transition(INFINITY, tg_restart, "Configuration change",
change);
break; // Won't be packaged with operation results we may be waiting for
} else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS)
|| pcmk__str_eq(name, XML_CIB_TAG_TICKETS, pcmk__str_casei)) {
abort_transition(INFINITY, tg_restart, "Ticket attribute change", change);
break; // Won't be packaged with operation results we may be waiting for
} else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[")
|| pcmk__str_eq(name, XML_TAG_TRANSIENT_NODEATTRS, pcmk__str_casei)) {
abort_unless_down(xpath, op, change, "Transient attribute change");
break; // Won't be packaged with operation results we may be waiting for
} else if (strcmp(op, "delete") == 0) {
process_delete_diff(xpath, op, change);
} else if (name == NULL) {
crm_warn("Ignoring malformed CIB update (%s at %s has no result)",
op, xpath);
} else if (strcmp(name, XML_TAG_CIB) == 0) {
process_cib_diff(match, change, op, xpath);
} else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) {
process_status_diff(match, change, op, xpath);
} else if (strcmp(name, XML_CIB_TAG_STATE) == 0) {
process_node_state_diff(match, change, op, xpath);
} else if (strcmp(name, XML_CIB_TAG_LRM) == 0) {
process_resource_updates(ID(match), match, change, op, xpath);
} else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, "lrm");
process_resource_updates(local_node, match, change, op, xpath);
free(local_node);
} else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, "lrm");
process_lrm_resource_diff(match, local_node);
free(local_node);
} else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, "lrm");
process_graph_event(match, local_node);
free(local_node);
} else {
crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)",
op, xpath, name);
}
}
}
void
te_update_diff(const char *event, xmlNode * msg)
{
xmlNode *diff = NULL;
const char *op = NULL;
int rc = -EINVAL;
int format = 1;
int p_add[] = { 0, 0, 0 };
int p_del[] = { 0, 0, 0 };
CRM_CHECK(msg != NULL, return);
crm_element_value_int(msg, F_CIB_RC, &rc);
if (transition_graph == NULL) {
crm_trace("No graph");
return;
} else if (rc < pcmk_ok) {
crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
return;
} else if (transition_graph->complete
&& fsa_state != S_IDLE
&& fsa_state != S_TRANSITION_ENGINE
&& fsa_state != S_POLICY_ENGINE) {
crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state),
transition_graph->complete);
return;
}
op = crm_element_value(msg, F_CIB_OPERATION);
diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
xml_patch_versions(diff, p_add, p_del);
crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op,
p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
fsa_state2string(fsa_state));
crm_element_value_int(diff, "format", &format);
switch (format) {
case 1:
te_update_diff_v1(event, diff);
break;
case 2:
te_update_diff_v2(diff);
break;
default:
crm_warn("Ignoring malformed CIB update (unknown patch format %d)",
format);
}
}
gboolean
process_te_message(xmlNode * msg, xmlNode * xml_data)
{
const char *from = crm_element_value(msg, F_ORIG);
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
const char *ref = crm_element_value(msg, F_CRM_REFERENCE);
const char *op = crm_element_value(msg, F_CRM_TASK);
const char *type = crm_element_value(msg, F_CRM_MSG_TYPE);
crm_trace("Processing %s (%s) message", op, ref);
crm_log_xml_trace(msg, "ipc");
if (op == NULL) {
/* error */
} else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) {
crm_trace("Bad sys-to: %s", pcmk__s(sys_to, "missing"));
return FALSE;
} else if (pcmk__str_eq(op, CRM_OP_INVOKE_LRM, pcmk__str_casei)
&& pcmk__str_eq(sys_from, CRM_SYSTEM_LRMD, pcmk__str_casei)
/* && pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_casei) */
) {
xmlXPathObject *xpathObj = NULL;
crm_log_xml_trace(msg, "Processing (N)ACK");
crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from);
xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP);
if (numXpathResults(xpathObj)) {
int lpc = 0, max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
const char *node = get_node_id(rsc_op);
process_graph_event(rsc_op, node);
}
freeXpathObject(xpathObj);
} else {
crm_log_xml_err(msg, "Invalid (N)ACK");
freeXpathObject(xpathObj);
return FALSE;
}
} else {
crm_err("Unknown command: %s::%s from %s", type, op, sys_from);
}
crm_trace("finished processing message");
return TRUE;
}
void
cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
if (rc < pcmk_ok) {
crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
}
}
/*!
* \brief Handle a timeout in node-to-node communication
*
* \param[in] data Pointer to action timer
*
* \return FALSE (indicating that source should be not be re-added)
*/
gboolean
action_timer_callback(gpointer data)
{
crm_action_timer_t *timer = NULL;
const char *task = NULL;
const char *on_node = NULL;
const char *via_node = NULL;
CRM_CHECK(data != NULL, return FALSE);
timer = (crm_action_timer_t *) data;
stop_te_timer(timer);
CRM_CHECK(timer->action != NULL, return FALSE);
task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK);
on_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_TARGET);
via_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (transition_graph->complete) {
crm_notice("Node %s did not send %s result (via %s) within %dms "
"(ignoring because transition not in progress)",
(on_node? on_node : ""), (task? task : "unknown action"),
(via_node? via_node : "controller"), timer->timeout);
} else {
/* fail the action */
crm_err("Node %s did not send %s result (via %s) within %dms "
"(action timeout plus cluster-delay)",
(on_node? on_node : ""), (task? task : "unknown action"),
(via_node? via_node : "controller"),
timer->timeout + transition_graph->network_delay);
pcmk__log_graph_action(LOG_ERR, timer->action);
crm__set_graph_action_flags(timer->action, pcmk__graph_action_failed);
te_action_confirmed(timer->action, transition_graph);
abort_transition(INFINITY, tg_restart, "Action lost", NULL);
// Record timeout in the CIB if appropriate
- if ((timer->action->type == action_type_rsc)
+ if ((timer->action->type == pcmk__rsc_graph_action)
&& controld_action_is_recordable(task)) {
controld_record_action_timeout(timer->action);
}
}
return FALSE;
}
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
index 1fd7129922..7b049d945a 100644
--- a/daemons/controld/controld_te_events.c
+++ b/daemons/controld/controld_te_events.c
@@ -1,507 +1,508 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
char *failed_stop_offset = NULL;
char *failed_start_offset = NULL;
gboolean
fail_incompletable_actions(crm_graph_t * graph, const char *down_node)
{
const char *target_uuid = NULL;
const char *router = NULL;
const char *router_uuid = NULL;
xmlNode *last_action = NULL;
GList *gIter = NULL;
GList *gIter2 = NULL;
if (graph == NULL || graph->complete) {
return FALSE;
}
gIter = graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
synapse_t *synapse = (synapse_t *) gIter->data;
if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
/* We've already been here */
continue;
}
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
crm_action_t *action = (crm_action_t *) gIter2->data;
- if (action->type == action_type_pseudo || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ if ((action->type == pcmk__pseudo_graph_action)
+ || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
- } else if (action->type == action_type_crm) {
+ } else if (action->type == pcmk__cluster_graph_action) {
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
continue;
}
}
target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (router) {
crm_node_t *node = crm_get_peer(0, router);
if (node) {
router_uuid = node->uuid;
}
}
if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
crm__set_graph_action_flags(action, pcmk__graph_action_failed);
pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
last_action = action->xml;
stop_te_timer(action->timer);
pcmk__update_graph(graph, action);
if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
crm_notice("Action %d (%s) was pending on %s (offline)",
action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
} else {
crm_info("Action %d (%s) is scheduled for %s (offline)",
action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
}
}
}
}
if (last_action != NULL) {
crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
abort_transition(INFINITY, tg_restart, "Node failure", last_action);
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Update failure-related node attributes if warranted
*
* \param[in] event XML describing operation that (maybe) failed
* \param[in] event_node_uuid Node that event occurred on
* \param[in] rc Actual operation return code
* \param[in] target_rc Expected operation return code
* \param[in] do_update If TRUE, do update regardless of operation type
* \param[in] ignore_failures If TRUE, update last failure but not fail count
*
* \return TRUE if this was not a direct nack, success or lrm status refresh
*/
static gboolean
update_failcount(xmlNode * event, const char *event_node_uuid, int rc,
int target_rc, gboolean do_update, gboolean ignore_failures)
{
guint interval_ms = 0;
char *task = NULL;
char *rsc_id = NULL;
const char *value = NULL;
const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
const char *on_uname = crm_peer_uname(event_node_uuid);
const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
// Nothing needs to be done for success or status refresh
if (rc == target_rc) {
return FALSE;
} else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
id, rc, on_uname);
return FALSE;
}
/* Sanity check */
CRM_CHECK(on_uname != NULL, return TRUE);
CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
crm_err("Couldn't parse: %s", ID(event)); goto bail);
/* Decide whether update is necessary and what value to use */
if ((interval_ms > 0) || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)
|| pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
do_update = TRUE;
} else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
do_update = TRUE;
if (failed_start_offset == NULL) {
failed_start_offset = strdup(CRM_INFINITY_S);
}
value = failed_start_offset;
} else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
do_update = TRUE;
if (failed_stop_offset == NULL) {
failed_stop_offset = strdup(CRM_INFINITY_S);
}
value = failed_stop_offset;
}
/* Fail count will be either incremented or set to infinity */
if (!pcmk_str_is_infinity(value)) {
value = XML_NVPAIR_ATTR_VALUE "++";
}
if (do_update) {
char *now = pcmk__ttoa(time(NULL));
char *attr_name = NULL;
gboolean is_remote_node = FALSE;
if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
is_remote_node = TRUE;
}
crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
(ignore_failures? "last failure" : "failcount"),
rsc_id, on_uname, task, rc, value, now);
/* Update the fail count, if we're not ignoring failures */
if (!ignore_failures) {
attr_name = pcmk__failcount_name(rsc_id, task, interval_ms);
update_attrd(on_uname, attr_name, value, NULL, is_remote_node);
free(attr_name);
}
/* Update the last failure time (even if we're ignoring failures,
* so that failure can still be detected and shown, e.g. by crm_mon)
*/
attr_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
update_attrd(on_uname, attr_name, now, NULL, is_remote_node);
free(attr_name);
free(now);
}
bail:
free(rsc_id);
free(task);
return TRUE;
}
crm_action_t *
controld_get_action(int id)
{
for (GList *item = transition_graph->synapses; item; item = item->next) {
synapse_t *synapse = (synapse_t *) item->data;
for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
crm_action_t *action = (crm_action_t *) item2->data;
if (action->id == id) {
return action;
}
}
}
return NULL;
}
crm_action_t *
get_cancel_action(const char *id, const char *node)
{
GList *gIter = NULL;
GList *gIter2 = NULL;
gIter = transition_graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
synapse_t *synapse = (synapse_t *) gIter->data;
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
const char *task = NULL;
const char *target = NULL;
crm_action_t *action = (crm_action_t *) gIter2->data;
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
crm_trace("Wrong key %s for %s on %s", task, id, node);
continue;
}
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
crm_trace("Wrong node %s for %s on %s", target, id, node);
continue;
}
crm_trace("Found %s on %s", id, node);
return action;
}
}
return NULL;
}
bool
confirm_cancel_action(const char *id, const char *node_id)
{
const char *op_key = NULL;
const char *node_name = NULL;
crm_action_t *cancel = get_cancel_action(id, node_id);
if (cancel == NULL) {
return FALSE;
}
op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
stop_te_timer(cancel->timer);
te_action_confirmed(cancel, transition_graph);
crm_info("Cancellation of %s on %s confirmed (action %d)",
op_key, node_name, cancel->id);
return TRUE;
}
/* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
#define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
"/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']"
/*!
* \brief Find a transition event that would have made a specified node down
*
* \param[in] target UUID of node to match
*
* \return Matching event if found, NULL otherwise
*/
crm_action_t *
match_down_event(const char *target)
{
crm_action_t *match = NULL;
xmlXPathObjectPtr xpath_ret = NULL;
GList *gIter, *gIter2;
char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
for (gIter = transition_graph->synapses;
gIter != NULL && match == NULL;
gIter = gIter->next) {
for (gIter2 = ((synapse_t*)gIter->data)->actions;
gIter2 != NULL && match == NULL;
gIter2 = gIter2->next) {
match = (crm_action_t*)gIter2->data;
if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
xpath_ret = xpath_search(match->xml, xpath);
if (numXpathResults(xpath_ret) < 1) {
match = NULL;
}
freeXpathObject(xpath_ret);
} else {
// Only actions that were actually started can match
match = NULL;
}
}
}
free(xpath);
if (match != NULL) {
crm_debug("Shutdown action %d (%s) found for node %s", match->id,
crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target);
} else {
crm_debug("No reason to expect node %s to be down", target);
}
return match;
}
void
process_graph_event(xmlNode *event, const char *event_node)
{
int rc = -1; // Actual result
int target_rc = -1; // Expected result
int status = -1; // Executor status
int callid = -1; // Executor call ID
int transition_num = -1; // Transition number
int action_num = -1; // Action number within transition
char *update_te_uuid = NULL;
bool ignore_failures = FALSE;
const char *id = NULL;
const char *desc = NULL;
const char *magic = NULL;
const char *uname = NULL;
CRM_ASSERT(event != NULL);
/*
<lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/>
*/
magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
if (magic == NULL) {
/* non-change */
return;
}
crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
if (status == PCMK_EXEC_PENDING) {
return;
}
id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
rc = pcmk__effective_rc(rc);
if (decode_transition_key(magic, &update_te_uuid, &transition_num,
&action_num, &target_rc) == FALSE) {
// decode_transition_key() already logged the bad key
crm_err("Can't process action %s result: Incompatible versions? "
CRM_XS " call-id=%d", id, callid);
abort_transition(INFINITY, tg_restart, "Bad event", event);
return;
}
if (transition_num == -1) {
// E.g. crm_resource --fail
desc = "initiated outside of the cluster";
abort_transition(INFINITY, tg_restart, "Unexpected event", event);
} else if ((action_num < 0) || !pcmk__str_eq(update_te_uuid, te_uuid, pcmk__str_none)) {
desc = "initiated by a different DC";
abort_transition(INFINITY, tg_restart, "Foreign event", event);
} else if ((transition_graph->id != transition_num)
|| (transition_graph->complete)) {
// Action is not from currently active transition
guint interval_ms = 0;
if (parse_op_key(id, NULL, NULL, &interval_ms)
&& (interval_ms != 0)) {
/* Recurring actions have the transition number they were first
* scheduled in.
*/
if (status == PCMK_EXEC_CANCELLED) {
confirm_cancel_action(id, get_node_id(event));
goto bail;
}
desc = "arrived after initial scheduling";
abort_transition(INFINITY, tg_restart, "Change in recurring result",
event);
} else if (transition_graph->id != transition_num) {
desc = "arrived really late";
abort_transition(INFINITY, tg_restart, "Old event", event);
} else {
desc = "arrived late";
abort_transition(INFINITY, tg_restart, "Inactive graph", event);
}
} else {
// Event is result of an action from currently active transition
crm_action_t *action = controld_get_action(action_num);
if (action == NULL) {
// Should never happen
desc = "unknown";
abort_transition(INFINITY, tg_restart, "Unknown event", event);
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
/* Nothing further needs to be done if the action has already been
* confirmed. This can happen e.g. when processing both an
* "xxx_last_0" or "xxx_last_failure_0" record as well as the main
* history record, which would otherwise result in incorrectly
* bumping the fail count twice.
*/
crm_log_xml_debug(event, "Event already confirmed:");
goto bail;
} else {
/* An action result needs to be confirmed.
* (This is the only case where desc == NULL.)
*/
if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) {
ignore_failures = TRUE;
} else if (rc != target_rc) {
crm__set_graph_action_flags(action, pcmk__graph_action_failed);
}
stop_te_timer(action->timer);
te_action_confirmed(action, transition_graph);
if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
abort_transition(action->synapse->priority + 1, tg_restart,
"Event failed", event);
}
}
}
if (id == NULL) {
id = "unknown action";
}
uname = crm_element_value(event, XML_LRM_ATTR_TARGET);
if (uname == NULL) {
uname = "unknown node";
}
if (status == PCMK_EXEC_INVALID) {
// We couldn't attempt the action
crm_info("Transition %d action %d (%s on %s): %s",
transition_num, action_num, id, uname,
pcmk_exec_status_str(status));
} else if (desc && update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), FALSE)) {
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(target_rc),
services_ocf_exitcode_str(rc),
target_rc, rc, callid, desc);
} else if (desc) {
crm_info("Transition %d action %d (%s on %s): %s "
CRM_XS " rc=%d target-rc=%d call-id=%d",
transition_num, action_num, id, uname,
desc, rc, target_rc, callid);
} else if (rc == target_rc) {
crm_info("Transition %d action %d (%s on %s) confirmed: %s "
CRM_XS " rc=%d call-id=%d",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(rc), rc, callid);
} else {
update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), ignore_failures);
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
CRM_XS " target-rc=%d rc=%d call-id=%d",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(target_rc),
services_ocf_exitcode_str(rc),
target_rc, rc, callid);
}
bail:
free(update_te_uuid);
}
diff --git a/include/pcmki/pcmki_transition.h b/include/pcmki/pcmki_transition.h
index 69680854df..bf6464e435 100644
--- a/include/pcmki/pcmki_transition.h
+++ b/include/pcmki/pcmki_transition.h
@@ -1,175 +1,175 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__PCMKI_PCMKI_TRANSITION__H
# define PCMK__PCMKI_PCMKI_TRANSITION__H
# include <glib.h>
# include <crm/crm.h>
# include <crm/msg_xml.h>
# include <crm/common/xml.h>
#ifdef __cplusplus
extern "C" {
#endif
-typedef enum {
- action_type_pseudo,
- action_type_rsc,
- action_type_crm
-} action_type_e;
+enum pcmk__graph_action_type {
+ pcmk__pseudo_graph_action,
+ pcmk__rsc_graph_action,
+ pcmk__cluster_graph_action,
+};
typedef struct te_timer_s crm_action_timer_t;
typedef struct crm_graph_s crm_graph_t;
enum pcmk__synapse_flags {
pcmk__synapse_ready = (1 << 0),
pcmk__synapse_failed = (1 << 1),
pcmk__synapse_executed = (1 << 2),
pcmk__synapse_confirmed = (1 << 3),
};
typedef struct synapse_s {
int id;
int priority;
uint32_t flags; // Group of pcmk__synapse_flags
GList *actions; /* crm_action_t* */
GList *inputs; /* crm_action_t* */
} synapse_t;
const char *synapse_state_str(synapse_t *synapse);
#define pcmk__set_synapse_flags(synapse, flags_to_set) do { \
(synapse)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Synapse", "synapse", \
(synapse)->flags, (flags_to_set), #flags_to_set); \
} while (0)
#define pcmk__clear_synapse_flags(synapse, flags_to_clear) do { \
(synapse)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Synapse", "synapse", \
(synapse)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
enum pcmk__graph_action_flags {
pcmk__graph_action_sent_update = (1 << 0), /* sent to the CIB */
pcmk__graph_action_executed = (1 << 1), /* sent to the CRM */
pcmk__graph_action_confirmed = (1 << 2),
pcmk__graph_action_failed = (1 << 3),
pcmk__graph_action_can_fail = (1 << 4), //! \deprecated Will be removed in a future release
};
typedef struct crm_action_s {
int id;
int timeout;
guint interval_ms;
GHashTable *params;
- action_type_e type;
+ enum pcmk__graph_action_type type;
crm_action_timer_t *timer;
synapse_t *synapse;
uint32_t flags; // Group of pcmk__graph_action_flags
xmlNode *xml;
} crm_action_t;
#define crm__set_graph_action_flags(action, flags_to_set) do { \
(action)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Action", "action", \
(action)->flags, (flags_to_set), #flags_to_set); \
} while (0)
#define crm__clear_graph_action_flags(action, flags_to_clear) do { \
(action)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Action", "action", \
(action)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
struct te_timer_s {
int source_id;
int timeout;
crm_action_t *action;
};
/* order matters here */
enum transition_action {
tg_done,
tg_stop,
tg_restart,
tg_shutdown,
};
struct crm_graph_s {
int id;
char *source;
int abort_priority;
gboolean complete;
const char *abort_reason;
enum transition_action completion_action;
int num_actions;
int num_synapses;
int batch_limit;
guint network_delay;
guint stonith_timeout;
int fired;
int pending;
int skipped;
int completed;
int incomplete;
GList *synapses; /* synapse_t* */
int migration_limit;
};
typedef struct crm_graph_functions_s {
gboolean(*pseudo) (crm_graph_t * graph, crm_action_t * action);
gboolean(*rsc) (crm_graph_t * graph, crm_action_t * action);
gboolean(*crmd) (crm_graph_t * graph, crm_action_t * action);
gboolean(*stonith) (crm_graph_t * graph, crm_action_t * action);
gboolean(*allowed) (crm_graph_t * graph, crm_action_t * action);
} crm_graph_functions_t;
enum transition_status {
transition_active,
transition_pending, /* active but no actions performed this time */
transition_complete,
transition_terminated,
};
void pcmk__set_graph_functions(crm_graph_functions_t *fns);
crm_graph_t *pcmk__unpack_graph(xmlNode *xml_graph, const char *reference);
enum transition_status pcmk__execute_graph(crm_graph_t *graph);
void pcmk__update_graph(crm_graph_t *graph, crm_action_t *action);
void pcmk__free_graph(crm_graph_t *graph);
const char *pcmk__graph_status2text(enum transition_status state);
void pcmk__log_graph(unsigned int log_level, crm_graph_t *graph);
void pcmk__log_graph_action(int log_level, crm_action_t *action);
lrmd_event_data_t *pcmk__event_from_graph_action(xmlNode *resource,
crm_action_t *action,
int status, int rc,
const char *exit_reason);
#ifdef __cplusplus
}
#endif
#endif
diff --git a/lib/pacemaker/pcmk_graph_consumer.c b/lib/pacemaker/pcmk_graph_consumer.c
index 88b75b4a31..a8c1af22bb 100644
--- a/lib/pacemaker/pcmk_graph_consumer.c
+++ b/lib/pacemaker/pcmk_graph_consumer.c
@@ -1,854 +1,854 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-internal.h>
/*
* Functions for updating graph
*/
/*!
* \internal
* \brief Update synapse after completed prerequisite
*
* A synapse is ready to be executed once all its prerequisite actions (inputs)
* complete. Given a completed action, check whether it is an input for a given
* synapse, and if so, mark the input as confirmed, and mark the synapse as
* ready if appropriate.
*
* \param[in] synapse Transition graph synapse to update
* \param[in] action_id ID of an action that completed
*
* \note The only substantial effect here is confirming synapse inputs.
* should_fire_synapse() will recalculate pcmk__synapse_ready, so the only
* thing that uses the pcmk__synapse_ready from here is
* synapse_state_str().
*/
static void
update_synapse_ready(synapse_t *synapse, int action_id)
{
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
return; // All inputs have already been confirmed
}
pcmk__set_synapse_flags(synapse, pcmk__synapse_ready); // Presume ready until proven otherwise
for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
crm_action_t *prereq = (crm_action_t *) lpc->data;
if (prereq->id == action_id) {
crm_trace("Confirming input %d of synapse %d",
action_id, synapse->id);
crm__set_graph_action_flags(prereq, pcmk__graph_action_confirmed);
} else if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) {
pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
crm_trace("Synapse %d still not ready after action %d",
synapse->id, action_id);
}
}
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
crm_trace("Synapse %d is now ready to execute", synapse->id);
}
}
/*!
* \internal
* \brief Update action and synapse confirmation after action completion
*
* \param[in] synapse Transition graph synapse that action belongs to
* \param[in] action_id ID of action that completed
*/
static void
update_synapse_confirmed(synapse_t *synapse, int action_id)
{
bool all_confirmed = true;
for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
crm_action_t *action = (crm_action_t *) lpc->data;
if (action->id == action_id) {
crm_trace("Confirmed action %d of synapse %d",
action_id, synapse->id);
crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
} else if (all_confirmed && !(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
all_confirmed = false;
crm_trace("Synapse %d still not confirmed after action %d",
synapse->id, action_id);
}
}
if (all_confirmed && !(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) {
crm_trace("Confirmed synapse %d", synapse->id);
pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed);
}
}
/*!
* \internal
* \brief Update the transition graph with a completed action result
*
* \param[in,out] graph Transition graph to update
* \param[in] action Action that completed
*/
void
pcmk__update_graph(crm_graph_t *graph, crm_action_t *action)
{
for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
synapse_t *synapse = (synapse_t *) lpc->data;
if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
continue; // This synapse already completed
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
update_synapse_confirmed(synapse, action->id);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_failed)) || (synapse->priority == INFINITY)) {
update_synapse_ready(synapse, action->id);
}
}
}
/*
* Functions for executing graph
*/
/* A transition graph consists of various types of actions. The library caller
* registers execution functions for each action type, which will be stored
* here.
*/
static crm_graph_functions_t *graph_fns = NULL;
/*!
* \internal
* \brief Set transition graph execution functions
*
* \param[in] Execution functions to use
*/
void
pcmk__set_graph_functions(crm_graph_functions_t *fns)
{
crm_debug("Setting custom functions for executing transition graphs");
graph_fns = fns;
CRM_ASSERT(graph_fns != NULL);
CRM_ASSERT(graph_fns->rsc != NULL);
CRM_ASSERT(graph_fns->crmd != NULL);
CRM_ASSERT(graph_fns->pseudo != NULL);
CRM_ASSERT(graph_fns->stonith != NULL);
}
/*!
* \internal
* \brief Check whether a graph synapse is ready to be executed
*
* \param[in] graph Transition graph that synapse is part of
* \param[in] synapse Synapse to check
*
* \return true if synapse is ready, false otherwise
*/
static bool
should_fire_synapse(crm_graph_t *graph, synapse_t *synapse)
{
GList *lpc = NULL;
pcmk__set_synapse_flags(synapse, pcmk__synapse_ready);
for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
crm_action_t *prereq = (crm_action_t *) lpc->data;
if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) {
crm_trace("Input %d for synapse %d not yet confirmed",
prereq->id, synapse->id);
pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
break;
} else if (pcmk_is_set(prereq->flags, pcmk__graph_action_failed) && !(pcmk_is_set(prereq->flags, pcmk__graph_action_can_fail))) {
crm_trace("Input %d for synapse %d confirmed but failed",
prereq->id, synapse->id);
pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
break;
}
}
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
crm_trace("Synapse %d is ready to execute", synapse->id);
} else {
return false;
}
for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
crm_action_t *a = (crm_action_t *) lpc->data;
- if (a->type == action_type_pseudo) {
+ if (a->type == pcmk__pseudo_graph_action) {
/* None of the below applies to pseudo ops */
} else if (synapse->priority < graph->abort_priority) {
crm_trace("Skipping synapse %d: priority %d is less than "
"abort priority %d",
synapse->id, synapse->priority, graph->abort_priority);
graph->skipped++;
return false;
} else if (graph_fns->allowed && !(graph_fns->allowed(graph, a))) {
crm_trace("Deferring synapse %d: not allowed", synapse->id);
return false;
}
}
return true;
}
/*!
* \internal
* \brief Initiate an action from a transition graph
*
* \param[in] graph Transition graph containing action
* \param[in] action Action to execute
*
* \return Standard Pacemaker return code
*/
static int
initiate_action(crm_graph_t *graph, crm_action_t *action)
{
const char *id = ID(action->xml);
CRM_CHECK(id != NULL, return EINVAL);
CRM_CHECK(!pcmk_is_set(action->flags, pcmk__graph_action_executed),
return pcmk_rc_already);
crm__set_graph_action_flags(action, pcmk__graph_action_executed);
switch (action->type) {
- case action_type_pseudo:
+ case pcmk__pseudo_graph_action:
crm_trace("Executing pseudo-action %d (%s)", action->id, id);
return graph_fns->pseudo(graph, action)? pcmk_rc_ok : pcmk_rc_error;
- case action_type_rsc:
+ case pcmk__rsc_graph_action:
crm_trace("Executing resource action %d (%s)", action->id, id);
return graph_fns->rsc(graph, action)? pcmk_rc_ok : pcmk_rc_error;
- case action_type_crm:
+ case pcmk__cluster_graph_action:
if (pcmk__str_eq(crm_element_value(action->xml, XML_LRM_ATTR_TASK),
CRM_OP_FENCE, pcmk__str_casei)) {
crm_trace("Executing fencing action %d (%s)",
action->id, id);
return graph_fns->stonith(graph, action)? pcmk_rc_ok : pcmk_rc_error;
}
crm_trace("Executing control action %d (%s)", action->id, id);
return graph_fns->crmd(graph, action)? pcmk_rc_ok : pcmk_rc_error;
default:
crm_err("Unsupported graph action type <%s id='%s'> (bug?)",
crm_element_name(action->xml), id);
return EINVAL;
}
}
/*!
* \internal
* \brief Execute a graph synapse
*
* \param[in] graph Transition graph with synapse to execute
* \param[in] synapse Synapse to execute
*
* \return Standard Pacemaker return value
*/
static int
fire_synapse(crm_graph_t *graph, synapse_t *synapse)
{
pcmk__set_synapse_flags(synapse, pcmk__synapse_executed);
for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
crm_action_t *action = (crm_action_t *) lpc->data;
if (initiate_action(graph, action) != pcmk_rc_ok) {
crm_err("Failed initiating <%s id=%d> in synapse %d",
crm_element_name(action->xml), action->id, synapse->id);
pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed);
crm__set_graph_action_flags(action, pcmk__graph_action_confirmed | pcmk__graph_action_failed);
return pcmk_rc_error;
}
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Dummy graph method that can be used with simulations
*
* \param[in] graph Transition graph containing action
* \param[in] action Action to be initiated
*
* \retval TRUE Action initiation was (simulated to be) successful
* \retval FALSE Action initiation was (simulated to be) failed (due to the
* PE_fail environment variable being set to the action ID)
*/
static gboolean
pseudo_action_dummy(crm_graph_t * graph, crm_action_t * action)
{
static int fail = -1;
if (fail < 0) {
long long fail_ll;
if ((pcmk__scan_ll(getenv("PE_fail"), &fail_ll, 0LL) == pcmk_rc_ok)
&& (fail_ll > 0LL) && (fail_ll <= INT_MAX)) {
fail = (int) fail_ll;
} else {
fail = 0;
}
}
if (action->id == fail) {
crm_err("Dummy event handler: pretending action %d failed", action->id);
crm__set_graph_action_flags(action, pcmk__graph_action_failed);
graph->abort_priority = INFINITY;
} else {
crm_trace("Dummy event handler: action %d initiated", action->id);
}
crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(graph, action);
return TRUE;
}
static crm_graph_functions_t default_fns = {
pseudo_action_dummy,
pseudo_action_dummy,
pseudo_action_dummy,
pseudo_action_dummy
};
/*!
* \internal
* \brief Execute all actions in a transition graph
*
* \param[in] graph Transition graph to execute
*
* \return Status of transition after execution
*/
enum transition_status
pcmk__execute_graph(crm_graph_t *graph)
{
GList *lpc = NULL;
int log_level = LOG_DEBUG;
enum transition_status pass_result = transition_active;
const char *status = "In progress";
if (graph_fns == NULL) {
graph_fns = &default_fns;
}
if (graph == NULL) {
return transition_complete;
}
graph->fired = 0;
graph->pending = 0;
graph->skipped = 0;
graph->completed = 0;
graph->incomplete = 0;
// Count completed and in-flight synapses
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
synapse_t *synapse = (synapse_t *) lpc->data;
if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
graph->completed++;
} else if (!(pcmk_is_set(synapse->flags, pcmk__synapse_failed)) && pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
graph->pending++;
}
}
crm_trace("Executing graph %d (%d synapses already completed, %d pending)",
graph->id, graph->completed, graph->pending);
// Execute any synapses that are ready
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
synapse_t *synapse = (synapse_t *) lpc->data;
if ((graph->batch_limit > 0)
&& (graph->pending >= graph->batch_limit)) {
crm_debug("Throttling graph execution: batch limit (%d) reached",
graph->batch_limit);
break;
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) {
graph->skipped++;
continue;
} else if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_executed)) {
continue; // Already handled
} else if (should_fire_synapse(graph, synapse)) {
graph->fired++;
if (fire_synapse(graph, synapse) != pcmk_rc_ok) {
crm_err("Synapse %d failed to fire", synapse->id);
log_level = LOG_ERR;
graph->abort_priority = INFINITY;
graph->incomplete++;
graph->fired--;
}
if (!(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) {
graph->pending++;
}
} else {
crm_trace("Synapse %d cannot fire", synapse->id);
graph->incomplete++;
}
}
if ((graph->pending == 0) && (graph->fired == 0)) {
graph->complete = TRUE;
if ((graph->incomplete != 0) && (graph->abort_priority <= 0)) {
log_level = LOG_WARNING;
pass_result = transition_terminated;
status = "Terminated";
} else if (graph->skipped != 0) {
log_level = LOG_NOTICE;
pass_result = transition_complete;
status = "Stopped";
} else {
log_level = LOG_NOTICE;
pass_result = transition_complete;
status = "Complete";
}
} else if (graph->fired == 0) {
pass_result = transition_pending;
}
do_crm_log(log_level,
"Transition %d (Complete=%d, Pending=%d,"
" Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s",
graph->id, graph->completed, graph->pending, graph->fired,
graph->skipped, graph->incomplete, graph->source, status);
return pass_result;
}
/*
* Functions for unpacking transition graph XML into structs
*/
/*!
* \internal
* \brief Unpack a transition graph action from XML
*
* \param[in] parent Synapse that action is part of
* \param[in] xml_action Action XML to unparse
*
* \return Newly allocated action on success, or NULL otherwise
*/
static crm_action_t *
unpack_action(synapse_t *parent, xmlNode *xml_action)
{
- action_type_e action_type;
+ enum pcmk__graph_action_type action_type;
crm_action_t *action = NULL;
const char *element = TYPE(xml_action);
const char *value = ID(xml_action);
if (value == NULL) {
crm_err("Ignoring transition graph action without id (bug?)");
crm_log_xml_trace(xml_action, "invalid");
return NULL;
}
if (pcmk__str_eq(element, XML_GRAPH_TAG_RSC_OP, pcmk__str_casei)) {
- action_type = action_type_rsc;
+ action_type = pcmk__rsc_graph_action;
} else if (pcmk__str_eq(element, XML_GRAPH_TAG_PSEUDO_EVENT,
pcmk__str_casei)) {
- action_type = action_type_pseudo;
+ action_type = pcmk__pseudo_graph_action;
} else if (pcmk__str_eq(element, XML_GRAPH_TAG_CRM_EVENT,
pcmk__str_casei)) {
- action_type = action_type_crm;
+ action_type = pcmk__cluster_graph_action;
} else {
crm_err("Ignoring transition graph action of unknown type '%s' (bug?)",
element);
crm_log_xml_trace(xml_action, "invalid");
return NULL;
}
action = calloc(1, sizeof(crm_action_t));
if (action == NULL) {
crm_perror(LOG_CRIT, "Cannot unpack transition graph action");
crm_log_xml_trace(xml_action, "lost");
return NULL;
}
pcmk__scan_min_int(value, &(action->id), -1);
- action->type = action_type_rsc;
+ action->type = pcmk__rsc_graph_action;
action->xml = copy_xml(xml_action);
action->synapse = parent;
action->type = action_type;
action->params = xml2list(action->xml);
value = g_hash_table_lookup(action->params, "CRM_meta_timeout");
pcmk__scan_min_int(value, &(action->timeout), 0);
/* Take start-delay into account for the timeout of the action timer */
value = g_hash_table_lookup(action->params, "CRM_meta_start_delay");
{
int start_delay;
pcmk__scan_min_int(value, &start_delay, 0);
action->timeout += start_delay;
}
if (pcmk__guint_from_hash(action->params,
CRM_META "_" XML_LRM_ATTR_INTERVAL, 0,
&(action->interval_ms)) != pcmk_rc_ok) {
action->interval_ms = 0;
}
value = g_hash_table_lookup(action->params, "CRM_meta_can_fail");
if (value != NULL) {
gboolean can_fail = FALSE;
crm_str_to_boolean(value, &can_fail);
if (can_fail) {
crm__set_graph_action_flags(action, pcmk__graph_action_can_fail);
} else {
crm__clear_graph_action_flags(action, pcmk__graph_action_can_fail);
}
#ifndef PCMK__COMPAT_2_0
if (pcmk_is_set(action->flags, pcmk__graph_action_can_fail)) {
crm_warn("Support for the can_fail meta-attribute is deprecated"
" and will be removed in a future release");
}
#endif
}
crm_trace("Action %d has timer set to %dms", action->id, action->timeout);
return action;
}
/*!
* \internal
* \brief Unpack transition graph synapse from XML
*
* \param[in] new_graph Transition graph that synapse is part of
* \param[in] xml_synapse Synapse XML
*
* \return Newly allocated synapse on success, or NULL otherwise
*/
static synapse_t *
unpack_synapse(crm_graph_t *new_graph, xmlNode *xml_synapse)
{
const char *value = NULL;
xmlNode *action_set = NULL;
synapse_t *new_synapse = NULL;
crm_trace("Unpacking synapse %s", ID(xml_synapse));
new_synapse = calloc(1, sizeof(synapse_t));
if (new_synapse == NULL) {
return NULL;
}
pcmk__scan_min_int(ID(xml_synapse), &(new_synapse->id), 0);
value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY);
pcmk__scan_min_int(value, &(new_synapse->priority), 0);
CRM_CHECK(new_synapse->id >= 0, free(new_synapse);
return NULL);
new_graph->num_synapses++;
crm_trace("Unpacking synapse %s action sets",
crm_element_value(xml_synapse, XML_ATTR_ID));
for (action_set = first_named_child(xml_synapse, "action_set");
action_set != NULL; action_set = crm_next_same_xml(action_set)) {
for (xmlNode *action = pcmk__xml_first_child(action_set);
action != NULL; action = pcmk__xml_next(action)) {
crm_action_t *new_action = unpack_action(new_synapse, action);
if (new_action == NULL) {
continue;
}
crm_trace("Adding action %d to synapse %d",
new_action->id, new_synapse->id);
new_graph->num_actions++;
new_synapse->actions = g_list_append(new_synapse->actions,
new_action);
}
}
crm_trace("Unpacking synapse %s inputs", ID(xml_synapse));
for (xmlNode *inputs = first_named_child(xml_synapse, "inputs");
inputs != NULL; inputs = crm_next_same_xml(inputs)) {
for (xmlNode *trigger = first_named_child(inputs, "trigger");
trigger != NULL; trigger = crm_next_same_xml(trigger)) {
for (xmlNode *input = pcmk__xml_first_child(trigger);
input != NULL; input = pcmk__xml_next(input)) {
crm_action_t *new_input = unpack_action(new_synapse, input);
if (new_input == NULL) {
continue;
}
crm_trace("Adding input %d to synapse %d",
new_input->id, new_synapse->id);
new_synapse->inputs = g_list_append(new_synapse->inputs,
new_input);
}
}
}
return new_synapse;
}
/*!
* \internal
* \brief Unpack transition graph XML
*
* \param[in] xml_graph Transition graph XML to unpack
* \param[in] reference Where the XML came from (for logging)
*
* \return Newly allocated transition graph on success, NULL otherwise
* \note The caller is responsible for freeing the return value using
* pcmk__free_graph().
* \note The XML is expected to be structured like:
<transition_graph ...>
<synapse id="0">
<action_set>
<rsc_op id="2" ...>
...
</action_set>
<inputs>
<rsc_op id="1" ...
...
</inputs>
</synapse>
...
</transition_graph>
*/
crm_graph_t *
pcmk__unpack_graph(xmlNode *xml_graph, const char *reference)
{
crm_graph_t *new_graph = NULL;
const char *t_id = NULL;
const char *time = NULL;
new_graph = calloc(1, sizeof(crm_graph_t));
if (new_graph == NULL) {
return NULL;
}
new_graph->source = strdup((reference == NULL)? "unknown" : reference);
if (new_graph->source == NULL) {
free(new_graph);
return NULL;
}
new_graph->id = -1;
new_graph->abort_priority = 0;
new_graph->network_delay = 0;
new_graph->stonith_timeout = 0;
new_graph->completion_action = tg_done;
// Parse top-level attributes from <transition_graph>
if (xml_graph != NULL) {
t_id = crm_element_value(xml_graph, "transition_id");
CRM_CHECK(t_id != NULL, free(new_graph);
return NULL);
pcmk__scan_min_int(t_id, &(new_graph->id), -1);
time = crm_element_value(xml_graph, "cluster-delay");
CRM_CHECK(time != NULL, free(new_graph);
return NULL);
new_graph->network_delay = crm_parse_interval_spec(time);
time = crm_element_value(xml_graph, "stonith-timeout");
if (time == NULL) {
new_graph->stonith_timeout = new_graph->network_delay;
} else {
new_graph->stonith_timeout = crm_parse_interval_spec(time);
}
// Use 0 (dynamic limit) as default/invalid, -1 (no limit) as minimum
t_id = crm_element_value(xml_graph, "batch-limit");
if ((t_id == NULL)
|| (pcmk__scan_min_int(t_id, &(new_graph->batch_limit),
-1) != pcmk_rc_ok)) {
new_graph->batch_limit = 0;
}
t_id = crm_element_value(xml_graph, "migration-limit");
pcmk__scan_min_int(t_id, &(new_graph->migration_limit), -1);
}
// Unpack each child <synapse> element
for (xmlNode *synapse_xml = first_named_child(xml_graph, "synapse");
synapse_xml != NULL; synapse_xml = crm_next_same_xml(synapse_xml)) {
synapse_t *new_synapse = unpack_synapse(new_graph, synapse_xml);
if (new_synapse != NULL) {
new_graph->synapses = g_list_append(new_graph->synapses,
new_synapse);
}
}
crm_debug("Unpacked transition %d from %s: %d actions in %d synapses",
new_graph->id, new_graph->source, new_graph->num_actions,
new_graph->num_synapses);
return new_graph;
}
/*
* Functions for freeing transition graph objects
*/
/*!
* \internal
* \brief Free a transition graph action object
*
* \param[in] user_data Action to free
*/
static void
free_graph_action(gpointer user_data)
{
crm_action_t *action = user_data;
if ((action->timer != NULL) && (action->timer->source_id != 0)) {
crm_warn("Cancelling timer for graph action %d", action->id);
g_source_remove(action->timer->source_id);
}
if (action->params != NULL) {
g_hash_table_destroy(action->params);
}
free_xml(action->xml);
free(action->timer);
free(action);
}
/*!
* \internal
* \brief Free a transition graph synapse object
*
* \param[in] user_data Synapse to free
*/
static void
free_graph_synapse(gpointer user_data)
{
synapse_t *synapse = user_data;
g_list_free_full(synapse->actions, free_graph_action);
g_list_free_full(synapse->inputs, free_graph_action);
free(synapse);
}
/*!
* \internal
* \brief Free a transition graph object
*
* \param[in] graph Transition graph to free
*/
void
pcmk__free_graph(crm_graph_t *graph)
{
if (graph != NULL) {
g_list_free_full(graph->synapses, free_graph_synapse);
free(graph->source);
free(graph);
}
}
/*
* Other transition graph utilities
*/
/*!
* \internal
* \brief Synthesize an executor event from a graph action
*
* \param[in] resource If not NULL, use greater call ID than in this XML
* \param[in] action Graph action
* \param[in] status What to use as event execution status
* \param[in] rc What to use as event exit status
* \param[in] exit_reason What to use as event exit reason
*
* \return Newly allocated executor event on success, or NULL otherwise
*/
lrmd_event_data_t *
pcmk__event_from_graph_action(xmlNode *resource, crm_action_t *action,
int status, int rc, const char *exit_reason)
{
lrmd_event_data_t *op = NULL;
GHashTableIter iter;
const char *name = NULL;
const char *value = NULL;
xmlNode *action_resource = NULL;
CRM_CHECK(action != NULL, return NULL);
- CRM_CHECK(action->type == action_type_rsc, return NULL);
+ CRM_CHECK(action->type == pcmk__rsc_graph_action, return NULL);
action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "invalid");
return NULL);
op = lrmd_new_event(ID(action_resource),
crm_element_value(action->xml, XML_LRM_ATTR_TASK),
action->interval_ms);
lrmd__set_result(op, rc, status, exit_reason);
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
op->params = pcmk__strkey_table(free, free);
g_hash_table_iter_init(&iter, action->params);
while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) {
g_hash_table_insert(op->params, strdup(name), strdup(value));
}
for (xmlNode *xop = pcmk__xml_first_child(resource); xop != NULL;
xop = pcmk__xml_next(xop)) {
int tmp = 0;
crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp);
crm_debug("Got call_id=%d for %s", tmp, ID(resource));
if (tmp > op->call_id) {
op->call_id = tmp;
}
}
op->call_id++;
return op;
}
diff --git a/lib/pacemaker/pcmk_graph_logging.c b/lib/pacemaker/pcmk_graph_logging.c
index 0a3d14f49d..b03b2a3ff7 100644
--- a/lib/pacemaker/pcmk_graph_logging.c
+++ b/lib/pacemaker/pcmk_graph_logging.c
@@ -1,210 +1,210 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <pacemaker-internal.h>
/*!
* \internal
* \brief Return text equivalent of an enum transition_status for logging
*
* \param[in] state Transition status
*
* \return Human-readable text equivalent of \p state
*/
const char *
pcmk__graph_status2text(enum transition_status state)
{
switch (state) {
case transition_active:
return "active";
case transition_pending:
return "pending";
case transition_complete:
return "complete";
case transition_terminated:
return "terminated";
}
return "unknown";
}
static const char *
-actiontype2text(action_type_e type)
+actiontype2text(enum pcmk__graph_action_type type)
{
switch (type) {
- case action_type_pseudo:
+ case pcmk__pseudo_graph_action:
return "pseudo";
- case action_type_rsc:
+ case pcmk__rsc_graph_action:
return "resource";
- case action_type_crm:
+ case pcmk__cluster_graph_action:
return "cluster";
}
return "invalid";
}
/*!
* \internal
* \brief Find a transition graph action by ID
*
* \param[in] graph Transition graph to search
* \param[in] id Action ID to search for
*
* \return Transition graph action corresponding to \p id, or NULL if none
*/
static crm_action_t *
find_graph_action_by_id(crm_graph_t *graph, int id)
{
if (graph == NULL) {
return NULL;
}
for (GList *sIter = graph->synapses; sIter != NULL; sIter = sIter->next) {
synapse_t *synapse = (synapse_t *) sIter->data;
for (GList *aIter = synapse->actions; aIter != NULL;
aIter = aIter->next) {
crm_action_t *action = (crm_action_t *) aIter->data;
if (action->id == id) {
return action;
}
}
}
return NULL;
}
const char *
synapse_state_str(synapse_t *synapse)
{
if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) {
return "Failed";
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
return "Completed";
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
return "In-flight";
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
return "Ready";
}
return "Pending";
}
// List action IDs of inputs in graph that haven't completed successfully
static char *
synapse_pending_inputs(crm_graph_t *graph, synapse_t *synapse)
{
char *pending = NULL;
size_t pending_len = 0;
for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
crm_action_t *input = (crm_action_t *) lpc->data;
if (pcmk_is_set(input->flags, pcmk__graph_action_failed)) {
pcmk__add_word(&pending, &pending_len, ID(input->xml));
} else if (pcmk_is_set(input->flags, pcmk__graph_action_confirmed)) {
// Confirmed successful inputs are not pending
} else if (find_graph_action_by_id(graph, input->id) != NULL) {
// In-flight or pending
pcmk__add_word(&pending, &pending_len, ID(input->xml));
}
}
if (pending == NULL) {
pending = strdup("none");
}
return pending;
}
// Log synapse inputs that aren't in graph
static void
log_unresolved_inputs(unsigned int log_level, crm_graph_t *graph,
synapse_t *synapse)
{
for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
crm_action_t *input = (crm_action_t *) lpc->data;
const char *key = crm_element_value(input->xml, XML_LRM_ATTR_TASK_KEY);
const char *host = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
if (find_graph_action_by_id(graph, input->id) == NULL) {
do_crm_log(log_level,
" * [Input %2d]: Unresolved dependency %s op %s%s%s",
input->id, actiontype2text(input->type), key,
(host? " on " : ""), (host? host : ""));
}
}
}
static void
log_synapse_action(unsigned int log_level, synapse_t *synapse,
crm_action_t *action, const char *pending_inputs)
{
const char *key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
const char *host = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
char *desc = crm_strdup_printf("%s %s op %s",
synapse_state_str(synapse),
actiontype2text(action->type), key);
do_crm_log(log_level,
"[Action %4d]: %-50s%s%s (priority: %d, waiting: %s)",
action->id, desc, (host? " on " : ""), (host? host : ""),
synapse->priority, pending_inputs);
free(desc);
}
static void
log_synapse(unsigned int log_level, crm_graph_t *graph, synapse_t *synapse)
{
char *pending = NULL;
if (!pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
pending = synapse_pending_inputs(graph, synapse);
}
for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
log_synapse_action(log_level, synapse, (crm_action_t *) lpc->data,
pending);
}
free(pending);
if (!pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
log_unresolved_inputs(log_level, graph, synapse);
}
}
void
pcmk__log_graph_action(int log_level, crm_action_t *action)
{
log_synapse(log_level, NULL, action->synapse);
}
void
pcmk__log_graph(unsigned int log_level, crm_graph_t *graph)
{
if ((graph == NULL) || (graph->num_actions == 0)) {
if (log_level == LOG_TRACE) {
crm_debug("Empty transition graph");
}
return;
}
do_crm_log(log_level, "Graph %d with %d actions:"
" batch-limit=%d jobs, network-delay=%ums",
graph->id, graph->num_actions,
graph->batch_limit, graph->network_delay);
for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
log_synapse(log_level, graph, (synapse_t *) lpc->data);
}
}

File Metadata

Mime Type
text/x-diff
Expires
Wed, Jun 4, 5:54 AM (6 h, 25 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1854708
Default Alt Text
(140 KB)

Event Timeline