Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4149365
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
140 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 7b7a2f05b7..d352d65cfe 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,1051 +1,1052 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <pacemaker-controld.h>
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
/*
* stonith failure counting
*
* We don't want to get stuck in a permanent fencing loop. Keep track of the
* number of fencing failures for each target node, and the most we'll restart a
* transition for.
*/
struct st_fail_rec {
int count;
};
static bool fence_reaction_panic = FALSE;
static unsigned long int stonith_max_attempts = 10;
static GHashTable *stonith_failures = NULL;
void
update_stonith_max_attempts(const char *value)
{
stonith_max_attempts = char2score(value);
if (stonith_max_attempts < 1UL) {
stonith_max_attempts = 10UL;
}
}
void
set_fence_reaction(const char *reaction_s)
{
if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
fence_reaction_panic = TRUE;
} else {
if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
crm_warn("Invalid value '%s' for %s, using 'stop'",
reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
}
fence_reaction_panic = FALSE;
}
}
static gboolean
too_many_st_failures(const char *target)
{
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *value = NULL;
if (stonith_failures == NULL) {
return FALSE;
}
if (target == NULL) {
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
if (value->count >= stonith_max_attempts) {
target = (const char*)key;
goto too_many;
}
}
} else {
value = g_hash_table_lookup(stonith_failures, target);
if ((value != NULL) && (value->count >= stonith_max_attempts)) {
goto too_many;
}
}
return FALSE;
too_many:
crm_warn("Too many failures (%d) to fence %s, giving up",
value->count, target);
return TRUE;
}
/*!
* \internal
* \brief Reset a stonith fail count
*
* \param[in] target Name of node to reset, or NULL for all
*/
void
st_fail_count_reset(const char *target)
{
if (stonith_failures == NULL) {
return;
}
if (target) {
struct st_fail_rec *rec = NULL;
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count = 0;
}
} else {
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *rec = NULL;
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rec)) {
rec->count = 0;
}
}
}
static void
st_fail_count_increment(const char *target)
{
struct st_fail_rec *rec = NULL;
if (stonith_failures == NULL) {
stonith_failures = pcmk__strkey_table(free, free);
}
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count++;
} else {
rec = malloc(sizeof(struct st_fail_rec));
if(rec == NULL) {
return;
}
rec->count = 1;
g_hash_table_insert(stonith_failures, strdup(target), rec);
}
}
/* end stonith fail count functions */
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
if (rc < pcmk_ok) {
crm_err("Fencing update %d for %s: failed - %s (%d)",
call_id, (char *)user_data, pcmk_strerror(rc), rc);
crm_log_xml_warn(msg, "Failed update");
abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
} else {
crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
}
}
static void
send_stonith_update(crm_action_t *action, const char *target, const char *uuid)
{
int rc = pcmk_ok;
crm_node_t *peer = NULL;
/* We (usually) rely on the membership layer to do node_update_cluster,
* and the peer status callback to do node_update_peer, because the node
* might have already rejoined before we get the stonith result here.
*/
int flags = node_update_join | node_update_expected;
/* zero out the node-status & remove all LRM status info */
xmlNode *node_state = NULL;
CRM_CHECK(target != NULL, return);
CRM_CHECK(uuid != NULL, return);
/* Make sure the membership and join caches are accurate */
peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
/* Usually, we rely on the membership layer to update the cluster state
* in the CIB. However, if the node has never been seen, do it here, so
* the node is not considered unclean.
*/
flags |= node_update_cluster;
}
if (peer->uuid == NULL) {
crm_info("Recording uuid '%s' for node '%s'", uuid, target);
peer->uuid = strdup(uuid);
}
crmd_peer_down(peer, TRUE);
/* Generate a node state update for the CIB */
node_state = create_node_state_update(peer, flags, NULL, __func__);
/* we have to mark whether or not remote nodes have already been fenced */
if (peer->flags & crm_remote_node) {
char *now_s = pcmk__ttoa(time(NULL));
crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
free(now_s);
}
/* Force our known ID */
crm_xml_add(node_state, XML_ATTR_UUID, uuid);
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
cib_quorum_override | cib_scope_local | cib_can_create);
/* Delay processing the trigger until the update completes */
crm_debug("Sending fencing update %d for %s", rc, target);
fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
/* Make sure it sticks */
/* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */
controld_delete_node_state(peer->uname, controld_section_all,
cib_scope_local);
free_xml(node_state);
return;
}
/*!
* \internal
* \brief Abort transition due to stonith failure
*
* \param[in] abort_action Whether to restart or stop transition
* \param[in] target Don't restart if this (NULL for any) has too many failures
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
*/
static void
abort_for_stonith_failure(enum transition_action abort_action,
const char *target, xmlNode *reason)
{
/* If stonith repeatedly fails, we eventually give up on starting a new
* transition for that reason.
*/
if ((abort_action != tg_stop) && too_many_st_failures(target)) {
abort_action = tg_stop;
}
abort_transition(INFINITY, abort_action, "Stonith failed", reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GList *stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target) {
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GList *iter = stonith_cleanup_list;
while (iter != NULL) {
GList *tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup()
{
if (stonith_cleanup_list) {
GList *iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup()
{
GList *iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_node_t *target_node = crm_get_peer(0, target);
const char *uuid = crm_peer_uuid(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
send_stonith_update(NULL, target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
static stonith_t *stonith_api = NULL;
static crm_trigger_t *stonith_reconnect = NULL;
static char *te_client_id = NULL;
static gboolean
fail_incompletable_stonith(crm_graph_t *graph)
{
GList *lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GList *lpc2 = NULL;
synapse_t *synapse = (synapse_t *) lpc->data;
if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
crm_action_t *action = (crm_action_t *) lpc2->data;
- if (action->type != action_type_crm || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ if ((action->type != pcmk__cluster_graph_action)
+ || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
crm__set_graph_action_flags(action, pcmk__graph_action_failed);
last_action = action->xml;
pcmk__update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(tg_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
te_cleanup_stonith_history_sync(st, FALSE);
if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencing daemon disconnected");
}
if (stonith_api) {
/* the client API won't properly reconnect notifications
* if they are still in the table - so remove them
*/
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(st);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (AM_I_DC) {
fail_incompletable_stonith(transition_graph);
trigger_graph();
}
}
/*!
* \internal
* \brief Handle an event notification from the fencing API
*
* \param[in] st Fencing API connection
* \param[in] event Fencing API event notification
*/
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
bool succeeded = true;
const char *executioner = "the cluster";
const char *client = "a client";
const char *reason = NULL;
int exec_status;
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (event == NULL) {
crm_err("Notify data not found");
return;
}
if (event->executioner != NULL) {
executioner = event->executioner;
}
if (event->client_origin != NULL) {
client = event->client_origin;
}
exec_status = stonith__event_execution_status(event);
if ((stonith__event_exit_status(event) != CRM_EX_OK)
|| (exec_status != PCMK_EXEC_DONE)) {
succeeded = false;
if (exec_status == PCMK_EXEC_DONE) {
exec_status = PCMK_EXEC_ERROR;
}
}
reason = stonith__event_exit_reason(event);
crmd_alert_fencing_op(event);
if (pcmk__str_eq("on", event->action, pcmk__str_none)) {
// Unfencing doesn't need special handling, just a log message
if (succeeded) {
crm_notice("%s was unfenced by %s at the request of %s@%s",
event->target, executioner, client, event->origin);
} else {
crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
event->target, executioner,
pcmk_exec_status_str(exec_status),
((reason == NULL)? "" : ": "),
((reason == NULL)? "" : reason),
stonith__event_exit_status(event));
}
return;
}
if (succeeded
&& pcmk__str_eq(event->target, fsa_our_uname, pcmk__str_casei)) {
/* We were notified of our own fencing. Most likely, either fencing was
* misconfigured, or fabric fencing that doesn't cut cluster
* communication is in use.
*
* Either way, shutting down the local host is a good idea, to require
* administrator intervention. Also, other nodes would otherwise likely
* set our status to lost because of the fencing callback and discard
* our subsequent election votes as "not part of our cluster".
*/
crm_crit("We were allegedly just fenced by %s for %s!",
executioner, event->origin); // Dumps blackbox if enabled
if (fence_reaction_panic) {
pcmk__panic(__func__);
} else {
crm_exit(CRM_EX_FATAL);
}
return; // Should never get here
}
/* Update the count of fencing failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC) {
if (succeeded) {
st_fail_count_reset(event->target);
} else {
st_fail_count_increment(event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
"%s%s%s%s " CRM_XS " event=%s",
event->target, (succeeded? "" : " not"),
event->action, executioner, client, event->origin,
(succeeded? "OK" : pcmk_exec_status_str(exec_status)),
((reason == NULL)? "" : " ("),
((reason == NULL)? "" : reason),
((reason == NULL)? "" : ")"),
event->id);
if (succeeded) {
crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
CRM_GET_PEER_ANY);
const char *uuid = NULL;
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
if (AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
* on the scheduler creating fence pseudo-events for the guests.
*/
if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
/* Abort the current transition if it wasn't the cluster that
* initiated fencing.
*/
crm_info("External fencing operation from %s fenced %s",
client, event->target);
abort_transition(INFINITY, tg_restart,
"External Fencing Operation", NULL);
}
/* Assume it was our leader if we don't currently have one */
} else if (pcmk__str_eq(fsa_our_dc, event->target,
pcmk__str_null_matches|pcmk__str_casei)
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
crm_notice("Fencing target %s %s our leader",
event->target, (fsa_our_dc? "was" : "may have been"));
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (pcmk__str_eq(event->executioner, fsa_our_uname,
pcmk__str_casei)) {
send_stonith_update(NULL, event->target, uuid);
}
add_stonith_cleanup(event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (pcmk_is_set(peer->flags, crm_remote_node)) {
remote_ra_fail(event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
* \brief Connect to fencer
*
* \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
*
* \return TRUE
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
*/
static gboolean
te_connect_stonith(gpointer user_data)
{
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return TRUE;
}
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
return TRUE;
}
if (user_data == NULL) {
// Blocking (retry failures now until successful)
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 30 attempts: %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (rc != pcmk_ok) {
if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_notice("Fencer connection failed (will retry): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
return TRUE;
}
}
if (rc == pcmk_ok) {
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_FENCE,
handle_fence_notification);
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_HISTORY_SYNCED,
tengine_stonith_history_synced);
te_trigger_stonith_history_sync(TRUE);
crm_notice("Fencer successfully connected");
}
return TRUE;
}
/*!
\internal
\brief Schedule fencer connection attempt in main loop
*/
void
controld_trigger_fencer_connect()
{
if (stonith_reconnect == NULL) {
stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
te_connect_stonith,
GINT_TO_POINTER(TRUE));
}
controld_set_fsa_input_flags(R_ST_REQUIRED);
mainloop_set_trigger(stonith_reconnect);
}
void
controld_disconnect_fencer(bool destroy)
{
if (stonith_api) {
// Prevent fencer connection from coming up again
controld_clear_fsa_input_flags(R_ST_REQUIRED);
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(stonith_api);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (destroy) {
if (stonith_api) {
stonith_api->cmds->free(stonith_api);
stonith_api = NULL;
}
if (stonith_reconnect) {
mainloop_destroy_trigger(stonith_reconnect);
stonith_reconnect = NULL;
}
if (te_client_id) {
free(te_client_id);
te_client_id = NULL;
}
}
}
static gboolean
do_stonith_history_sync(gpointer user_data)
{
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
stonith_history_t *history = NULL;
te_cleanup_stonith_history_sync(stonith_api, FALSE);
stonith_api->cmds->history(stonith_api,
st_opt_sync_call | st_opt_broadcast,
NULL, &history, 5);
stonith_history_free(history);
return TRUE;
} else {
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
return FALSE;
}
}
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
char *uuid = NULL;
int stonith_id = -1;
int transition_id = -1;
crm_action_t *action = NULL;
const char *target = NULL;
if ((data == NULL) || (data->userdata == NULL)) {
crm_err("Ignoring fence operation %d result: "
"No transition key given (bug?)",
((data == NULL)? -1 : data->call_id));
return;
}
if (!AM_I_DC) {
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
reason = pcmk_exec_status_str(stonith__execution_status(data));
}
crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
data->call_id, stonith__exit_status(data), reason,
(const char *) data->userdata);
return;
}
CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
&stonith_id, NULL),
goto bail);
if (transition_graph->complete || (stonith_id < 0)
|| !pcmk__str_eq(uuid, te_uuid, pcmk__str_none)
|| (transition_graph->id != transition_id)) {
crm_info("Ignoring fence operation %d result: "
"Not from current transition " CRM_XS
" complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
data->call_id, pcmk__btoa(transition_graph->complete),
stonith_id, uuid, te_uuid, transition_id, transition_graph->id);
goto bail;
}
action = controld_get_action(stonith_id);
if (action == NULL) {
crm_err("Ignoring fence operation %d result: "
"Action %d not found in transition graph (bug?) "
CRM_XS " uuid=%s transition=%d",
data->call_id, stonith_id, uuid, transition_id);
goto bail;
}
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if (target == NULL) {
crm_err("Ignoring fence operation %d result: No target given (bug?)",
data->call_id);
goto bail;
}
stop_te_timer(action->timer);
if (stonith__exit_status(data) == CRM_EX_OK) {
const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *op = crm_meta_value(action->params, "stonith_action");
crm_info("Fence operation %d for %s succeeded", data->call_id, target);
if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
te_action_confirmed(action, NULL);
if (pcmk__str_eq("on", op, pcmk__str_casei)) {
const char *value = NULL;
char *now = pcmk__ttoa(time(NULL));
gboolean is_remote_node = FALSE;
/* This check is not 100% reliable, since this node is not
* guaranteed to have the remote node cached. However, it
* doesn't have to be reliable, since the attribute manager can
* learn a node's "remoteness" by other means sooner or later.
* This allows it to learn more quickly if this node does have
* the information.
*/
if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
is_remote_node = TRUE;
}
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
is_remote_node);
free(now);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
is_remote_node);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
is_remote_node);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
send_stonith_update(action, target, uuid);
crm__set_graph_action_flags(action, pcmk__graph_action_sent_update);
}
}
st_fail_count_reset(target);
} else {
enum transition_action abort_action = tg_restart;
int status = stonith__execution_status(data);
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
if (status == PCMK_EXEC_DONE) {
reason = "Agent returned error";
} else {
reason = pcmk_exec_status_str(status);
}
}
crm__set_graph_action_flags(action, pcmk__graph_action_failed);
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
crm_warn("Fence operation %d for %s failed: %s "
"(aborting transition and giving up for now)",
data->call_id, target, reason);
abort_action = tg_stop;
} else {
crm_notice("Fence operation %d for %s failed: %s "
"(aborting transition)", data->call_id, target, reason);
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in
* handle_fence_notification().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
pcmk__update_graph(transition_graph, action);
trigger_graph();
bail:
free(data->userdata);
free(uuid);
return;
}
static int
fence_with_delay(const char *target, const char *type, const char *delay)
{
uint32_t options = st_opt_none; // Group of enum stonith_call_options
int timeout_sec = (int) (transition_graph->stonith_timeout / 1000);
int delay_i;
if (crmd_join_phase_count(crm_join_confirmed) == 1) {
stonith__set_call_options(options, target, st_opt_allow_suicide);
}
pcmk__scan_min_int(delay, &delay_i, 0);
return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
type, timeout_sec, 0, delay_i);
}
gboolean
te_fence_node(crm_graph_t *graph, crm_action_t *action)
{
int rc = 0;
const char *id = NULL;
const char *uuid = NULL;
const char *target = NULL;
const char *type = NULL;
char *transition_key = NULL;
const char *priority_delay = NULL;
gboolean invalid_action = FALSE;
id = ID(action->xml);
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
type = crm_meta_value(action->params, "stonith_action");
CRM_CHECK(id != NULL, invalid_action = TRUE);
CRM_CHECK(uuid != NULL, invalid_action = TRUE);
CRM_CHECK(type != NULL, invalid_action = TRUE);
CRM_CHECK(target != NULL, invalid_action = TRUE);
if (invalid_action) {
crm_log_xml_warn(action->xml, "BadAction");
return FALSE;
}
priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
crm_notice("Requesting fencing (%s) of node %s "
CRM_XS " action=%s timeout=%u%s%s",
type, target, id, transition_graph->stonith_timeout,
priority_delay ? " priority_delay=" : "",
priority_delay ? priority_delay : "");
/* Passing NULL means block until we can connect... */
te_connect_stonith(NULL);
rc = fence_with_delay(target, type, priority_delay);
transition_key = pcmk__transition_key(transition_graph->id, action->id, 0,
te_uuid),
stonith_api->cmds->register_callback(stonith_api, rc,
(int) (transition_graph->stonith_timeout / 1000),
st_opt_timeout_updates, transition_key,
"tengine_stonith_callback", tengine_stonith_callback);
return TRUE;
}
bool
controld_verify_stonith_watchdog_timeout(const char *value)
{
gboolean rv = TRUE;
if (stonith_api && (stonith_api->state != stonith_disconnected) &&
stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
fsa_our_uname)) {
rv = pcmk__valid_sbd_timeout(value);
}
return rv;
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
static crm_trigger_t *stonith_history_sync_trigger = NULL;
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
void
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
{
if (free_timers) {
mainloop_timer_del(stonith_history_sync_timer_short);
stonith_history_sync_timer_short = NULL;
mainloop_timer_del(stonith_history_sync_timer_long);
stonith_history_sync_timer_long = NULL;
} else {
mainloop_timer_stop(stonith_history_sync_timer_short);
mainloop_timer_stop(stonith_history_sync_timer_long);
}
if (st) {
st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
}
}
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
{
te_cleanup_stonith_history_sync(st, FALSE);
crm_debug("Fence-history synced - cancel all timers");
}
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
mainloop_set_trigger(stonith_history_sync_trigger);
return FALSE;
}
void
te_trigger_stonith_history_sync(bool long_timeout)
{
/* trigger a sync in 5s to give more nodes the
* chance to show up so that we don't create
* unnecessary stonith-history-sync traffic
*
* the long timeout of 30s is there as a fallback
* so that after a successful connection to fenced
* we will wait for 30s for the DC to trigger a
* history-sync
* if this doesn't happen we trigger a sync locally
* (e.g. fenced segfaults and is restarted by pacemakerd)
*/
/* as we are finally checking the stonith-connection
* in do_stonith_history_sync we should be fine
* leaving stonith_history_sync_time & stonith_history_sync_trigger
* around
*/
if (stonith_history_sync_trigger == NULL) {
stonith_history_sync_trigger =
mainloop_add_trigger(G_PRIORITY_LOW,
do_stonith_history_sync, NULL);
}
if (long_timeout) {
if(stonith_history_sync_timer_long == NULL) {
stonith_history_sync_timer_long =
mainloop_timer_add("history_sync_long", 30000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
mainloop_timer_start(stonith_history_sync_timer_long);
} else {
if(stonith_history_sync_timer_short == NULL) {
stonith_history_sync_timer_short =
mainloop_timer_add("history_sync_short", 5000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
mainloop_timer_start(stonith_history_sync_timer_short);
}
}
/* end stonith history synchronization functions */
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index d854cbeab2..f5445bdcdb 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -1,688 +1,688 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
char *te_uuid = NULL;
GHashTable *te_targets = NULL;
void send_rsc_command(crm_action_t * action);
static void te_update_job_count(crm_action_t * action, int offset);
static void
te_start_action_timer(crm_graph_t * graph, crm_action_t * action)
{
action->timer = calloc(1, sizeof(crm_action_timer_t));
action->timer->timeout = action->timeout;
action->timer->action = action;
action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay,
action_timer_callback, (void *)action->timer);
CRM_ASSERT(action->timer->source_id != 0);
}
static gboolean
te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
{
const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
/* send to peers as well? */
if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *cmd = NULL;
if (pcmk__str_eq(fsa_our_uname, node->uname, pcmk__str_casei)) {
continue;
}
cmd = create_request(task, pseudo->xml, node->uname,
CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
free_xml(cmd);
}
remote_ra_process_maintenance_nodes(pseudo->xml);
} else {
/* Check action for Pacemaker Remote node side effects */
remote_ra_process_pseudo(pseudo->xml);
}
crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
te_action_confirmed(pseudo, graph);
return TRUE;
}
static int
get_target_rc(crm_action_t * action)
{
int exit_status;
pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC),
&exit_status, 0);
return exit_status;
}
static gboolean
te_crm_command(crm_graph_t * graph, crm_action_t * action)
{
char *counter = NULL;
xmlNode *cmd = NULL;
gboolean is_local = FALSE;
const char *id = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
id = ID(action->xml);
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (!router_node) {
router_node = on_node;
if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
router_node = fsa_our_uname;
}
}
}
if (pcmk__str_empty(on_node)) {
crm_err("Corrupted command (id=%s) %s: no node",
pcmk__s(id, "<null>"), pcmk__s(task, "without task"));
return FALSE;
}
if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_info("Executing crm-event (%s)%s%s: %s on %s",
pcmk__s(id, "<null>"), (is_local? " locally" : ""),
(no_wait? " without waiting" : ""),
pcmk__s(task, "unspecified task"), on_node);
if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
/* defer until everything else completes */
crm_info("crm-event (%s) is a local shutdown", pcmk__s(id, "<null>"));
graph->completion_action = tg_shutdown;
graph->abort_reason = "local shutdown";
te_action_confirmed(action, graph);
return TRUE;
} else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
crm_node_t *peer = crm_get_peer(0, router_node);
pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
}
cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
counter = pcmk__transition_key(transition_graph->id, action->id,
get_target_rc(action), te_uuid);
crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
free(counter);
free_xml(cmd);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return FALSE;
} else if (no_wait) {
te_action_confirmed(action, graph);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_start_action_timer(graph, action);
}
return TRUE;
}
/*!
* \internal
* \brief Synthesize an executor event for a resource action timeout
*
* \param[in] action Resource action that timed out
* \param[in] target_rc Expected result of action that timed out
*
* Synthesize an executor event for a resource action timeout. (If the executor
* gets a timeout while waiting for a resource action to complete, that will be
* reported via the usual callback. This timeout means we didn't hear from the
* executor itself or the controller that relayed the action to the executor.)
*
* \return Newly created executor event for result of \p action
* \note The caller is responsible for freeing the return value using
* lrmd_free_event().
*/
static lrmd_event_data_t *
synthesize_timeout_event(crm_action_t *action, int target_rc)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *reason = NULL;
char *dynamic_reason = NULL;
if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
reason = "Local executor did not return result in time";
} else {
const char *router_node = NULL;
router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (router_node == NULL) {
router_node = target;
}
dynamic_reason = crm_strdup_printf("Controller on %s did not return "
"result in time", router_node);
reason = dynamic_reason;
}
op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
PCMK_OCF_UNKNOWN_ERROR, reason);
op->call_id = -1;
op->user_data = pcmk__transition_key(transition_graph->id, action->id,
target_rc, te_uuid);
free(dynamic_reason);
return op;
}
static void
controld_record_action_event(crm_action_t *action, lrmd_event_data_t *op)
{
xmlNode *state = NULL;
xmlNode *rsc = NULL;
xmlNode *action_rsc = NULL;
int rc = pcmk_ok;
const char *rsc_id = NULL;
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
int call_options = cib_quorum_override | cib_scope_local;
int target_rc = get_target_rc(action);
action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
if (action_rsc == NULL) {
return;
}
rsc_id = ID(action_rsc);
CRM_CHECK(rsc_id != NULL,
crm_log_xml_err(action->xml, "Bad:action"); return);
/*
update the CIB
<node_state id="hadev">
<lrm>
<lrm_resources>
<lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
*/
state = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(state, XML_ATTR_UUID, target_uuid);
crm_xml_add(state, XML_ATTR_UNAME, target);
rsc = create_xml_node(state, XML_CIB_TAG_LRM);
crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
__func__);
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
free_xml(state);
crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
rc, action->id, task_uuid, target);
crm__set_graph_action_flags(action, pcmk__graph_action_sent_update);
}
void
controld_record_action_timeout(crm_action_t *action)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
int target_rc = get_target_rc(action);
crm_warn("%s %d: %s on %s timed out",
crm_element_name(action->xml), action->id, task_uuid, target);
op = synthesize_timeout_event(action, target_rc);
controld_record_action_event(action, op);
lrmd_free_event(op);
}
static gboolean
te_rsc_command(crm_graph_t * graph, crm_action_t * action)
{
/* never overwrite stop actions in the CIB with
* anything other than completed results
*
* Writing pending stops makes it look like the
* resource is running again
*/
xmlNode *cmd = NULL;
xmlNode *rsc_op = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
gboolean is_local = FALSE;
char *counter = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
const char *task_uuid = NULL;
CRM_ASSERT(action != NULL);
CRM_ASSERT(action->xml != NULL);
crm__clear_graph_action_flags(action, pcmk__graph_action_executed);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if (pcmk__str_empty(on_node)) {
crm_err("Corrupted command(id=%s) %s: no node",
ID(action->xml), pcmk__s(task, "without task"));
return FALSE;
}
rsc_op = action->xml;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
if (!router_node) {
router_node = on_node;
}
counter = pcmk__transition_key(transition_graph->id, action->id,
get_target_rc(action), te_uuid);
crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
task, task_uuid, (is_local? " locally" : ""), on_node,
(no_wait? " without waiting" : ""), action->id);
cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
if (is_local) {
/* shortcut local resource commands */
ha_msg_input_t data = {
.msg = cmd,
.xml = rsc_op,
};
fsa_data_t msg = {
.id = 0,
.data = &data,
.data_type = fsa_dt_ha_msg,
.fsa_input = I_NULL,
.fsa_cause = C_FSA_INTERNAL,
.actions = A_LRM_INVOKE,
.origin = __func__,
};
do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);
} else {
rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
}
free(counter);
free_xml(cmd);
crm__set_graph_action_flags(action, pcmk__graph_action_executed);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return FALSE;
} else if (no_wait) {
crm_info("Action %d confirmed - no wait", action->id);
crm__set_graph_action_flags(action, pcmk__graph_action_confirmed); /* Just mark confirmed.
* Don't bump the job count only to immediately decrement it
*/
pcmk__update_graph(transition_graph, action);
trigger_graph();
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
action->id, task, task_uuid, on_node, action->timeout);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_update_job_count(action, 1);
te_start_action_timer(graph, action);
}
return TRUE;
}
struct te_peer_s
{
char *name;
int jobs;
int migrate_jobs;
};
static void te_peer_free(gpointer p)
{
struct te_peer_s *peer = p;
free(peer->name);
free(peer);
}
void te_reset_job_counts(void)
{
GHashTableIter iter;
struct te_peer_s *peer = NULL;
if(te_targets == NULL) {
te_targets = pcmk__strkey_table(NULL, te_peer_free);
}
g_hash_table_iter_init(&iter, te_targets);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
peer->jobs = 0;
peer->migrate_jobs = 0;
}
}
static void
te_update_job_count_on(const char *target, int offset, bool migrate)
{
struct te_peer_s *r = NULL;
if(target == NULL || te_targets == NULL) {
return;
}
r = g_hash_table_lookup(te_targets, target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
r->jobs += offset;
if(migrate) {
r->migrate_jobs += offset;
}
crm_trace("jobs[%s] = %d", target, r->jobs);
}
static void
te_update_job_count(crm_action_t * action, int offset)
{
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
- if (action->type != action_type_rsc || target == NULL) {
+ if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
/* No limit on these */
return;
}
/* if we have a router node, this means the action is performing
* on a remote node. For now, we count all actions occurring on a
* remote node against the job list on the cluster node hosting
* the connection resources */
target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
CRMD_ACTION_MIGRATED, NULL)) {
const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
te_update_job_count_on(t1, offset, TRUE);
te_update_job_count_on(t2, offset, TRUE);
return;
} else if (target == NULL) {
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
}
te_update_job_count_on(target, offset, FALSE);
}
static gboolean
te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target)
{
int limit = 0;
struct te_peer_s *r = NULL;
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
if(target == NULL) {
/* No limit on these */
return TRUE;
} else if(te_targets == NULL) {
return FALSE;
}
r = g_hash_table_lookup(te_targets, target);
limit = throttle_get_job_limit(target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
if(limit <= r->jobs) {
crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
target, limit, r->jobs, id);
return FALSE;
} else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
target, graph->migration_limit, r->migrate_jobs, id);
return FALSE;
}
}
crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
return TRUE;
}
static gboolean
te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
{
const char *target = NULL;
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
- if (action->type != action_type_rsc) {
+ if (action->type != pcmk__rsc_graph_action) {
/* No limit on these */
return TRUE;
}
/* if we have a router node, this means the action is performing
* on a remote node. For now, we count all actions occurring on a
* remote node against the job list on the cluster node hosting
* the connection resources */
target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
CRMD_ACTION_MIGRATED, NULL)) {
target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
if(te_should_perform_action_on(graph, action, target) == FALSE) {
return FALSE;
}
target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
} else if (target == NULL) {
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
}
return te_should_perform_action_on(graph, action, target);
}
/*!
* \brief Confirm a graph action (and optionally update graph)
*
* \param[in] action Action to confirm
* \param[in] graph Update and trigger this graph (if non-NULL)
*/
void
te_action_confirmed(crm_action_t *action, crm_graph_t *graph)
{
if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
- if ((action->type == action_type_rsc)
+ if ((action->type == pcmk__rsc_graph_action)
&& (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
te_update_job_count(action, -1);
}
crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
}
if (graph) {
pcmk__update_graph(graph, action);
trigger_graph();
}
}
crm_graph_functions_t te_graph_fns = {
te_pseudo_action,
te_rsc_command,
te_crm_command,
te_fence_node,
te_should_perform_action,
};
void
notify_crmd(crm_graph_t * graph)
{
const char *type = "unknown";
enum crmd_fsa_input event = I_NULL;
crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));
CRM_CHECK(graph->complete, graph->complete = TRUE);
switch (graph->completion_action) {
case tg_stop:
type = "stop";
if (fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case tg_done:
type = "done";
if (fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case tg_restart:
type = "restart";
if (fsa_state == S_TRANSITION_ENGINE) {
if (transition_timer->period_ms > 0) {
controld_stop_timer(transition_timer);
controld_start_timer(transition_timer);
} else {
event = I_PE_CALC;
}
} else if (fsa_state == S_POLICY_ENGINE) {
controld_set_fsa_action_flags(A_PE_INVOKE);
trigger_fsa();
}
break;
case tg_shutdown:
type = "shutdown";
if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
event = I_STOP;
} else {
crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
event = I_TERMINATE;
}
}
crm_debug("Transition %d status: %s - %s", graph->id, type,
pcmk__s(graph->abort_reason, "unspecified reason"));
graph->abort_reason = NULL;
graph->completion_action = tg_done;
controld_clear_fsa_input_flags(R_IN_TRANSITION);
if (event != I_NULL) {
register_fsa_input(C_FSA_INTERNAL, event, NULL);
} else if (fsa_source) {
mainloop_set_trigger(fsa_source);
}
}
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
index fcf2dfd8bd..ad450d83df 100644
--- a/daemons/controld/controld_te_callbacks.c
+++ b/daemons/controld/controld_te_callbacks.c
@@ -1,677 +1,677 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/msg_xml.h>
#include <crm/cluster.h> /* For ONLINESTATUS etc */
#include <pacemaker-controld.h>
void te_update_confirm(const char *event, xmlNode * msg);
extern char *te_uuid;
gboolean shuttingdown = FALSE;
crm_graph_t *transition_graph;
crm_trigger_t *transition_trigger = NULL;
/* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
#define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
// An explicit shutdown-lock of 0 means the lock has been cleared
static bool
shutdown_lock_cleared(xmlNode *lrm_resource)
{
time_t shutdown_lock = 0;
return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
&shutdown_lock) == pcmk_ok)
&& (shutdown_lock == 0);
}
static void
te_update_diff_v1(const char *event, xmlNode *diff)
{
int lpc, max;
xmlXPathObject *xpathObj = NULL;
CRM_CHECK(diff != NULL, return);
xml_log_patchset(LOG_TRACE, __func__, diff);
if (cib_config_changed(NULL, NULL, &diff)) {
abort_transition(INFINITY, tg_restart, "Non-status change", diff);
goto bail; /* configuration changed */
}
/* Tickets Attributes - Added/Updated */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS);
if (numXpathResults(xpathObj) > 0) {
xmlNode *aborted = getXpathResult(xpathObj, 0);
abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted);
goto bail;
}
freeXpathObject(xpathObj);
/* Tickets Attributes - Removed */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS);
if (numXpathResults(xpathObj) > 0) {
xmlNode *aborted = getXpathResult(xpathObj, 0);
abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted);
goto bail;
}
freeXpathObject(xpathObj);
/* Transient Attributes - Removed */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//"
XML_TAG_TRANSIENT_NODEATTRS);
if (numXpathResults(xpathObj) > 0) {
xmlNode *aborted = getXpathResult(xpathObj, 0);
abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted);
goto bail;
}
freeXpathObject(xpathObj);
// Check for lrm_resource entries
xpathObj = xpath_search(diff,
"//" F_CIB_UPDATE_RESULT
"//" XML_TAG_DIFF_ADDED
"//" XML_LRM_TAG_RESOURCE);
max = numXpathResults(xpathObj);
/*
* Updates by, or in response to, graph actions will never affect more than
* one resource at a time, so such updates indicate an LRM refresh. In that
* case, start a new transition rather than check each result individually,
* which can result in _huge_ speedups in large clusters.
*
* Unfortunately, we can only do so when there are no pending actions.
* Otherwise, we could mistakenly throw away those results here, and
* the cluster will stall waiting for them and time out the operation.
*/
if ((transition_graph->pending == 0) && (max > 1)) {
crm_debug("Ignoring resource operation updates due to history refresh of %d resources",
max);
crm_log_xml_trace(diff, "lrm-refresh");
abort_transition(INFINITY, tg_restart, "History refresh", NULL);
goto bail;
}
if (max == 1) {
xmlNode *lrm_resource = getXpathResult(xpathObj, 0);
if (shutdown_lock_cleared(lrm_resource)) {
// @TODO would be more efficient to abort once after transition done
abort_transition(INFINITY, tg_restart, "Shutdown lock cleared",
lrm_resource);
// Still process results, so we stop timers and update failcounts
}
}
freeXpathObject(xpathObj);
/* Process operation updates */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP);
max = numXpathResults(xpathObj);
if (max > 0) {
int lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
const char *node = get_node_id(rsc_op);
process_graph_event(rsc_op, node);
}
}
freeXpathObject(xpathObj);
/* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */
xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP);
max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
int path_max = 0;
const char *op_id = NULL;
char *rsc_op_xpath = NULL;
xmlXPathObject *op_match = NULL;
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match == NULL) { continue; };
op_id = ID(match);
path_max = strlen(RSC_OP_TEMPLATE) + strlen(op_id) + 1;
rsc_op_xpath = calloc(1, path_max);
snprintf(rsc_op_xpath, path_max, RSC_OP_TEMPLATE, op_id);
op_match = xpath_search(diff, rsc_op_xpath);
if (numXpathResults(op_match) == 0) {
/* Prevent false positives by matching cancelations too */
const char *node = get_node_id(match);
crm_action_t *cancelled = get_cancel_action(op_id, node);
if (cancelled == NULL) {
crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id,
node);
abort_transition(INFINITY, tg_restart, "Resource op removal", match);
freeXpathObject(op_match);
free(rsc_op_xpath);
goto bail;
} else {
crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d",
op_id, node, cancelled->id);
}
}
freeXpathObject(op_match);
free(rsc_op_xpath);
}
bail:
freeXpathObject(xpathObj);
}
static void
process_lrm_resource_diff(xmlNode *lrm_resource, const char *node)
{
for (xmlNode *rsc_op = pcmk__xml_first_child(lrm_resource); rsc_op != NULL;
rsc_op = pcmk__xml_next(rsc_op)) {
process_graph_event(rsc_op, node);
}
if (shutdown_lock_cleared(lrm_resource)) {
// @TODO would be more efficient to abort once after transition done
abort_transition(INFINITY, tg_restart, "Shutdown lock cleared",
lrm_resource);
}
}
static void
process_resource_updates(const char *node, xmlNode *xml, xmlNode *change,
const char *op, const char *xpath)
{
xmlNode *rsc = NULL;
if (xml == NULL) {
return;
}
if (strcmp(TYPE(xml), XML_CIB_TAG_LRM) == 0) {
xml = first_named_child(xml, XML_LRM_TAG_RESOURCES);
CRM_CHECK(xml != NULL, return);
}
CRM_CHECK(strcmp(TYPE(xml), XML_LRM_TAG_RESOURCES) == 0, return);
/*
* Updates by, or in response to, TE actions will never contain updates
* for more than one resource at a time, so such updates indicate an
* LRM refresh.
*
* In that case, start a new transition rather than check each result
* individually, which can result in _huge_ speedups in large clusters.
*
* Unfortunately, we can only do so when there are no pending actions.
* Otherwise, we could mistakenly throw away those results here, and
* the cluster will stall waiting for them and time out the operation.
*/
if ((transition_graph->pending == 0)
&& xml->children && xml->children->next) {
crm_log_xml_trace(change, "lrm-refresh");
abort_transition(INFINITY, tg_restart, "History refresh", NULL);
return;
}
for (rsc = pcmk__xml_first_child(xml); rsc != NULL;
rsc = pcmk__xml_next(rsc)) {
crm_trace("Processing %s", ID(rsc));
process_lrm_resource_diff(rsc, node);
}
}
static char *extract_node_uuid(const char *xpath)
{
char *mutable_path = strdup(xpath);
char *node_uuid = NULL;
char *search = NULL;
char *match = NULL;
match = strstr(mutable_path, "node_state[@id=\'");
if (match == NULL) {
free(mutable_path);
return NULL;
}
match += strlen("node_state[@id=\'");
search = strchr(match, '\'');
if (search == NULL) {
free(mutable_path);
return NULL;
}
search[0] = 0;
node_uuid = strdup(match);
free(mutable_path);
return node_uuid;
}
static void
abort_unless_down(const char *xpath, const char *op, xmlNode *change,
const char *reason)
{
char *node_uuid = NULL;
crm_action_t *down = NULL;
if(!pcmk__str_eq(op, "delete", pcmk__str_casei)) {
abort_transition(INFINITY, tg_restart, reason, change);
return;
}
node_uuid = extract_node_uuid(xpath);
if(node_uuid == NULL) {
crm_err("Could not extract node ID from %s", xpath);
abort_transition(INFINITY, tg_restart, reason, change);
return;
}
down = match_down_event(node_uuid);
if (down == NULL) {
crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath);
abort_transition(INFINITY, tg_restart, reason, change);
} else {
crm_trace("Expecting changes to %s (%s)", node_uuid, xpath);
}
free(node_uuid);
}
static void
process_op_deletion(const char *xpath, xmlNode *change)
{
char *mutable_key = strdup(xpath);
char *key;
char *node_uuid;
// Extract the part of xpath between last pair of single quotes
key = strrchr(mutable_key, '\'');
if (key != NULL) {
*key = '\0';
key = strrchr(mutable_key, '\'');
}
if (key == NULL) {
crm_warn("Ignoring malformed CIB update (resource deletion of %s)",
xpath);
free(mutable_key);
return;
}
++key;
node_uuid = extract_node_uuid(xpath);
if (confirm_cancel_action(key, node_uuid) == FALSE) {
abort_transition(INFINITY, tg_restart, "Resource operation removal",
change);
}
free(mutable_key);
free(node_uuid);
}
static void
process_delete_diff(const char *xpath, const char *op, xmlNode *change)
{
if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) {
process_op_deletion(xpath, change);
} else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) {
abort_unless_down(xpath, op, change, "Resource state removal");
} else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) {
abort_unless_down(xpath, op, change, "Node state removal");
} else {
crm_trace("Ignoring delete of %s", xpath);
}
}
static void
process_node_state_diff(xmlNode *state, xmlNode *change, const char *op,
const char *xpath)
{
xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);
process_resource_updates(ID(state), lrm, change, op, xpath);
}
static void
process_status_diff(xmlNode *status, xmlNode *change, const char *op,
const char *xpath)
{
for (xmlNode *state = pcmk__xml_first_child(status); state != NULL;
state = pcmk__xml_next(state)) {
process_node_state_diff(state, change, op, xpath);
}
}
static void
process_cib_diff(xmlNode *cib, xmlNode *change, const char *op,
const char *xpath)
{
xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS);
xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION);
if (status) {
process_status_diff(status, change, op, xpath);
}
if (config) {
abort_transition(INFINITY, tg_restart,
"Non-status-only change", change);
}
}
static void
te_update_diff_v2(xmlNode *diff)
{
crm_log_xml_trace(diff, "Patch:Raw");
for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL;
change = pcmk__xml_next(change)) {
xmlNode *match = NULL;
const char *name = NULL;
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
// Possible ops: create, modify, delete, move
const char *op = crm_element_value(change, XML_DIFF_OP);
// Ignore uninteresting updates
if (op == NULL) {
continue;
} else if (xpath == NULL) {
crm_trace("Ignoring %s change for version field", op);
continue;
} else if (strcmp(op, "move") == 0) {
crm_trace("Ignoring move change at %s", xpath);
continue;
}
// Find the result of create/modify ops
if (strcmp(op, "create") == 0) {
match = change->children;
} else if (strcmp(op, "modify") == 0) {
match = first_named_child(change, XML_DIFF_RESULT);
if(match) {
match = match->children;
}
} else if (strcmp(op, "delete") != 0) {
crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)",
op, xpath);
continue;
}
if (match) {
if (match->type == XML_COMMENT_NODE) {
crm_trace("Ignoring %s operation for comment at %s", op, xpath);
continue;
}
name = (const char *)match->name;
}
crm_trace("Handling %s operation for %s%s%s",
op, (xpath? xpath : "CIB"),
(name? " matched by " : ""), (name? name : ""));
if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) {
abort_transition(INFINITY, tg_restart, "Configuration change",
change);
break; // Won't be packaged with operation results we may be waiting for
} else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS)
|| pcmk__str_eq(name, XML_CIB_TAG_TICKETS, pcmk__str_casei)) {
abort_transition(INFINITY, tg_restart, "Ticket attribute change", change);
break; // Won't be packaged with operation results we may be waiting for
} else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[")
|| pcmk__str_eq(name, XML_TAG_TRANSIENT_NODEATTRS, pcmk__str_casei)) {
abort_unless_down(xpath, op, change, "Transient attribute change");
break; // Won't be packaged with operation results we may be waiting for
} else if (strcmp(op, "delete") == 0) {
process_delete_diff(xpath, op, change);
} else if (name == NULL) {
crm_warn("Ignoring malformed CIB update (%s at %s has no result)",
op, xpath);
} else if (strcmp(name, XML_TAG_CIB) == 0) {
process_cib_diff(match, change, op, xpath);
} else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) {
process_status_diff(match, change, op, xpath);
} else if (strcmp(name, XML_CIB_TAG_STATE) == 0) {
process_node_state_diff(match, change, op, xpath);
} else if (strcmp(name, XML_CIB_TAG_LRM) == 0) {
process_resource_updates(ID(match), match, change, op, xpath);
} else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, "lrm");
process_resource_updates(local_node, match, change, op, xpath);
free(local_node);
} else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, "lrm");
process_lrm_resource_diff(match, local_node);
free(local_node);
} else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, "lrm");
process_graph_event(match, local_node);
free(local_node);
} else {
crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)",
op, xpath, name);
}
}
}
void
te_update_diff(const char *event, xmlNode * msg)
{
xmlNode *diff = NULL;
const char *op = NULL;
int rc = -EINVAL;
int format = 1;
int p_add[] = { 0, 0, 0 };
int p_del[] = { 0, 0, 0 };
CRM_CHECK(msg != NULL, return);
crm_element_value_int(msg, F_CIB_RC, &rc);
if (transition_graph == NULL) {
crm_trace("No graph");
return;
} else if (rc < pcmk_ok) {
crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
return;
} else if (transition_graph->complete
&& fsa_state != S_IDLE
&& fsa_state != S_TRANSITION_ENGINE
&& fsa_state != S_POLICY_ENGINE) {
crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state),
transition_graph->complete);
return;
}
op = crm_element_value(msg, F_CIB_OPERATION);
diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
xml_patch_versions(diff, p_add, p_del);
crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op,
p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
fsa_state2string(fsa_state));
crm_element_value_int(diff, "format", &format);
switch (format) {
case 1:
te_update_diff_v1(event, diff);
break;
case 2:
te_update_diff_v2(diff);
break;
default:
crm_warn("Ignoring malformed CIB update (unknown patch format %d)",
format);
}
}
gboolean
process_te_message(xmlNode * msg, xmlNode * xml_data)
{
const char *from = crm_element_value(msg, F_ORIG);
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
const char *ref = crm_element_value(msg, F_CRM_REFERENCE);
const char *op = crm_element_value(msg, F_CRM_TASK);
const char *type = crm_element_value(msg, F_CRM_MSG_TYPE);
crm_trace("Processing %s (%s) message", op, ref);
crm_log_xml_trace(msg, "ipc");
if (op == NULL) {
/* error */
} else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) {
crm_trace("Bad sys-to: %s", pcmk__s(sys_to, "missing"));
return FALSE;
} else if (pcmk__str_eq(op, CRM_OP_INVOKE_LRM, pcmk__str_casei)
&& pcmk__str_eq(sys_from, CRM_SYSTEM_LRMD, pcmk__str_casei)
/* && pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_casei) */
) {
xmlXPathObject *xpathObj = NULL;
crm_log_xml_trace(msg, "Processing (N)ACK");
crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from);
xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP);
if (numXpathResults(xpathObj)) {
int lpc = 0, max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
const char *node = get_node_id(rsc_op);
process_graph_event(rsc_op, node);
}
freeXpathObject(xpathObj);
} else {
crm_log_xml_err(msg, "Invalid (N)ACK");
freeXpathObject(xpathObj);
return FALSE;
}
} else {
crm_err("Unknown command: %s::%s from %s", type, op, sys_from);
}
crm_trace("finished processing message");
return TRUE;
}
void
cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
if (rc < pcmk_ok) {
crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
}
}
/*!
* \brief Handle a timeout in node-to-node communication
*
* \param[in] data Pointer to action timer
*
* \return FALSE (indicating that source should be not be re-added)
*/
gboolean
action_timer_callback(gpointer data)
{
crm_action_timer_t *timer = NULL;
const char *task = NULL;
const char *on_node = NULL;
const char *via_node = NULL;
CRM_CHECK(data != NULL, return FALSE);
timer = (crm_action_timer_t *) data;
stop_te_timer(timer);
CRM_CHECK(timer->action != NULL, return FALSE);
task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK);
on_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_TARGET);
via_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (transition_graph->complete) {
crm_notice("Node %s did not send %s result (via %s) within %dms "
"(ignoring because transition not in progress)",
(on_node? on_node : ""), (task? task : "unknown action"),
(via_node? via_node : "controller"), timer->timeout);
} else {
/* fail the action */
crm_err("Node %s did not send %s result (via %s) within %dms "
"(action timeout plus cluster-delay)",
(on_node? on_node : ""), (task? task : "unknown action"),
(via_node? via_node : "controller"),
timer->timeout + transition_graph->network_delay);
pcmk__log_graph_action(LOG_ERR, timer->action);
crm__set_graph_action_flags(timer->action, pcmk__graph_action_failed);
te_action_confirmed(timer->action, transition_graph);
abort_transition(INFINITY, tg_restart, "Action lost", NULL);
// Record timeout in the CIB if appropriate
- if ((timer->action->type == action_type_rsc)
+ if ((timer->action->type == pcmk__rsc_graph_action)
&& controld_action_is_recordable(task)) {
controld_record_action_timeout(timer->action);
}
}
return FALSE;
}
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
index 1fd7129922..7b049d945a 100644
--- a/daemons/controld/controld_te_events.c
+++ b/daemons/controld/controld_te_events.c
@@ -1,507 +1,508 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
char *failed_stop_offset = NULL;
char *failed_start_offset = NULL;
gboolean
fail_incompletable_actions(crm_graph_t * graph, const char *down_node)
{
const char *target_uuid = NULL;
const char *router = NULL;
const char *router_uuid = NULL;
xmlNode *last_action = NULL;
GList *gIter = NULL;
GList *gIter2 = NULL;
if (graph == NULL || graph->complete) {
return FALSE;
}
gIter = graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
synapse_t *synapse = (synapse_t *) gIter->data;
if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
/* We've already been here */
continue;
}
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
crm_action_t *action = (crm_action_t *) gIter2->data;
- if (action->type == action_type_pseudo || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
+ if ((action->type == pcmk__pseudo_graph_action)
+ || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
- } else if (action->type == action_type_crm) {
+ } else if (action->type == pcmk__cluster_graph_action) {
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
continue;
}
}
target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (router) {
crm_node_t *node = crm_get_peer(0, router);
if (node) {
router_uuid = node->uuid;
}
}
if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
crm__set_graph_action_flags(action, pcmk__graph_action_failed);
pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
last_action = action->xml;
stop_te_timer(action->timer);
pcmk__update_graph(graph, action);
if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
crm_notice("Action %d (%s) was pending on %s (offline)",
action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
} else {
crm_info("Action %d (%s) is scheduled for %s (offline)",
action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
}
}
}
}
if (last_action != NULL) {
crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
abort_transition(INFINITY, tg_restart, "Node failure", last_action);
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Update failure-related node attributes if warranted
*
* \param[in] event XML describing operation that (maybe) failed
* \param[in] event_node_uuid Node that event occurred on
* \param[in] rc Actual operation return code
* \param[in] target_rc Expected operation return code
* \param[in] do_update If TRUE, do update regardless of operation type
* \param[in] ignore_failures If TRUE, update last failure but not fail count
*
* \return TRUE if this was not a direct nack, success or lrm status refresh
*/
static gboolean
update_failcount(xmlNode * event, const char *event_node_uuid, int rc,
int target_rc, gboolean do_update, gboolean ignore_failures)
{
guint interval_ms = 0;
char *task = NULL;
char *rsc_id = NULL;
const char *value = NULL;
const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
const char *on_uname = crm_peer_uname(event_node_uuid);
const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
// Nothing needs to be done for success or status refresh
if (rc == target_rc) {
return FALSE;
} else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
id, rc, on_uname);
return FALSE;
}
/* Sanity check */
CRM_CHECK(on_uname != NULL, return TRUE);
CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
crm_err("Couldn't parse: %s", ID(event)); goto bail);
/* Decide whether update is necessary and what value to use */
if ((interval_ms > 0) || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)
|| pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
do_update = TRUE;
} else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
do_update = TRUE;
if (failed_start_offset == NULL) {
failed_start_offset = strdup(CRM_INFINITY_S);
}
value = failed_start_offset;
} else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
do_update = TRUE;
if (failed_stop_offset == NULL) {
failed_stop_offset = strdup(CRM_INFINITY_S);
}
value = failed_stop_offset;
}
/* Fail count will be either incremented or set to infinity */
if (!pcmk_str_is_infinity(value)) {
value = XML_NVPAIR_ATTR_VALUE "++";
}
if (do_update) {
char *now = pcmk__ttoa(time(NULL));
char *attr_name = NULL;
gboolean is_remote_node = FALSE;
if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
is_remote_node = TRUE;
}
crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
(ignore_failures? "last failure" : "failcount"),
rsc_id, on_uname, task, rc, value, now);
/* Update the fail count, if we're not ignoring failures */
if (!ignore_failures) {
attr_name = pcmk__failcount_name(rsc_id, task, interval_ms);
update_attrd(on_uname, attr_name, value, NULL, is_remote_node);
free(attr_name);
}
/* Update the last failure time (even if we're ignoring failures,
* so that failure can still be detected and shown, e.g. by crm_mon)
*/
attr_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
update_attrd(on_uname, attr_name, now, NULL, is_remote_node);
free(attr_name);
free(now);
}
bail:
free(rsc_id);
free(task);
return TRUE;
}
crm_action_t *
controld_get_action(int id)
{
for (GList *item = transition_graph->synapses; item; item = item->next) {
synapse_t *synapse = (synapse_t *) item->data;
for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
crm_action_t *action = (crm_action_t *) item2->data;
if (action->id == id) {
return action;
}
}
}
return NULL;
}
crm_action_t *
get_cancel_action(const char *id, const char *node)
{
GList *gIter = NULL;
GList *gIter2 = NULL;
gIter = transition_graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
synapse_t *synapse = (synapse_t *) gIter->data;
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
const char *task = NULL;
const char *target = NULL;
crm_action_t *action = (crm_action_t *) gIter2->data;
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
crm_trace("Wrong key %s for %s on %s", task, id, node);
continue;
}
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
crm_trace("Wrong node %s for %s on %s", target, id, node);
continue;
}
crm_trace("Found %s on %s", id, node);
return action;
}
}
return NULL;
}
bool
confirm_cancel_action(const char *id, const char *node_id)
{
const char *op_key = NULL;
const char *node_name = NULL;
crm_action_t *cancel = get_cancel_action(id, node_id);
if (cancel == NULL) {
return FALSE;
}
op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
stop_te_timer(cancel->timer);
te_action_confirmed(cancel, transition_graph);
crm_info("Cancellation of %s on %s confirmed (action %d)",
op_key, node_name, cancel->id);
return TRUE;
}
/* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
#define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
"/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']"
/*!
* \brief Find a transition event that would have made a specified node down
*
* \param[in] target UUID of node to match
*
* \return Matching event if found, NULL otherwise
*/
crm_action_t *
match_down_event(const char *target)
{
crm_action_t *match = NULL;
xmlXPathObjectPtr xpath_ret = NULL;
GList *gIter, *gIter2;
char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
for (gIter = transition_graph->synapses;
gIter != NULL && match == NULL;
gIter = gIter->next) {
for (gIter2 = ((synapse_t*)gIter->data)->actions;
gIter2 != NULL && match == NULL;
gIter2 = gIter2->next) {
match = (crm_action_t*)gIter2->data;
if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
xpath_ret = xpath_search(match->xml, xpath);
if (numXpathResults(xpath_ret) < 1) {
match = NULL;
}
freeXpathObject(xpath_ret);
} else {
// Only actions that were actually started can match
match = NULL;
}
}
}
free(xpath);
if (match != NULL) {
crm_debug("Shutdown action %d (%s) found for node %s", match->id,
crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target);
} else {
crm_debug("No reason to expect node %s to be down", target);
}
return match;
}
void
process_graph_event(xmlNode *event, const char *event_node)
{
int rc = -1; // Actual result
int target_rc = -1; // Expected result
int status = -1; // Executor status
int callid = -1; // Executor call ID
int transition_num = -1; // Transition number
int action_num = -1; // Action number within transition
char *update_te_uuid = NULL;
bool ignore_failures = FALSE;
const char *id = NULL;
const char *desc = NULL;
const char *magic = NULL;
const char *uname = NULL;
CRM_ASSERT(event != NULL);
/*
<lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/>
*/
magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
if (magic == NULL) {
/* non-change */
return;
}
crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
if (status == PCMK_EXEC_PENDING) {
return;
}
id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
rc = pcmk__effective_rc(rc);
if (decode_transition_key(magic, &update_te_uuid, &transition_num,
&action_num, &target_rc) == FALSE) {
// decode_transition_key() already logged the bad key
crm_err("Can't process action %s result: Incompatible versions? "
CRM_XS " call-id=%d", id, callid);
abort_transition(INFINITY, tg_restart, "Bad event", event);
return;
}
if (transition_num == -1) {
// E.g. crm_resource --fail
desc = "initiated outside of the cluster";
abort_transition(INFINITY, tg_restart, "Unexpected event", event);
} else if ((action_num < 0) || !pcmk__str_eq(update_te_uuid, te_uuid, pcmk__str_none)) {
desc = "initiated by a different DC";
abort_transition(INFINITY, tg_restart, "Foreign event", event);
} else if ((transition_graph->id != transition_num)
|| (transition_graph->complete)) {
// Action is not from currently active transition
guint interval_ms = 0;
if (parse_op_key(id, NULL, NULL, &interval_ms)
&& (interval_ms != 0)) {
/* Recurring actions have the transition number they were first
* scheduled in.
*/
if (status == PCMK_EXEC_CANCELLED) {
confirm_cancel_action(id, get_node_id(event));
goto bail;
}
desc = "arrived after initial scheduling";
abort_transition(INFINITY, tg_restart, "Change in recurring result",
event);
} else if (transition_graph->id != transition_num) {
desc = "arrived really late";
abort_transition(INFINITY, tg_restart, "Old event", event);
} else {
desc = "arrived late";
abort_transition(INFINITY, tg_restart, "Inactive graph", event);
}
} else {
// Event is result of an action from currently active transition
crm_action_t *action = controld_get_action(action_num);
if (action == NULL) {
// Should never happen
desc = "unknown";
abort_transition(INFINITY, tg_restart, "Unknown event", event);
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
/* Nothing further needs to be done if the action has already been
* confirmed. This can happen e.g. when processing both an
* "xxx_last_0" or "xxx_last_failure_0" record as well as the main
* history record, which would otherwise result in incorrectly
* bumping the fail count twice.
*/
crm_log_xml_debug(event, "Event already confirmed:");
goto bail;
} else {
/* An action result needs to be confirmed.
* (This is the only case where desc == NULL.)
*/
if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) {
ignore_failures = TRUE;
} else if (rc != target_rc) {
crm__set_graph_action_flags(action, pcmk__graph_action_failed);
}
stop_te_timer(action->timer);
te_action_confirmed(action, transition_graph);
if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
abort_transition(action->synapse->priority + 1, tg_restart,
"Event failed", event);
}
}
}
if (id == NULL) {
id = "unknown action";
}
uname = crm_element_value(event, XML_LRM_ATTR_TARGET);
if (uname == NULL) {
uname = "unknown node";
}
if (status == PCMK_EXEC_INVALID) {
// We couldn't attempt the action
crm_info("Transition %d action %d (%s on %s): %s",
transition_num, action_num, id, uname,
pcmk_exec_status_str(status));
} else if (desc && update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), FALSE)) {
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(target_rc),
services_ocf_exitcode_str(rc),
target_rc, rc, callid, desc);
} else if (desc) {
crm_info("Transition %d action %d (%s on %s): %s "
CRM_XS " rc=%d target-rc=%d call-id=%d",
transition_num, action_num, id, uname,
desc, rc, target_rc, callid);
} else if (rc == target_rc) {
crm_info("Transition %d action %d (%s on %s) confirmed: %s "
CRM_XS " rc=%d call-id=%d",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(rc), rc, callid);
} else {
update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), ignore_failures);
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
CRM_XS " target-rc=%d rc=%d call-id=%d",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(target_rc),
services_ocf_exitcode_str(rc),
target_rc, rc, callid);
}
bail:
free(update_te_uuid);
}
diff --git a/include/pcmki/pcmki_transition.h b/include/pcmki/pcmki_transition.h
index 69680854df..bf6464e435 100644
--- a/include/pcmki/pcmki_transition.h
+++ b/include/pcmki/pcmki_transition.h
@@ -1,175 +1,175 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__PCMKI_PCMKI_TRANSITION__H
# define PCMK__PCMKI_PCMKI_TRANSITION__H
# include <glib.h>
# include <crm/crm.h>
# include <crm/msg_xml.h>
# include <crm/common/xml.h>
#ifdef __cplusplus
extern "C" {
#endif
-typedef enum {
- action_type_pseudo,
- action_type_rsc,
- action_type_crm
-} action_type_e;
+enum pcmk__graph_action_type {
+ pcmk__pseudo_graph_action,
+ pcmk__rsc_graph_action,
+ pcmk__cluster_graph_action,
+};
typedef struct te_timer_s crm_action_timer_t;
typedef struct crm_graph_s crm_graph_t;
enum pcmk__synapse_flags {
pcmk__synapse_ready = (1 << 0),
pcmk__synapse_failed = (1 << 1),
pcmk__synapse_executed = (1 << 2),
pcmk__synapse_confirmed = (1 << 3),
};
typedef struct synapse_s {
int id;
int priority;
uint32_t flags; // Group of pcmk__synapse_flags
GList *actions; /* crm_action_t* */
GList *inputs; /* crm_action_t* */
} synapse_t;
const char *synapse_state_str(synapse_t *synapse);
#define pcmk__set_synapse_flags(synapse, flags_to_set) do { \
(synapse)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Synapse", "synapse", \
(synapse)->flags, (flags_to_set), #flags_to_set); \
} while (0)
#define pcmk__clear_synapse_flags(synapse, flags_to_clear) do { \
(synapse)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Synapse", "synapse", \
(synapse)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
enum pcmk__graph_action_flags {
pcmk__graph_action_sent_update = (1 << 0), /* sent to the CIB */
pcmk__graph_action_executed = (1 << 1), /* sent to the CRM */
pcmk__graph_action_confirmed = (1 << 2),
pcmk__graph_action_failed = (1 << 3),
pcmk__graph_action_can_fail = (1 << 4), //! \deprecated Will be removed in a future release
};
typedef struct crm_action_s {
int id;
int timeout;
guint interval_ms;
GHashTable *params;
- action_type_e type;
+ enum pcmk__graph_action_type type;
crm_action_timer_t *timer;
synapse_t *synapse;
uint32_t flags; // Group of pcmk__graph_action_flags
xmlNode *xml;
} crm_action_t;
#define crm__set_graph_action_flags(action, flags_to_set) do { \
(action)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Action", "action", \
(action)->flags, (flags_to_set), #flags_to_set); \
} while (0)
#define crm__clear_graph_action_flags(action, flags_to_clear) do { \
(action)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Action", "action", \
(action)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
struct te_timer_s {
int source_id;
int timeout;
crm_action_t *action;
};
/* order matters here */
enum transition_action {
tg_done,
tg_stop,
tg_restart,
tg_shutdown,
};
struct crm_graph_s {
int id;
char *source;
int abort_priority;
gboolean complete;
const char *abort_reason;
enum transition_action completion_action;
int num_actions;
int num_synapses;
int batch_limit;
guint network_delay;
guint stonith_timeout;
int fired;
int pending;
int skipped;
int completed;
int incomplete;
GList *synapses; /* synapse_t* */
int migration_limit;
};
typedef struct crm_graph_functions_s {
gboolean(*pseudo) (crm_graph_t * graph, crm_action_t * action);
gboolean(*rsc) (crm_graph_t * graph, crm_action_t * action);
gboolean(*crmd) (crm_graph_t * graph, crm_action_t * action);
gboolean(*stonith) (crm_graph_t * graph, crm_action_t * action);
gboolean(*allowed) (crm_graph_t * graph, crm_action_t * action);
} crm_graph_functions_t;
enum transition_status {
transition_active,
transition_pending, /* active but no actions performed this time */
transition_complete,
transition_terminated,
};
void pcmk__set_graph_functions(crm_graph_functions_t *fns);
crm_graph_t *pcmk__unpack_graph(xmlNode *xml_graph, const char *reference);
enum transition_status pcmk__execute_graph(crm_graph_t *graph);
void pcmk__update_graph(crm_graph_t *graph, crm_action_t *action);
void pcmk__free_graph(crm_graph_t *graph);
const char *pcmk__graph_status2text(enum transition_status state);
void pcmk__log_graph(unsigned int log_level, crm_graph_t *graph);
void pcmk__log_graph_action(int log_level, crm_action_t *action);
lrmd_event_data_t *pcmk__event_from_graph_action(xmlNode *resource,
crm_action_t *action,
int status, int rc,
const char *exit_reason);
#ifdef __cplusplus
}
#endif
#endif
diff --git a/lib/pacemaker/pcmk_graph_consumer.c b/lib/pacemaker/pcmk_graph_consumer.c
index 88b75b4a31..a8c1af22bb 100644
--- a/lib/pacemaker/pcmk_graph_consumer.c
+++ b/lib/pacemaker/pcmk_graph_consumer.c
@@ -1,854 +1,854 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-internal.h>
/*
* Functions for updating graph
*/
/*!
* \internal
* \brief Update synapse after completed prerequisite
*
* A synapse is ready to be executed once all its prerequisite actions (inputs)
* complete. Given a completed action, check whether it is an input for a given
* synapse, and if so, mark the input as confirmed, and mark the synapse as
* ready if appropriate.
*
* \param[in] synapse Transition graph synapse to update
* \param[in] action_id ID of an action that completed
*
* \note The only substantial effect here is confirming synapse inputs.
* should_fire_synapse() will recalculate pcmk__synapse_ready, so the only
* thing that uses the pcmk__synapse_ready from here is
* synapse_state_str().
*/
static void
update_synapse_ready(synapse_t *synapse, int action_id)
{
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
return; // All inputs have already been confirmed
}
pcmk__set_synapse_flags(synapse, pcmk__synapse_ready); // Presume ready until proven otherwise
for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
crm_action_t *prereq = (crm_action_t *) lpc->data;
if (prereq->id == action_id) {
crm_trace("Confirming input %d of synapse %d",
action_id, synapse->id);
crm__set_graph_action_flags(prereq, pcmk__graph_action_confirmed);
} else if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) {
pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
crm_trace("Synapse %d still not ready after action %d",
synapse->id, action_id);
}
}
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
crm_trace("Synapse %d is now ready to execute", synapse->id);
}
}
/*!
* \internal
* \brief Update action and synapse confirmation after action completion
*
* \param[in] synapse Transition graph synapse that action belongs to
* \param[in] action_id ID of action that completed
*/
static void
update_synapse_confirmed(synapse_t *synapse, int action_id)
{
bool all_confirmed = true;
for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
crm_action_t *action = (crm_action_t *) lpc->data;
if (action->id == action_id) {
crm_trace("Confirmed action %d of synapse %d",
action_id, synapse->id);
crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
} else if (all_confirmed && !(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
all_confirmed = false;
crm_trace("Synapse %d still not confirmed after action %d",
synapse->id, action_id);
}
}
if (all_confirmed && !(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) {
crm_trace("Confirmed synapse %d", synapse->id);
pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed);
}
}
/*!
* \internal
* \brief Update the transition graph with a completed action result
*
* \param[in,out] graph Transition graph to update
* \param[in] action Action that completed
*/
void
pcmk__update_graph(crm_graph_t *graph, crm_action_t *action)
{
for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
synapse_t *synapse = (synapse_t *) lpc->data;
if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
continue; // This synapse already completed
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
update_synapse_confirmed(synapse, action->id);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_failed)) || (synapse->priority == INFINITY)) {
update_synapse_ready(synapse, action->id);
}
}
}
/*
* Functions for executing graph
*/
/* A transition graph consists of various types of actions. The library caller
* registers execution functions for each action type, which will be stored
* here.
*/
static crm_graph_functions_t *graph_fns = NULL;
/*!
* \internal
* \brief Set transition graph execution functions
*
* \param[in] Execution functions to use
*/
void
pcmk__set_graph_functions(crm_graph_functions_t *fns)
{
crm_debug("Setting custom functions for executing transition graphs");
graph_fns = fns;
CRM_ASSERT(graph_fns != NULL);
CRM_ASSERT(graph_fns->rsc != NULL);
CRM_ASSERT(graph_fns->crmd != NULL);
CRM_ASSERT(graph_fns->pseudo != NULL);
CRM_ASSERT(graph_fns->stonith != NULL);
}
/*!
* \internal
* \brief Check whether a graph synapse is ready to be executed
*
* \param[in] graph Transition graph that synapse is part of
* \param[in] synapse Synapse to check
*
* \return true if synapse is ready, false otherwise
*/
static bool
should_fire_synapse(crm_graph_t *graph, synapse_t *synapse)
{
GList *lpc = NULL;
pcmk__set_synapse_flags(synapse, pcmk__synapse_ready);
for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
crm_action_t *prereq = (crm_action_t *) lpc->data;
if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) {
crm_trace("Input %d for synapse %d not yet confirmed",
prereq->id, synapse->id);
pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
break;
} else if (pcmk_is_set(prereq->flags, pcmk__graph_action_failed) && !(pcmk_is_set(prereq->flags, pcmk__graph_action_can_fail))) {
crm_trace("Input %d for synapse %d confirmed but failed",
prereq->id, synapse->id);
pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
break;
}
}
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
crm_trace("Synapse %d is ready to execute", synapse->id);
} else {
return false;
}
for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
crm_action_t *a = (crm_action_t *) lpc->data;
- if (a->type == action_type_pseudo) {
+ if (a->type == pcmk__pseudo_graph_action) {
/* None of the below applies to pseudo ops */
} else if (synapse->priority < graph->abort_priority) {
crm_trace("Skipping synapse %d: priority %d is less than "
"abort priority %d",
synapse->id, synapse->priority, graph->abort_priority);
graph->skipped++;
return false;
} else if (graph_fns->allowed && !(graph_fns->allowed(graph, a))) {
crm_trace("Deferring synapse %d: not allowed", synapse->id);
return false;
}
}
return true;
}
/*!
* \internal
* \brief Initiate an action from a transition graph
*
* \param[in] graph Transition graph containing action
* \param[in] action Action to execute
*
* \return Standard Pacemaker return code
*/
static int
initiate_action(crm_graph_t *graph, crm_action_t *action)
{
const char *id = ID(action->xml);
CRM_CHECK(id != NULL, return EINVAL);
CRM_CHECK(!pcmk_is_set(action->flags, pcmk__graph_action_executed),
return pcmk_rc_already);
crm__set_graph_action_flags(action, pcmk__graph_action_executed);
switch (action->type) {
- case action_type_pseudo:
+ case pcmk__pseudo_graph_action:
crm_trace("Executing pseudo-action %d (%s)", action->id, id);
return graph_fns->pseudo(graph, action)? pcmk_rc_ok : pcmk_rc_error;
- case action_type_rsc:
+ case pcmk__rsc_graph_action:
crm_trace("Executing resource action %d (%s)", action->id, id);
return graph_fns->rsc(graph, action)? pcmk_rc_ok : pcmk_rc_error;
- case action_type_crm:
+ case pcmk__cluster_graph_action:
if (pcmk__str_eq(crm_element_value(action->xml, XML_LRM_ATTR_TASK),
CRM_OP_FENCE, pcmk__str_casei)) {
crm_trace("Executing fencing action %d (%s)",
action->id, id);
return graph_fns->stonith(graph, action)? pcmk_rc_ok : pcmk_rc_error;
}
crm_trace("Executing control action %d (%s)", action->id, id);
return graph_fns->crmd(graph, action)? pcmk_rc_ok : pcmk_rc_error;
default:
crm_err("Unsupported graph action type <%s id='%s'> (bug?)",
crm_element_name(action->xml), id);
return EINVAL;
}
}
/*!
* \internal
* \brief Execute a graph synapse
*
* \param[in] graph Transition graph with synapse to execute
* \param[in] synapse Synapse to execute
*
* \return Standard Pacemaker return value
*/
static int
fire_synapse(crm_graph_t *graph, synapse_t *synapse)
{
pcmk__set_synapse_flags(synapse, pcmk__synapse_executed);
for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
crm_action_t *action = (crm_action_t *) lpc->data;
if (initiate_action(graph, action) != pcmk_rc_ok) {
crm_err("Failed initiating <%s id=%d> in synapse %d",
crm_element_name(action->xml), action->id, synapse->id);
pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed);
crm__set_graph_action_flags(action, pcmk__graph_action_confirmed | pcmk__graph_action_failed);
return pcmk_rc_error;
}
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Dummy graph method that can be used with simulations
*
* \param[in] graph Transition graph containing action
* \param[in] action Action to be initiated
*
* \retval TRUE Action initiation was (simulated to be) successful
* \retval FALSE Action initiation was (simulated to be) failed (due to the
* PE_fail environment variable being set to the action ID)
*/
static gboolean
pseudo_action_dummy(crm_graph_t * graph, crm_action_t * action)
{
static int fail = -1;
if (fail < 0) {
long long fail_ll;
if ((pcmk__scan_ll(getenv("PE_fail"), &fail_ll, 0LL) == pcmk_rc_ok)
&& (fail_ll > 0LL) && (fail_ll <= INT_MAX)) {
fail = (int) fail_ll;
} else {
fail = 0;
}
}
if (action->id == fail) {
crm_err("Dummy event handler: pretending action %d failed", action->id);
crm__set_graph_action_flags(action, pcmk__graph_action_failed);
graph->abort_priority = INFINITY;
} else {
crm_trace("Dummy event handler: action %d initiated", action->id);
}
crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(graph, action);
return TRUE;
}
static crm_graph_functions_t default_fns = {
pseudo_action_dummy,
pseudo_action_dummy,
pseudo_action_dummy,
pseudo_action_dummy
};
/*!
* \internal
* \brief Execute all actions in a transition graph
*
* \param[in] graph Transition graph to execute
*
* \return Status of transition after execution
*/
enum transition_status
pcmk__execute_graph(crm_graph_t *graph)
{
GList *lpc = NULL;
int log_level = LOG_DEBUG;
enum transition_status pass_result = transition_active;
const char *status = "In progress";
if (graph_fns == NULL) {
graph_fns = &default_fns;
}
if (graph == NULL) {
return transition_complete;
}
graph->fired = 0;
graph->pending = 0;
graph->skipped = 0;
graph->completed = 0;
graph->incomplete = 0;
// Count completed and in-flight synapses
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
synapse_t *synapse = (synapse_t *) lpc->data;
if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
graph->completed++;
} else if (!(pcmk_is_set(synapse->flags, pcmk__synapse_failed)) && pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
graph->pending++;
}
}
crm_trace("Executing graph %d (%d synapses already completed, %d pending)",
graph->id, graph->completed, graph->pending);
// Execute any synapses that are ready
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
synapse_t *synapse = (synapse_t *) lpc->data;
if ((graph->batch_limit > 0)
&& (graph->pending >= graph->batch_limit)) {
crm_debug("Throttling graph execution: batch limit (%d) reached",
graph->batch_limit);
break;
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) {
graph->skipped++;
continue;
} else if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_executed)) {
continue; // Already handled
} else if (should_fire_synapse(graph, synapse)) {
graph->fired++;
if (fire_synapse(graph, synapse) != pcmk_rc_ok) {
crm_err("Synapse %d failed to fire", synapse->id);
log_level = LOG_ERR;
graph->abort_priority = INFINITY;
graph->incomplete++;
graph->fired--;
}
if (!(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) {
graph->pending++;
}
} else {
crm_trace("Synapse %d cannot fire", synapse->id);
graph->incomplete++;
}
}
if ((graph->pending == 0) && (graph->fired == 0)) {
graph->complete = TRUE;
if ((graph->incomplete != 0) && (graph->abort_priority <= 0)) {
log_level = LOG_WARNING;
pass_result = transition_terminated;
status = "Terminated";
} else if (graph->skipped != 0) {
log_level = LOG_NOTICE;
pass_result = transition_complete;
status = "Stopped";
} else {
log_level = LOG_NOTICE;
pass_result = transition_complete;
status = "Complete";
}
} else if (graph->fired == 0) {
pass_result = transition_pending;
}
do_crm_log(log_level,
"Transition %d (Complete=%d, Pending=%d,"
" Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s",
graph->id, graph->completed, graph->pending, graph->fired,
graph->skipped, graph->incomplete, graph->source, status);
return pass_result;
}
/*
* Functions for unpacking transition graph XML into structs
*/
/*!
* \internal
* \brief Unpack a transition graph action from XML
*
* \param[in] parent Synapse that action is part of
* \param[in] xml_action Action XML to unparse
*
* \return Newly allocated action on success, or NULL otherwise
*/
static crm_action_t *
unpack_action(synapse_t *parent, xmlNode *xml_action)
{
- action_type_e action_type;
+ enum pcmk__graph_action_type action_type;
crm_action_t *action = NULL;
const char *element = TYPE(xml_action);
const char *value = ID(xml_action);
if (value == NULL) {
crm_err("Ignoring transition graph action without id (bug?)");
crm_log_xml_trace(xml_action, "invalid");
return NULL;
}
if (pcmk__str_eq(element, XML_GRAPH_TAG_RSC_OP, pcmk__str_casei)) {
- action_type = action_type_rsc;
+ action_type = pcmk__rsc_graph_action;
} else if (pcmk__str_eq(element, XML_GRAPH_TAG_PSEUDO_EVENT,
pcmk__str_casei)) {
- action_type = action_type_pseudo;
+ action_type = pcmk__pseudo_graph_action;
} else if (pcmk__str_eq(element, XML_GRAPH_TAG_CRM_EVENT,
pcmk__str_casei)) {
- action_type = action_type_crm;
+ action_type = pcmk__cluster_graph_action;
} else {
crm_err("Ignoring transition graph action of unknown type '%s' (bug?)",
element);
crm_log_xml_trace(xml_action, "invalid");
return NULL;
}
action = calloc(1, sizeof(crm_action_t));
if (action == NULL) {
crm_perror(LOG_CRIT, "Cannot unpack transition graph action");
crm_log_xml_trace(xml_action, "lost");
return NULL;
}
pcmk__scan_min_int(value, &(action->id), -1);
- action->type = action_type_rsc;
+ action->type = pcmk__rsc_graph_action;
action->xml = copy_xml(xml_action);
action->synapse = parent;
action->type = action_type;
action->params = xml2list(action->xml);
value = g_hash_table_lookup(action->params, "CRM_meta_timeout");
pcmk__scan_min_int(value, &(action->timeout), 0);
/* Take start-delay into account for the timeout of the action timer */
value = g_hash_table_lookup(action->params, "CRM_meta_start_delay");
{
int start_delay;
pcmk__scan_min_int(value, &start_delay, 0);
action->timeout += start_delay;
}
if (pcmk__guint_from_hash(action->params,
CRM_META "_" XML_LRM_ATTR_INTERVAL, 0,
&(action->interval_ms)) != pcmk_rc_ok) {
action->interval_ms = 0;
}
value = g_hash_table_lookup(action->params, "CRM_meta_can_fail");
if (value != NULL) {
gboolean can_fail = FALSE;
crm_str_to_boolean(value, &can_fail);
if (can_fail) {
crm__set_graph_action_flags(action, pcmk__graph_action_can_fail);
} else {
crm__clear_graph_action_flags(action, pcmk__graph_action_can_fail);
}
#ifndef PCMK__COMPAT_2_0
if (pcmk_is_set(action->flags, pcmk__graph_action_can_fail)) {
crm_warn("Support for the can_fail meta-attribute is deprecated"
" and will be removed in a future release");
}
#endif
}
crm_trace("Action %d has timer set to %dms", action->id, action->timeout);
return action;
}
/*!
* \internal
* \brief Unpack transition graph synapse from XML
*
* \param[in] new_graph Transition graph that synapse is part of
* \param[in] xml_synapse Synapse XML
*
* \return Newly allocated synapse on success, or NULL otherwise
*/
static synapse_t *
unpack_synapse(crm_graph_t *new_graph, xmlNode *xml_synapse)
{
const char *value = NULL;
xmlNode *action_set = NULL;
synapse_t *new_synapse = NULL;
crm_trace("Unpacking synapse %s", ID(xml_synapse));
new_synapse = calloc(1, sizeof(synapse_t));
if (new_synapse == NULL) {
return NULL;
}
pcmk__scan_min_int(ID(xml_synapse), &(new_synapse->id), 0);
value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY);
pcmk__scan_min_int(value, &(new_synapse->priority), 0);
CRM_CHECK(new_synapse->id >= 0, free(new_synapse);
return NULL);
new_graph->num_synapses++;
crm_trace("Unpacking synapse %s action sets",
crm_element_value(xml_synapse, XML_ATTR_ID));
for (action_set = first_named_child(xml_synapse, "action_set");
action_set != NULL; action_set = crm_next_same_xml(action_set)) {
for (xmlNode *action = pcmk__xml_first_child(action_set);
action != NULL; action = pcmk__xml_next(action)) {
crm_action_t *new_action = unpack_action(new_synapse, action);
if (new_action == NULL) {
continue;
}
crm_trace("Adding action %d to synapse %d",
new_action->id, new_synapse->id);
new_graph->num_actions++;
new_synapse->actions = g_list_append(new_synapse->actions,
new_action);
}
}
crm_trace("Unpacking synapse %s inputs", ID(xml_synapse));
for (xmlNode *inputs = first_named_child(xml_synapse, "inputs");
inputs != NULL; inputs = crm_next_same_xml(inputs)) {
for (xmlNode *trigger = first_named_child(inputs, "trigger");
trigger != NULL; trigger = crm_next_same_xml(trigger)) {
for (xmlNode *input = pcmk__xml_first_child(trigger);
input != NULL; input = pcmk__xml_next(input)) {
crm_action_t *new_input = unpack_action(new_synapse, input);
if (new_input == NULL) {
continue;
}
crm_trace("Adding input %d to synapse %d",
new_input->id, new_synapse->id);
new_synapse->inputs = g_list_append(new_synapse->inputs,
new_input);
}
}
}
return new_synapse;
}
/*!
* \internal
* \brief Unpack transition graph XML
*
* \param[in] xml_graph Transition graph XML to unpack
* \param[in] reference Where the XML came from (for logging)
*
* \return Newly allocated transition graph on success, NULL otherwise
* \note The caller is responsible for freeing the return value using
* pcmk__free_graph().
* \note The XML is expected to be structured like:
<transition_graph ...>
<synapse id="0">
<action_set>
<rsc_op id="2" ...>
...
</action_set>
<inputs>
<rsc_op id="1" ...
...
</inputs>
</synapse>
...
</transition_graph>
*/
crm_graph_t *
pcmk__unpack_graph(xmlNode *xml_graph, const char *reference)
{
crm_graph_t *new_graph = NULL;
const char *t_id = NULL;
const char *time = NULL;
new_graph = calloc(1, sizeof(crm_graph_t));
if (new_graph == NULL) {
return NULL;
}
new_graph->source = strdup((reference == NULL)? "unknown" : reference);
if (new_graph->source == NULL) {
free(new_graph);
return NULL;
}
new_graph->id = -1;
new_graph->abort_priority = 0;
new_graph->network_delay = 0;
new_graph->stonith_timeout = 0;
new_graph->completion_action = tg_done;
// Parse top-level attributes from <transition_graph>
if (xml_graph != NULL) {
t_id = crm_element_value(xml_graph, "transition_id");
CRM_CHECK(t_id != NULL, free(new_graph);
return NULL);
pcmk__scan_min_int(t_id, &(new_graph->id), -1);
time = crm_element_value(xml_graph, "cluster-delay");
CRM_CHECK(time != NULL, free(new_graph);
return NULL);
new_graph->network_delay = crm_parse_interval_spec(time);
time = crm_element_value(xml_graph, "stonith-timeout");
if (time == NULL) {
new_graph->stonith_timeout = new_graph->network_delay;
} else {
new_graph->stonith_timeout = crm_parse_interval_spec(time);
}
// Use 0 (dynamic limit) as default/invalid, -1 (no limit) as minimum
t_id = crm_element_value(xml_graph, "batch-limit");
if ((t_id == NULL)
|| (pcmk__scan_min_int(t_id, &(new_graph->batch_limit),
-1) != pcmk_rc_ok)) {
new_graph->batch_limit = 0;
}
t_id = crm_element_value(xml_graph, "migration-limit");
pcmk__scan_min_int(t_id, &(new_graph->migration_limit), -1);
}
// Unpack each child <synapse> element
for (xmlNode *synapse_xml = first_named_child(xml_graph, "synapse");
synapse_xml != NULL; synapse_xml = crm_next_same_xml(synapse_xml)) {
synapse_t *new_synapse = unpack_synapse(new_graph, synapse_xml);
if (new_synapse != NULL) {
new_graph->synapses = g_list_append(new_graph->synapses,
new_synapse);
}
}
crm_debug("Unpacked transition %d from %s: %d actions in %d synapses",
new_graph->id, new_graph->source, new_graph->num_actions,
new_graph->num_synapses);
return new_graph;
}
/*
* Functions for freeing transition graph objects
*/
/*!
* \internal
* \brief Free a transition graph action object
*
* \param[in] user_data Action to free
*/
static void
free_graph_action(gpointer user_data)
{
crm_action_t *action = user_data;
if ((action->timer != NULL) && (action->timer->source_id != 0)) {
crm_warn("Cancelling timer for graph action %d", action->id);
g_source_remove(action->timer->source_id);
}
if (action->params != NULL) {
g_hash_table_destroy(action->params);
}
free_xml(action->xml);
free(action->timer);
free(action);
}
/*!
* \internal
* \brief Free a transition graph synapse object
*
* \param[in] user_data Synapse to free
*/
static void
free_graph_synapse(gpointer user_data)
{
synapse_t *synapse = user_data;
g_list_free_full(synapse->actions, free_graph_action);
g_list_free_full(synapse->inputs, free_graph_action);
free(synapse);
}
/*!
* \internal
* \brief Free a transition graph object
*
* \param[in] graph Transition graph to free
*/
void
pcmk__free_graph(crm_graph_t *graph)
{
if (graph != NULL) {
g_list_free_full(graph->synapses, free_graph_synapse);
free(graph->source);
free(graph);
}
}
/*
* Other transition graph utilities
*/
/*!
* \internal
* \brief Synthesize an executor event from a graph action
*
* \param[in] resource If not NULL, use greater call ID than in this XML
* \param[in] action Graph action
* \param[in] status What to use as event execution status
* \param[in] rc What to use as event exit status
* \param[in] exit_reason What to use as event exit reason
*
* \return Newly allocated executor event on success, or NULL otherwise
*/
lrmd_event_data_t *
pcmk__event_from_graph_action(xmlNode *resource, crm_action_t *action,
int status, int rc, const char *exit_reason)
{
lrmd_event_data_t *op = NULL;
GHashTableIter iter;
const char *name = NULL;
const char *value = NULL;
xmlNode *action_resource = NULL;
CRM_CHECK(action != NULL, return NULL);
- CRM_CHECK(action->type == action_type_rsc, return NULL);
+ CRM_CHECK(action->type == pcmk__rsc_graph_action, return NULL);
action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "invalid");
return NULL);
op = lrmd_new_event(ID(action_resource),
crm_element_value(action->xml, XML_LRM_ATTR_TASK),
action->interval_ms);
lrmd__set_result(op, rc, status, exit_reason);
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
op->params = pcmk__strkey_table(free, free);
g_hash_table_iter_init(&iter, action->params);
while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) {
g_hash_table_insert(op->params, strdup(name), strdup(value));
}
for (xmlNode *xop = pcmk__xml_first_child(resource); xop != NULL;
xop = pcmk__xml_next(xop)) {
int tmp = 0;
crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp);
crm_debug("Got call_id=%d for %s", tmp, ID(resource));
if (tmp > op->call_id) {
op->call_id = tmp;
}
}
op->call_id++;
return op;
}
diff --git a/lib/pacemaker/pcmk_graph_logging.c b/lib/pacemaker/pcmk_graph_logging.c
index 0a3d14f49d..b03b2a3ff7 100644
--- a/lib/pacemaker/pcmk_graph_logging.c
+++ b/lib/pacemaker/pcmk_graph_logging.c
@@ -1,210 +1,210 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <pacemaker-internal.h>
/*!
* \internal
* \brief Return text equivalent of an enum transition_status for logging
*
* \param[in] state Transition status
*
* \return Human-readable text equivalent of \p state
*/
const char *
pcmk__graph_status2text(enum transition_status state)
{
switch (state) {
case transition_active:
return "active";
case transition_pending:
return "pending";
case transition_complete:
return "complete";
case transition_terminated:
return "terminated";
}
return "unknown";
}
static const char *
-actiontype2text(action_type_e type)
+actiontype2text(enum pcmk__graph_action_type type)
{
switch (type) {
- case action_type_pseudo:
+ case pcmk__pseudo_graph_action:
return "pseudo";
- case action_type_rsc:
+ case pcmk__rsc_graph_action:
return "resource";
- case action_type_crm:
+ case pcmk__cluster_graph_action:
return "cluster";
}
return "invalid";
}
/*!
* \internal
* \brief Find a transition graph action by ID
*
* \param[in] graph Transition graph to search
* \param[in] id Action ID to search for
*
* \return Transition graph action corresponding to \p id, or NULL if none
*/
static crm_action_t *
find_graph_action_by_id(crm_graph_t *graph, int id)
{
if (graph == NULL) {
return NULL;
}
for (GList *sIter = graph->synapses; sIter != NULL; sIter = sIter->next) {
synapse_t *synapse = (synapse_t *) sIter->data;
for (GList *aIter = synapse->actions; aIter != NULL;
aIter = aIter->next) {
crm_action_t *action = (crm_action_t *) aIter->data;
if (action->id == id) {
return action;
}
}
}
return NULL;
}
const char *
synapse_state_str(synapse_t *synapse)
{
if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) {
return "Failed";
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
return "Completed";
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
return "In-flight";
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
return "Ready";
}
return "Pending";
}
// List action IDs of inputs in graph that haven't completed successfully
static char *
synapse_pending_inputs(crm_graph_t *graph, synapse_t *synapse)
{
char *pending = NULL;
size_t pending_len = 0;
for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
crm_action_t *input = (crm_action_t *) lpc->data;
if (pcmk_is_set(input->flags, pcmk__graph_action_failed)) {
pcmk__add_word(&pending, &pending_len, ID(input->xml));
} else if (pcmk_is_set(input->flags, pcmk__graph_action_confirmed)) {
// Confirmed successful inputs are not pending
} else if (find_graph_action_by_id(graph, input->id) != NULL) {
// In-flight or pending
pcmk__add_word(&pending, &pending_len, ID(input->xml));
}
}
if (pending == NULL) {
pending = strdup("none");
}
return pending;
}
// Log synapse inputs that aren't in graph
static void
log_unresolved_inputs(unsigned int log_level, crm_graph_t *graph,
synapse_t *synapse)
{
for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
crm_action_t *input = (crm_action_t *) lpc->data;
const char *key = crm_element_value(input->xml, XML_LRM_ATTR_TASK_KEY);
const char *host = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
if (find_graph_action_by_id(graph, input->id) == NULL) {
do_crm_log(log_level,
" * [Input %2d]: Unresolved dependency %s op %s%s%s",
input->id, actiontype2text(input->type), key,
(host? " on " : ""), (host? host : ""));
}
}
}
static void
log_synapse_action(unsigned int log_level, synapse_t *synapse,
crm_action_t *action, const char *pending_inputs)
{
const char *key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
const char *host = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
char *desc = crm_strdup_printf("%s %s op %s",
synapse_state_str(synapse),
actiontype2text(action->type), key);
do_crm_log(log_level,
"[Action %4d]: %-50s%s%s (priority: %d, waiting: %s)",
action->id, desc, (host? " on " : ""), (host? host : ""),
synapse->priority, pending_inputs);
free(desc);
}
static void
log_synapse(unsigned int log_level, crm_graph_t *graph, synapse_t *synapse)
{
char *pending = NULL;
if (!pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
pending = synapse_pending_inputs(graph, synapse);
}
for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
log_synapse_action(log_level, synapse, (crm_action_t *) lpc->data,
pending);
}
free(pending);
if (!pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
log_unresolved_inputs(log_level, graph, synapse);
}
}
void
pcmk__log_graph_action(int log_level, crm_action_t *action)
{
log_synapse(log_level, NULL, action->synapse);
}
void
pcmk__log_graph(unsigned int log_level, crm_graph_t *graph)
{
if ((graph == NULL) || (graph->num_actions == 0)) {
if (log_level == LOG_TRACE) {
crm_debug("Empty transition graph");
}
return;
}
do_crm_log(log_level, "Graph %d with %d actions:"
" batch-limit=%d jobs, network-delay=%ums",
graph->id, graph->num_actions,
graph->batch_limit, graph->network_delay);
for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
log_synapse(log_level, graph, (synapse_t *) lpc->data);
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Wed, Jun 4, 5:54 AM (6 h, 25 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1854708
Default Alt Text
(140 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment