Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4624141
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
168 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index f3ab3069c7..26cb51a70f 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,1053 +1,1054 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <pacemaker-controld.h>
/* Forward declaration: this handler is defined with the history
 * synchronization functions later in this file, but te_connect_stonith()
 * registers it as a notification callback before that definition.
 */
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
/*
* stonith failure counting
*
* We don't want to get stuck in a permanent fencing loop. Keep track of the
* number of fencing failures for each target node, and the most we'll restart a
* transition for.
*/
/* Per-target failure record; stored as the value in stonith_failures */
struct st_fail_rec {
/* Number of failed fencing attempts recorded since the last reset */
int count;
};
/* Set by set_fence_reaction(); when true, handle_fence_notification() panics
 * instead of exiting after being notified that this node was fenced
 */
static bool fence_reaction_panic = FALSE;
/* Failure count at which too_many_st_failures() reports a target as hopeless */
static unsigned long int stonith_max_attempts = 10;
/* Table of struct st_fail_rec keyed by target name; created lazily by
 * st_fail_count_increment()
 */
static GHashTable *stonith_failures = NULL;
/*!
 * \internal
 * \brief Set the maximum number of fencing failures tolerated per target
 *
 * \param[in] value  Configured limit, parsed as a score by char2score()
 *
 * \note Any parsed result below 1 (including negative scores) selects the
 *       default of 10.
 */
void
update_stonith_max_attempts(const char *value)
{
    /* Validate the score while it is still signed. Storing a negative score
     * directly into the unsigned limit would wrap it to a huge value that
     * passes a "< 1" test, effectively disabling the limit.
     */
    int score = char2score(value);

    if (score < 1) {
        stonith_max_attempts = 10UL;
    } else {
        stonith_max_attempts = (unsigned long int) score;
    }
}
/*!
 * \internal
 * \brief Choose how this node reacts when notified of its own fencing
 *
 * \param[in] reaction_s  "panic" or "stop" (compared case-insensitively);
 *                        any other value is logged as invalid and treated
 *                        as "stop"
 */
void
set_fence_reaction(const char *reaction_s)
{
    if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
        fence_reaction_panic = TRUE;
        return;
    }

    // Everything that is not "panic" ends up as "stop"; warn if it wasn't asked for
    if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
        crm_warn("Invalid value '%s' for %s, using 'stop'",
                 reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
    }
    fence_reaction_panic = FALSE;
}
/*!
 * \internal
 * \brief Check whether fencing has failed too often
 *
 * \param[in] target  Node name to check, or NULL to check every recorded node
 *
 * \return TRUE (after logging a warning) if the checked target, or any target
 *         when \p target is NULL, has reached stonith_max_attempts failures,
 *         otherwise FALSE
 */
static gboolean
too_many_st_failures(const char *target)
{
    struct st_fail_rec *rec = NULL;

    if (stonith_failures == NULL) {
        return FALSE;
    }

    if (target != NULL) {
        rec = g_hash_table_lookup(stonith_failures, target);
        if ((rec == NULL) || (rec->count < stonith_max_attempts)) {
            return FALSE;
        }

    } else {
        GHashTableIter iter;
        const char *name = NULL;
        gboolean found = FALSE;

        // Stop at the first recorded node that has hit the limit
        g_hash_table_iter_init(&iter, stonith_failures);
        while (g_hash_table_iter_next(&iter, (gpointer *) &name,
                                      (gpointer *) &rec)) {
            if (rec->count >= stonith_max_attempts) {
                target = (const char *) name;
                found = TRUE;
                break;
            }
        }
        if (!found) {
            return FALSE;
        }
    }

    crm_warn("Too many failures (%d) to fence %s, giving up",
             rec->count, target);
    return TRUE;
}
/*!
 * \internal
 * \brief Zero the fencing failure count of one node or of all nodes
 *
 * \param[in] target  Name of node whose count should be cleared, or NULL to
 *                    clear every recorded count
 */
void
st_fail_count_reset(const char *target)
{
    GHashTableIter iter;
    struct st_fail_rec *entry = NULL;

    if (stonith_failures == NULL) {
        return; // Nothing has ever failed
    }

    if (target != NULL) {
        entry = g_hash_table_lookup(stonith_failures, target);
        if (entry != NULL) {
            entry->count = 0;
        }
        return;
    }

    g_hash_table_iter_init(&iter, stonith_failures);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &entry)) {
        entry->count = 0;
    }
}
/*!
 * \internal
 * \brief Record one more fencing failure for a node
 *
 * Creates the failure table on first use, and creates the node's record
 * (starting at 1) if it has none yet.
 *
 * \param[in] target  Name of node that could not be fenced
 *
 * \note If memory for a new record or its key cannot be allocated, the
 *       failure is silently not recorded.
 */
static void
st_fail_count_increment(const char *target)
{
    struct st_fail_rec *rec = NULL;
    char *key = NULL;

    if (stonith_failures == NULL) {
        stonith_failures = pcmk__strkey_table(free, free);
    }

    rec = g_hash_table_lookup(stonith_failures, target);
    if (rec != NULL) {
        rec->count++;
        return;
    }

    rec = malloc(sizeof *rec);
    key = strdup(target);
    if ((rec == NULL) || (key == NULL)) {
        /* Never hand a NULL key to the table (it is keyed by string, so a
         * NULL key would presumably be dereferenced when hashed)
         */
        free(rec);
        free(key);
        return;
    }

    rec->count = 1;
    g_hash_table_insert(stonith_failures, key, rec); // table owns key and rec
}
/* end stonith fail count functions */
/*!
 * \internal
 * \brief CIB callback for the node state update sent after fencing
 *
 * \param[in] msg        CIB message (logged if the update failed)
 * \param[in] call_id    CIB call ID of the update
 * \param[in] rc         Result of the update (negative on failure)
 * \param[in] output     Unused
 * \param[in] user_data  Name of the fenced node, as a C string
 */
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
                    void *user_data)
{
    if (rc >= pcmk_ok) {
        crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
        return;
    }

    // The status section could not be updated, so the transition can't continue
    crm_err("Fencing update %d for %s: failed - %s (%d)",
            call_id, (char *)user_data, pcmk_strerror(rc), rc);
    crm_log_xml_warn(msg, "Failed update");
    abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
}
/*!
 * \internal
 * \brief Record a successfully fenced node as down, locally and in the CIB
 *
 * Marks the peer as down in the local caches, sends a node state update for
 * it to the CIB status section (with cib_fencing_updated() as the result
 * callback), and requests deletion of the node's existing state.
 *
 * \param[in] action  Not referenced by this function
 * \param[in] target  Name of the fenced node (must not be NULL)
 * \param[in] uuid    ID of the fenced node (must not be NULL)
 */
static void
send_stonith_update(pcmk__graph_action_t *action, const char *target,
const char *uuid)
{
int rc = pcmk_ok;
crm_node_t *peer = NULL;
/* We (usually) rely on the membership layer to do node_update_cluster,
* and the peer status callback to do node_update_peer, because the node
* might have already rejoined before we get the stonith result here.
*/
int flags = node_update_join | node_update_expected;
/* zero out the node-status & remove all LRM status info */
xmlNode *node_state = NULL;
CRM_CHECK(target != NULL, return);
CRM_CHECK(uuid != NULL, return);
/* Make sure the membership and join caches are accurate */
peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
/* Usually, we rely on the membership layer to update the cluster state
* in the CIB. However, if the node has never been seen, do it here, so
* the node is not considered unclean.
*/
flags |= node_update_cluster;
}
if (peer->uuid == NULL) {
crm_info("Recording uuid '%s' for node '%s'", uuid, target);
peer->uuid = strdup(uuid);
}
crmd_peer_down(peer, TRUE);
/* Generate a node state update for the CIB */
node_state = create_node_state_update(peer, flags, NULL, __func__);
/* we have to mark whether or not remote nodes have already been fenced */
if (peer->flags & crm_remote_node) {
char *now_s = pcmk__ttoa(time(NULL));
crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
free(now_s);
}
/* Force our known ID */
crm_xml_add(node_state, XML_ATTR_UUID, uuid);
/* rc receives the return value of the update call; it is used below as the
 * call ID when registering the callback
 */
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
cib_quorum_override | cib_scope_local | cib_can_create);
/* Delay processing the trigger until the update completes */
crm_debug("Sending fencing update %d for %s", rc, target);
/* The callback gets its own copy of the target name as user data
 * (NOTE(review): presumably freed by the callback machinery -- confirm)
 */
fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
/* Make sure it sticks */
/* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */
controld_delete_node_state(peer->uname, controld_section_all,
cib_scope_local);
free_xml(node_state);
return;
}
/*!
 * \internal
 * \brief Abort the current transition because fencing failed
 *
 * \param[in] abort_action  Requested follow-up (restart or stop transition)
 * \param[in] target        Node whose failure count may veto a restart
 *                          (NULL to consider all nodes)
 * \param[in] reason        Stonith action XML to log as abort reason (or NULL)
 */
static void
abort_for_stonith_failure(enum transition_action abort_action,
                          const char *target, xmlNode *reason)
{
    enum transition_action effective_action = abort_action;

    /* Repeated fencing failures must not keep spawning new transitions
     * forever, so downgrade to a plain stop once the limit is reached.
     */
    if (abort_action != tg_stop) {
        if (too_many_st_failures(target)) {
            effective_action = tg_stop;
        }
    }

    abort_transition(INFINITY, effective_action, "Stonith failed", reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
/* List of node names; each element is a copy made with strdup() by
 * add_stonith_cleanup() and freed by whichever function removes it
 */
static GList *stonith_cleanup_list = NULL;
/*!
 * \internal
 * \brief Append a node to the stonith cleanup list
 *
 * \param[in] target  Name of node to add (the list stores its own copy)
 */
void
add_stonith_cleanup(const char *target) {
    char *name_copy = strdup(target);

    stonith_cleanup_list = g_list_append(stonith_cleanup_list, name_copy);
}
/*!
 * \internal
 * \brief Drop every entry for a node from the stonith cleanup list
 *
 * \param[in] target  Name of node to remove (compared case-insensitively)
 */
void
remove_stonith_cleanup(const char *target)
{
    GList *node = NULL;
    GList *next = NULL;

    for (node = stonith_cleanup_list; node != NULL; node = next) {
        char *name = node->data;

        // Grab the successor first, because the current link may be deleted
        next = node->next;

        if (!pcmk__str_eq(target, name, pcmk__str_casei)) {
            continue;
        }
        crm_trace("Removing %s from the cleanup list", name);
        stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, node);
        free(name);
    }
}
/*!
 * \internal
 * \brief Empty the stonith cleanup list, freeing every stored node name
 */
void
purge_stonith_cleanup()
{
    GList *node = stonith_cleanup_list;

    if (node == NULL) {
        return;
    }

    while (node != NULL) {
        char *name = node->data;

        crm_info("Purging %s from stonith cleanup list", name);
        free(name);
        node = node->next;
    }

    g_list_free(stonith_cleanup_list);
    stonith_cleanup_list = NULL;
}
/*!
 * \internal
 * \brief Send a stonith update for each node in the cleanup list, then
 *        empty the list
 */
void
execute_stonith_cleanup()
{
    GList *node = stonith_cleanup_list;

    while (node != NULL) {
        char *name = node->data;
        crm_node_t *peer = crm_get_peer(0, name);
        const char *peer_uuid = crm_peer_uuid(peer);

        crm_notice("Marking %s, target of a previous stonith action, as clean", name);
        send_stonith_update(NULL, name, peer_uuid);
        free(name);

        node = node->next;
    }

    g_list_free(stonith_cleanup_list);
    stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
/* Fencer API object; allocated on demand by te_connect_stonith() and freed by
 * controld_disconnect_fencer(true)
 */
static stonith_t *stonith_api = NULL;
/* Main loop trigger that runs te_connect_stonith(); created by
 * controld_trigger_fencer_connect()
 */
static crm_trigger_t *stonith_reconnect = NULL;
/* "<system name>.<pid>" string built by handle_fence_notification() and
 * compared there against the client that requested a fencing operation
 */
static char *te_client_id = NULL;
/*!
 * \internal
 * \brief Fail all pending fencing actions in a transition graph
 *
 * In every unconfirmed synapse of \p graph, mark each unconfirmed cluster
 * action whose task is CRM_OP_FENCE as failed and update the graph. If any
 * such action was found, abort the transition (requesting a restart).
 *
 * \param[in] graph  Transition graph to scan (may be NULL)
 *
 * \return TRUE if at least one fencing action was failed, otherwise FALSE
 */
static gboolean
fail_incompletable_stonith(pcmk__graph_t *graph)
{
GList *lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GList *lpc2 = NULL;
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
/* Only unconfirmed cluster-level actions are candidates */
if ((action->type != pcmk__cluster_graph_action)
|| pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
/* NOTE(review): the '-'/'+' pair below is a diff hunk that renames
 * crm__set_graph_action_flags to pcmk__set_graph_action_flags
 */
- crm__set_graph_action_flags(action, pcmk__graph_action_failed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
/* Remember the XML of the most recently failed action as the abort reason */
last_action = action->xml;
pcmk__update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(tg_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
/*!
 * \internal
 * \brief Handle loss of the fencer connection
 *
 * \param[in] st  Fencer API connection that was lost
 * \param[in] e   Unused
 */
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
    te_cleanup_stonith_history_sync(st, FALSE);

    if (!pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) {
        crm_info("Fencing daemon disconnected");
    } else {
        // The connection is still needed, so schedule a reconnect
        crm_crit("Fencing daemon connection failed");
        mainloop_set_trigger(stonith_reconnect);
    }

    if (stonith_api != NULL) {
        /* Notifications left in the client API's table would not be
         * re-established properly on reconnect, so clear them all
         */
        if (stonith_api->state != stonith_disconnected) {
            stonith_api->cmds->disconnect(st);
        }
        stonith_api->cmds->remove_notification(stonith_api, NULL);
    }

    if (AM_I_DC) {
        // Pending fencing actions can no longer complete
        fail_incompletable_stonith(transition_graph);
        trigger_graph();
    }
}
/*!
* \internal
* \brief Handle an event notification from the fencing API
*
* Logs the result, and for fencing (as opposed to unfencing, which is only
* logged): exits or panics if this node itself was the target of a successful
* operation, updates the local fencing failure count when not DC, and after
* a success marks the target as down (the DC also updates the CIB).
*
* \param[in] st Fencing API connection
* \param[in] event Fencing API event notification
*/
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
bool succeeded = true;
const char *executioner = "the cluster";
const char *client = "a client";
const char *reason = NULL;
int exec_status;
/* Build our own client ID once, for the comparison against the requesting
 * client further down
 */
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (event == NULL) {
crm_err("Notify data not found");
return;
}
if (event->executioner != NULL) {
executioner = event->executioner;
}
if (event->client_origin != NULL) {
client = event->client_origin;
}
/* Success requires both an OK exit status and a "done" execution status */
exec_status = stonith__event_execution_status(event);
if ((stonith__event_exit_status(event) != CRM_EX_OK)
|| (exec_status != PCMK_EXEC_DONE)) {
succeeded = false;
/* Execution finished but the exit status was bad: report it as an error */
if (exec_status == PCMK_EXEC_DONE) {
exec_status = PCMK_EXEC_ERROR;
}
}
reason = stonith__event_exit_reason(event);
crmd_alert_fencing_op(event);
if (pcmk__str_eq("on", event->action, pcmk__str_none)) {
// Unfencing doesn't need special handling, just a log message
if (succeeded) {
crm_notice("%s was unfenced by %s at the request of %s@%s",
event->target, executioner, client, event->origin);
} else {
crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
event->target, executioner,
pcmk_exec_status_str(exec_status),
((reason == NULL)? "" : ": "),
((reason == NULL)? "" : reason),
stonith__event_exit_status(event));
}
return;
}
if (succeeded
&& pcmk__str_eq(event->target, fsa_our_uname, pcmk__str_casei)) {
/* We were notified of our own fencing. Most likely, either fencing was
* misconfigured, or fabric fencing that doesn't cut cluster
* communication is in use.
*
* Either way, shutting down the local host is a good idea, to require
* administrator intervention. Also, other nodes would otherwise likely
* set our status to lost because of the fencing callback and discard
* our subsequent election votes as "not part of our cluster".
*/
crm_crit("We were allegedly just fenced by %s for %s!",
executioner, event->origin); // Dumps blackbox if enabled
if (fence_reaction_panic) {
pcmk__panic(__func__);
} else {
crm_exit(CRM_EX_FATAL);
}
return; // Should never get here
}
/* Update the count of fencing failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC) {
if (succeeded) {
st_fail_count_reset(event->target);
} else {
st_fail_count_increment(event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
"%s%s%s%s " CRM_XS " event=%s",
event->target, (succeeded? "" : " not"),
event->action, executioner, client, event->origin,
(succeeded? "OK" : pcmk_exec_status_str(exec_status)),
((reason == NULL)? "" : " ("),
((reason == NULL)? "" : reason),
((reason == NULL)? "" : ")"),
event->id);
if (succeeded) {
crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
CRM_GET_PEER_ANY);
const char *uuid = NULL;
/* Nothing more can be done for a target that is not in the node cache */
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
if (AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
* on the scheduler creating fence pseudo-events for the guests.
*/
if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
/* Abort the current transition if it wasn't the cluster that
* initiated fencing.
*/
crm_info("External fencing operation from %s fenced %s",
client, event->target);
abort_transition(INFINITY, tg_restart,
"External Fencing Operation", NULL);
}
/* Assume it was our leader if we don't currently have one */
} else if (pcmk__str_eq(fsa_our_dc, event->target,
pcmk__str_null_matches|pcmk__str_casei)
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
crm_notice("Fencing target %s %s our leader",
event->target, (fsa_our_dc? "was" : "may have been"));
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (pcmk__str_eq(event->executioner, fsa_our_uname,
pcmk__str_casei)) {
send_stonith_update(NULL, event->target, uuid);
}
add_stonith_cleanup(event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (pcmk_is_set(peer->flags, crm_remote_node)) {
remote_ra_fail(event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
 * \brief Connect to fencer
 *
 * \param[in] user_data  If NULL, block and retry failures immediately;
 *                       otherwise make one attempt and leave retries to the
 *                       main loop
 *
 * \return TRUE
 * \note In the blocking case (NULL user_data), up to 30 attempts are made 2s
 *       apart, so the controller could be blocked for as long as 58s.
 */
static gboolean
te_connect_stonith(gpointer user_data)
{
    int rc = pcmk_ok;

    if (stonith_api == NULL) {
        stonith_api = stonith_api_new();
        if (stonith_api == NULL) {
            crm_err("Could not connect to fencer: API memory allocation failed");
            return TRUE;
        }
    }

    if (stonith_api->state != stonith_disconnected) {
        crm_trace("Already connected to fencer, no need to retry");
        return TRUE;
    }

    if (user_data != NULL) {
        // Non-blocking: a failure is retried later via the reconnect trigger
        rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
        if (rc != pcmk_ok) {
            if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) {
                crm_notice("Fencer connection failed (will retry): %s "
                           CRM_XS " rc=%d", pcmk_strerror(rc), rc);
                mainloop_set_trigger(stonith_reconnect);
            } else {
                crm_info("Fencer connection failed (ignoring because no longer required): %s "
                         CRM_XS " rc=%d", pcmk_strerror(rc), rc);
            }
            return TRUE;
        }

    } else {
        // Blocking: keep retrying right here until connected or out of attempts
        rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
        if (rc != pcmk_ok) {
            crm_err("Could not connect to fencer in 30 attempts: %s "
                    CRM_XS " rc=%d", pcmk_strerror(rc), rc);
            return TRUE;
        }
    }

    // Connected: subscribe to the events this file handles
    stonith_api->cmds->register_notification(stonith_api,
                                             T_STONITH_NOTIFY_DISCONNECT,
                                             tengine_stonith_connection_destroy);
    stonith_api->cmds->register_notification(stonith_api,
                                             T_STONITH_NOTIFY_FENCE,
                                             handle_fence_notification);
    stonith_api->cmds->register_notification(stonith_api,
                                             T_STONITH_NOTIFY_HISTORY_SYNCED,
                                             tengine_stonith_history_synced);
    te_trigger_stonith_history_sync(TRUE);
    crm_notice("Fencer successfully connected");
    return TRUE;
}
/*!
\internal
\brief Schedule fencer connection attempt in main loop
*/
void
controld_trigger_fencer_connect()
{
if (stonith_reconnect == NULL) {
stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
te_connect_stonith,
GINT_TO_POINTER(TRUE));
}
controld_set_fsa_input_flags(R_ST_REQUIRED);
mainloop_set_trigger(stonith_reconnect);
}
/*!
 * \internal
 * \brief Disconnect from the fencer
 *
 * \param[in] destroy  If true, also free the API object, the reconnect
 *                     trigger, and the cached client ID
 */
void
controld_disconnect_fencer(bool destroy)
{
    if (stonith_api != NULL) {
        // Prevent fencer connection from coming up again
        controld_clear_fsa_input_flags(R_ST_REQUIRED);

        if (stonith_api->state != stonith_disconnected) {
            stonith_api->cmds->disconnect(stonith_api);
        }
        stonith_api->cmds->remove_notification(stonith_api, NULL);
    }

    if (!destroy) {
        return;
    }

    if (stonith_api != NULL) {
        stonith_api->cmds->free(stonith_api);
        stonith_api = NULL;
    }

    if (stonith_reconnect != NULL) {
        mainloop_destroy_trigger(stonith_reconnect);
        stonith_reconnect = NULL;
    }

    free(te_client_id);
    te_client_id = NULL;
}
/*!
 * \internal
 * \brief Trigger callback that requests a cluster-wide fence history sync
 *
 * \param[in] user_data  Unused
 *
 * \return TRUE if a sync was requested, FALSE if the fencer connection is
 *         unavailable
 */
static gboolean
do_stonith_history_sync(gpointer user_data)
{
    stonith_history_t *history = NULL;

    if ((stonith_api == NULL)
        || (stonith_api->state == stonith_disconnected)) {
        crm_info("Skip triggering stonith history-sync as stonith is disconnected");
        return FALSE;
    }

    te_cleanup_stonith_history_sync(stonith_api, FALSE);
    stonith_api->cmds->history(stonith_api,
                               st_opt_sync_call | st_opt_broadcast,
                               NULL, &history, 5);
    stonith_history_free(history);
    return TRUE;
}
/*!
 * \internal
 * \brief Handle the result of a fencing operation requested by te_fence_node()
 *
 * On the DC, matches the result to an action in the current transition graph
 * via the transition key passed as callback user data, then either confirms
 * the action (success) or marks it failed and aborts the transition
 * (failure). On other nodes the result is only logged.
 *
 * \param[in] stonith  Fencer API connection (not referenced here)
 * \param[in] data     Callback data; its userdata is the transition key
 */
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
char *uuid = NULL;
int stonith_id = -1;
int transition_id = -1;
pcmk__graph_action_t *action = NULL;
const char *target = NULL;
if ((data == NULL) || (data->userdata == NULL)) {
crm_err("Ignoring fence operation %d result: "
"No transition key given (bug?)",
((data == NULL)? -1 : data->call_id));
return;
}
/* NOTE(review): this early return does not pass through the "bail" label, so
 * data->userdata is not freed on this path -- confirm whether that is intended
 */
if (!AM_I_DC) {
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
reason = pcmk_exec_status_str(stonith__execution_status(data));
}
crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
data->call_id, stonith__exit_status(data), reason,
(const char *) data->userdata);
return;
}
CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
&stonith_id, NULL),
goto bail);
/* Discard results that don't belong to the transition currently in progress */
if (transition_graph->complete || (stonith_id < 0)
|| !pcmk__str_eq(uuid, te_uuid, pcmk__str_none)
|| (transition_graph->id != transition_id)) {
crm_info("Ignoring fence operation %d result: "
"Not from current transition " CRM_XS
" complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
data->call_id, pcmk__btoa(transition_graph->complete),
stonith_id, uuid, te_uuid, transition_id, transition_graph->id);
goto bail;
}
action = controld_get_action(stonith_id);
if (action == NULL) {
crm_err("Ignoring fence operation %d result: "
"Action %d not found in transition graph (bug?) "
CRM_XS " uuid=%s transition=%d",
data->call_id, stonith_id, uuid, transition_id);
goto bail;
}
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if (target == NULL) {
crm_err("Ignoring fence operation %d result: No target given (bug?)",
data->call_id);
goto bail;
}
stop_te_timer(action);
if (stonith__exit_status(data) == CRM_EX_OK) {
/* This uuid (the target node's ID from the action XML) shadows the outer
 * uuid (decoded from the transition key) for the rest of this block
 */
const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *op = crm_meta_value(action->params, "stonith_action");
crm_info("Fence operation %d for %s succeeded", data->call_id, target);
if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
te_action_confirmed(action, NULL);
/* "on" is unfencing: record node attributes instead of a node state update */
if (pcmk__str_eq("on", op, pcmk__str_casei)) {
const char *value = NULL;
char *now = pcmk__ttoa(time(NULL));
gboolean is_remote_node = FALSE;
/* This check is not 100% reliable, since this node is not
* guaranteed to have the remote node cached. However, it
* doesn't have to be reliable, since the attribute manager can
* learn a node's "remoteness" by other means sooner or later.
* This allows it to learn more quickly if this node does have
* the information.
*/
if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
is_remote_node = TRUE;
}
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
is_remote_node);
free(now);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
is_remote_node);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
is_remote_node);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
send_stonith_update(action, target, uuid);
/* NOTE(review): the '-'/'+' lines below are a diff hunk that renames
 * crm__set_graph_action_flags to pcmk__set_graph_action_flags
 */
- crm__set_graph_action_flags(action, pcmk__graph_action_sent_update);
+ pcmk__set_graph_action_flags(action,
+ pcmk__graph_action_sent_update);
}
}
st_fail_count_reset(target);
} else {
enum transition_action abort_action = tg_restart;
int status = stonith__execution_status(data);
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
if (status == PCMK_EXEC_DONE) {
reason = "Agent returned error";
} else {
reason = pcmk_exec_status_str(status);
}
}
/* NOTE(review): diff hunk, same rename as above */
- crm__set_graph_action_flags(action, pcmk__graph_action_failed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
crm_warn("Fence operation %d for %s failed: %s "
"(aborting transition and giving up for now)",
data->call_id, target, reason);
abort_action = tg_stop;
} else {
crm_notice("Fence operation %d for %s failed: %s "
"(aborting transition)", data->call_id, target, reason);
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in
* handle_fence_notification().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
pcmk__update_graph(transition_graph, action);
trigger_graph();
bail:
/* The transition key and the decoded uuid are both released here */
free(data->userdata);
free(uuid);
return;
}
/*!
 * \internal
 * \brief Ask the fencer to fence a node, with an optional delay
 *
 * \param[in] target  Name of node to fence
 * \param[in] type    Fencing action to request
 * \param[in] delay   Delay as a string, parsed as an integer with a
 *                    minimum of 0
 *
 * \return Return value of the fencer API's fence_with_delay() call
 */
static int
fence_with_delay(const char *target, const char *type, const char *delay)
{
    int delay_i;
    int timeout_sec = (int) (transition_graph->stonith_timeout / 1000);
    uint32_t options = st_opt_none; // Group of enum stonith_call_options

    // With exactly one confirmed node, allow that node to fence itself
    if (crmd_join_phase_count(crm_join_confirmed) == 1) {
        stonith__set_call_options(options, target, st_opt_allow_suicide);
    }

    pcmk__scan_min_int(delay, &delay_i, 0);

    return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
                                               type, timeout_sec, 0, delay_i);
}
/*!
 * \internal
 * \brief Initiate a fencing action from the transition graph
 *
 * Connects to the fencer (blocking if necessary), requests fencing of the
 * action's target, and registers tengine_stonith_callback() to receive the
 * result, with the transition key as callback user data.
 *
 * \param[in] graph   Transition graph (not referenced here)
 * \param[in] action  Fencing action to initiate
 *
 * \return TRUE if the request was submitted and the callback registered,
 *         FALSE if the action is malformed or no fencer API object exists
 */
gboolean
te_fence_node(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
    int rc = 0;
    const char *id = NULL;
    const char *uuid = NULL;
    const char *target = NULL;
    const char *type = NULL;
    char *transition_key = NULL;
    const char *priority_delay = NULL;
    gboolean invalid_action = FALSE;

    id = ID(action->xml);
    target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
    type = crm_meta_value(action->params, "stonith_action");

    CRM_CHECK(id != NULL, invalid_action = TRUE);
    CRM_CHECK(uuid != NULL, invalid_action = TRUE);
    CRM_CHECK(type != NULL, invalid_action = TRUE);
    CRM_CHECK(target != NULL, invalid_action = TRUE);

    if (invalid_action) {
        crm_log_xml_warn(action->xml, "BadAction");
        return FALSE;
    }

    priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);

    crm_notice("Requesting fencing (%s) of node %s "
               CRM_XS " action=%s timeout=%u%s%s",
               type, target, id, transition_graph->stonith_timeout,
               priority_delay ? " priority_delay=" : "",
               priority_delay ? priority_delay : "");

    /* Passing NULL means block until we can connect... */
    te_connect_stonith(NULL);

    /* te_connect_stonith() leaves stonith_api NULL (and logs an error) when
     * the API object cannot be allocated; don't dereference it in that case
     */
    if (stonith_api == NULL) {
        return FALSE;
    }

    rc = fence_with_delay(target, type, priority_delay);

    // The callback receives the key as its user data and frees it
    transition_key = pcmk__transition_key(transition_graph->id, action->id, 0,
                                          te_uuid);
    stonith_api->cmds->register_callback(stonith_api, rc,
                                         (int) (transition_graph->stonith_timeout / 1000),
                                         st_opt_timeout_updates, transition_key,
                                         "tengine_stonith_callback", tengine_stonith_callback);
    return TRUE;
}
bool
controld_verify_stonith_watchdog_timeout(const char *value)
{
gboolean rv = TRUE;
if (stonith_api && (stonith_api->state != stonith_disconnected) &&
stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
fsa_our_uname)) {
rv = pcmk__valid_sbd_timeout(value);
}
return rv;
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
/* Trigger that runs do_stonith_history_sync(); created on first use by
 * te_trigger_stonith_history_sync()
 */
static crm_trigger_t *stonith_history_sync_trigger = NULL;
/* 5-second timer that sets the trigger above when it expires */
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
/* 30-second fallback timer that sets the trigger above when it expires */
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
/*!
 * \internal
 * \brief Stop (or destroy) the history sync timers and stop listening for
 *        history-synced notifications
 *
 * \param[in] st           Fencer API connection to remove the notification
 *                         from (may be NULL)
 * \param[in] free_timers  If true, delete the timers; otherwise only stop them
 */
void
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
{
    if (!free_timers) {
        mainloop_timer_stop(stonith_history_sync_timer_short);
        mainloop_timer_stop(stonith_history_sync_timer_long);

    } else {
        mainloop_timer_del(stonith_history_sync_timer_short);
        stonith_history_sync_timer_short = NULL;
        mainloop_timer_del(stonith_history_sync_timer_long);
        stonith_history_sync_timer_long = NULL;
    }

    if (st != NULL) {
        st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
    }
}
/*!
 * \internal
 * \brief Notification handler for a completed fence history sync
 *
 * Stops the pending sync timers (without freeing them) and removes the
 * history-synced notification from the connection.
 *
 * \param[in] st        Fencer API connection
 * \param[in] st_event  Not referenced here
 */
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
{
te_cleanup_stonith_history_sync(st, FALSE);
crm_debug("Fence-history synced - cancel all timers");
}
/*!
 * \internal
 * \brief Timer callback that fires the history sync trigger
 *
 * \param[in] user_data  Not referenced here
 *
 * \return FALSE always
 */
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
mainloop_set_trigger(stonith_history_sync_trigger);
return FALSE;
}
/*!
 * \internal
 * \brief Schedule a cluster-wide fence history synchronization
 *
 * \param[in] long_timeout  If true, use the 30s fallback timer; otherwise use
 *                          the 5s timer
 *
 * \note The short delay gives more nodes a chance to show up, so that
 *       unnecessary history-sync traffic is avoided. The long delay is a
 *       fallback after connecting to the fencer: the DC is given 30s to
 *       trigger a sync, and if it doesn't (e.g. the fencer crashed and was
 *       restarted), one is triggered locally.
 */
void
te_trigger_stonith_history_sync(bool long_timeout)
{
    /* The trigger and timers can safely be left around, because
     * do_stonith_history_sync() checks the fencer connection before acting.
     */
    if (stonith_history_sync_trigger == NULL) {
        stonith_history_sync_trigger =
            mainloop_add_trigger(G_PRIORITY_LOW,
                                 do_stonith_history_sync, NULL);
    }

    if (!long_timeout) {
        if (stonith_history_sync_timer_short == NULL) {
            stonith_history_sync_timer_short =
                mainloop_timer_add("history_sync_short", 5000,
                                   FALSE, stonith_history_sync_set_trigger,
                                   NULL);
        }
        crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
        mainloop_timer_start(stonith_history_sync_timer_short);
        return;
    }

    if (stonith_history_sync_timer_long == NULL) {
        stonith_history_sync_timer_long =
            mainloop_timer_add("history_sync_long", 30000,
                               FALSE, stonith_history_sync_set_trigger,
                               NULL);
    }
    crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
    mainloop_timer_start(stonith_history_sync_timer_long);
}
/* end stonith history synchronization functions */
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index f35cd2fcb3..17e4c4b7bf 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -1,686 +1,687 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
/* Identifier passed to pcmk__transition_key() when building transition keys */
char *te_uuid = NULL;
/* NOTE(review): not referenced in the visible part of this file -- purpose to
 * be confirmed against its users
 */
GHashTable *te_targets = NULL;
/* Prototypes for functions defined later in this file */
void send_rsc_command(pcmk__graph_action_t *action);
static void te_update_job_count(pcmk__graph_action_t *action, int offset);
/*!
 * \internal
 * \brief Start the timeout timer for a graph action
 *
 * The timer fires action_timer_callback() after the action's own timeout
 * plus the graph's network delay.
 *
 * \param[in] graph   Transition graph supplying the network delay
 * \param[in] action  Action to time; its timer ID is stored in action->timer
 */
static void
te_start_action_timer(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
    guint interval_ms = action->timeout + graph->network_delay;

    action->timer = g_timeout_add(interval_ms, action_timer_callback,
                                  (void *) action);
    CRM_ASSERT(action->timer != 0);
}
/*!
 * \internal
 * \brief Execute a pseudo-action from the transition graph
 *
 * A maintenance-nodes pseudo-action is forwarded to every other node in the
 * peer cache and processed for remote nodes; any other pseudo-action is only
 * checked for Pacemaker Remote side effects. The action is then confirmed.
 *
 * \param[in] graph   Transition graph the action belongs to
 * \param[in] pseudo  Pseudo-action to execute
 *
 * \return TRUE
 */
static gboolean
te_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
{
    const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);

    if (!pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
        /* Check action for Pacemaker Remote node side effects */
        remote_ra_process_pseudo(pseudo->xml);

    } else {
        /* send to peers as well? */
        GHashTableIter iter;
        crm_node_t *peer = NULL;

        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
            // Every peer except ourselves gets a copy of the request
            if (!pcmk__str_eq(fsa_our_uname, peer->uname, pcmk__str_casei)) {
                xmlNode *request = create_request(task, pseudo->xml,
                                                  peer->uname, CRM_SYSTEM_CRMD,
                                                  CRM_SYSTEM_TENGINE, NULL);

                send_cluster_message(peer, crm_msg_crmd, request, FALSE);
                free_xml(request);
            }
        }
        remote_ra_process_maintenance_nodes(pseudo->xml);
    }

    crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
              crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
    te_action_confirmed(pseudo, graph);
    return TRUE;
}
/*!
 * \internal
 * \brief Get the expected return code of a graph action
 *
 * \param[in] action  Action whose XML_ATTR_TE_TARGET_RC meta-attribute
 *                    should be read
 *
 * \return Meta-attribute value parsed as an integer with a minimum of 0
 */
static int
get_target_rc(pcmk__graph_action_t *action)
{
    int target_rc;
    const char *target_rc_s = crm_meta_value(action->params,
                                             XML_ATTR_TE_TARGET_RC);

    pcmk__scan_min_int(target_rc_s, &target_rc, 0);
    return target_rc;
}
/*!
 * \internal
 * \brief Execute a cluster (controller-level) action from the transition graph
 *
 * Builds a request from the action and sends it to the controller on the
 * router node. A shutdown of the local node is not sent; it is recorded as
 * the graph's completion action instead.
 *
 * \param[in] graph   Transition graph the action belongs to
 * \param[in] action  Cluster action to execute
 *
 * \return TRUE if the action was handled, FALSE if it names no node or the
 *         message could not be sent
 */
static gboolean
te_crm_command(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
char *counter = NULL;
xmlNode *cmd = NULL;
gboolean is_local = FALSE;
const char *id = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
id = ID(action->xml);
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
/* Without an explicit router node, route to the target node itself, except
 * that an LRM delete in CIB mode is routed to the local node
 */
if (!router_node) {
router_node = on_node;
if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
router_node = fsa_our_uname;
}
}
}
if (pcmk__str_empty(on_node)) {
crm_err("Corrupted command (id=%s) %s: no node",
pcmk__s(id, "<null>"), pcmk__s(task, "without task"));
return FALSE;
}
if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_info("Executing crm-event (%s)%s%s: %s on %s",
pcmk__s(id, "<null>"), (is_local? " locally" : ""),
(no_wait? " without waiting" : ""),
pcmk__s(task, "unspecified task"), on_node);
if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
/* defer until everything else completes */
crm_info("crm-event (%s) is a local shutdown", pcmk__s(id, "<null>"));
graph->completion_action = tg_shutdown;
graph->abort_reason = "local shutdown";
te_action_confirmed(action, graph);
return TRUE;
} else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
/* Shutdown of another node: record that the peer is expected to go down */
crm_node_t *peer = crm_get_peer(0, router_node);
pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
}
cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
/* Attach the transition key (graph ID, action ID, expected rc, te_uuid) to the request */
counter = pcmk__transition_key(transition_graph->id, action->id,
get_target_rc(action), te_uuid);
crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
free(counter);
free_xml(cmd);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return FALSE;
} else if (no_wait) {
/* No reply is awaited, so the action is confirmed as soon as it is sent */
te_action_confirmed(action, graph);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_start_action_timer(graph, action);
}
return TRUE;
}
/*!
 * \internal
 * \brief Build an executor event describing a resource action that timed out
 *
 * \param[in] action     Resource action that timed out
 * \param[in] target_rc  Result that the action was expected to return
 *
 * This covers the case where neither the executor nor the controller relaying
 * the action answered in time. (A timeout detected by the executor itself
 * while running the action arrives through the normal result callback.) The
 * exit reason names the local executor when the action's target is the local
 * node, and otherwise the controller on the router node (or on the target, if
 * no router node is set).
 *
 * \return Newly created executor event for result of \p action
 * \note The caller is responsible for freeing the return value using
 *       lrmd_free_event().
 */
static lrmd_event_data_t *
synthesize_timeout_event(pcmk__graph_action_t *action, int target_rc)
{
    lrmd_event_data_t *event = NULL;
    const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    const char *reason = "Local executor did not return result in time";
    char *remote_reason = NULL;

    if (!pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
        const char *relay = crm_element_value(action->xml,
                                              XML_LRM_ATTR_ROUTER_NODE);

        if (relay == NULL) {
            relay = target;
        }
        remote_reason = crm_strdup_printf("Controller on %s did not return "
                                          "result in time", relay);
        reason = remote_reason;
    }

    event = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
                                          PCMK_OCF_UNKNOWN_ERROR, reason);
    event->call_id = -1;
    event->user_data = pcmk__transition_key(transition_graph->id, action->id,
                                            target_rc, te_uuid);

    // The reason string is no longer needed once the event has been built
    free(remote_reason);
    return event;
}
/*!
 * \internal
 * \brief Record a synthesized executor event for a graph action in the CIB
 *
 * Builds a node_state/lrm/lrm_resources/lrm_resource update for the action's
 * target node and resource, adds the operation history for \p op to it, and
 * sends it to the CIB status section. Does nothing if the action XML has no
 * XML_CIB_TAG_RESOURCE child.
 *
 * \param[in] action  Graph action that the event is for
 * \param[in] op      Executor event to record
 */
static void
controld_record_action_event(pcmk__graph_action_t *action,
lrmd_event_data_t *op)
{
xmlNode *state = NULL;
xmlNode *rsc = NULL;
xmlNode *action_rsc = NULL;
int rc = pcmk_ok;
const char *rsc_id = NULL;
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
int call_options = cib_quorum_override | cib_scope_local;
int target_rc = get_target_rc(action);
/* Only actions that carry a resource element can be recorded */
action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
if (action_rsc == NULL) {
return;
}
rsc_id = ID(action_rsc);
CRM_CHECK(rsc_id != NULL,
crm_log_xml_err(action->xml, "Bad:action"); return);
/*
update the CIB
<node_state id="hadev">
<lrm>
<lrm_resources>
<lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
*/
state = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(state, XML_ATTR_UUID, target_uuid);
crm_xml_add(state, XML_ATTR_UNAME, target);
/* "rsc" is reused as a cursor while descending to the lrm_resource element */
rsc = create_xml_node(state, XML_CIB_TAG_LRM);
crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
/* Carry over the agent type, class, and provider from the action's resource */
crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
__func__);
/* The update's return value is passed on as the call ID for the callback */
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
free_xml(state);
crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
rc, action->id, task_uuid, target);
/* Flag the action as having had a CIB update sent for it */
- crm__set_graph_action_flags(action, pcmk__graph_action_sent_update);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
}
/*!
 * \internal
 * \brief Log a graph action timeout and record it as a synthesized result
 *
 * \param[in] action  Graph action that timed out
 */
void
controld_record_action_timeout(pcmk__graph_action_t *action)
{
    const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    const char *op_key = crm_element_value(action->xml,
                                           XML_LRM_ATTR_TASK_KEY);
    int expected_rc = get_target_rc(action);
    lrmd_event_data_t *timeout_event = NULL;

    crm_warn("%s %d: %s on %s timed out",
             crm_element_name(action->xml), action->id, op_key, node);

    // The synthesized event is owned here and freed once recorded
    timeout_event = synthesize_timeout_event(action, expected_rc);
    controld_record_action_event(action, timeout_event);
    lrmd_free_event(timeout_event);
}
/*!
 * \internal
 * \brief Execute a resource action from the transition graph
 *
 * The action XML is tagged with this transition's key and wrapped in a
 * CRM_OP_INVOKE_LRM request. If the router node (the target node unless the
 * action XML names a router node) is the local node, the request is passed
 * straight to do_lrm_invoke(); otherwise it is sent to that node over the
 * cluster layer. Unless the action is marked as no-wait or is already
 * confirmed, the target's job count is incremented and the action timer is
 * started.
 *
 * \param[in] graph   Transition graph that \p action belongs to
 * \param[in] action  Graph action to execute
 *
 * \return TRUE if the action was initiated, or FALSE if the action has no
 *         target node or the request could not be sent
 */
static gboolean
te_rsc_command(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
    /* never overwrite stop actions in the CIB with
     * anything other than completed results
     *
     * Writing pending stops makes it look like the
     * resource is running again
     */
    xmlNode *cmd = NULL;
    xmlNode *rsc_op = NULL;

    gboolean rc = TRUE;
    gboolean no_wait = FALSE;
    gboolean is_local = FALSE;

    char *counter = NULL;
    const char *task = NULL;
    const char *value = NULL;
    const char *on_node = NULL;
    const char *router_node = NULL;
    const char *task_uuid = NULL;

    CRM_ASSERT(action != NULL);
    CRM_ASSERT(action->xml != NULL);

-    crm__clear_graph_action_flags(action, pcmk__graph_action_executed);
+    pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);

    rsc_op = action->xml;

    /* Read the task before validating the target node, so that the error
     * below can actually name it (previously the task had not been looked up
     * yet at that point, so the message always said "without task")
     */
    task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);

    on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);
    if (pcmk__str_empty(on_node)) {
        crm_err("Corrupted command(id=%s) %s: no node",
                pcmk__s(ID(rsc_op), "<null>"), pcmk__s(task, "without task"));
        return FALSE;
    }

    task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
    router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);

    if (!router_node) {
        router_node = on_node;
    }

    counter = pcmk__transition_key(transition_graph->id, action->id,
                                   get_target_rc(action), te_uuid);
    crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);

    if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
        is_local = TRUE;
    }

    value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
    if (crm_is_true(value)) {
        no_wait = TRUE;
    }

    crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
               task, task_uuid, (is_local? " locally" : ""), on_node,
               (no_wait? " without waiting" : ""), action->id);

    cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
                         CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);

    if (is_local) {
        /* shortcut local resource commands */
        ha_msg_input_t data = {
            .msg = cmd,
            .xml = rsc_op,
        };

        fsa_data_t msg = {
            .id = 0,
            .data = &data,
            .data_type = fsa_dt_ha_msg,
            .fsa_input = I_NULL,
            .fsa_cause = C_FSA_INTERNAL,
            .actions = A_LRM_INVOKE,
            .origin = __func__,
        };

        do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);

    } else {
        rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
    }

    free(counter);
    free_xml(cmd);

-    crm__set_graph_action_flags(action, pcmk__graph_action_executed);
+    pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);

    if (rc == FALSE) {
        crm_err("Action %d failed: send", action->id);
        return FALSE;

    } else if (no_wait) {
+        /* Just mark confirmed. Don't bump the job count only to immediately
+         * decrement it.
+         */
        crm_info("Action %d confirmed - no wait", action->id);
-        crm__set_graph_action_flags(action, pcmk__graph_action_confirmed); /* Just mark confirmed.
-         * Don't bump the job count only to immediately decrement it
-         */
+        pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
        pcmk__update_graph(transition_graph, action);
        trigger_graph();

    } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
        crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
                  action->id, task, task_uuid, on_node, action->timeout);
    } else {
        if (action->timeout <= 0) {
            crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
                    action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
            action->timeout = (int) graph->network_delay;
        }
        te_update_job_count(action, 1);
        te_start_action_timer(graph, action);
    }

    return TRUE;
}
/* Per-node job accounting entry stored as a value in the te_targets table */
struct te_peer_s {
    char *name;         // Allocated copy of the node name; also the table key
    int jobs;           // Running total of job-count offsets for this node
    int migrate_jobs;   // Same, counting only offsets flagged as migrations
};
/*!
 * \internal
 * \brief Free a job accounting entry, including its node name
 *
 * \param[in] p  Entry to free (a struct te_peer_s)
 */
static void
te_peer_free(gpointer p)
{
    struct te_peer_s *entry = (struct te_peer_s *) p;

    free(entry->name);
    free(entry);
}
void te_reset_job_counts(void)
{
GHashTableIter iter;
struct te_peer_s *peer = NULL;
if(te_targets == NULL) {
te_targets = pcmk__strkey_table(NULL, te_peer_free);
}
g_hash_table_iter_init(&iter, te_targets);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
peer->jobs = 0;
peer->migrate_jobs = 0;
}
}
/*!
 * \internal
 * \brief Adjust the job count tracked for a node
 *
 * Does nothing if \p target is NULL or the tracking table has not been
 * created. An entry is created for the node if none exists yet.
 *
 * \param[in] target   Name of node whose count should be adjusted
 * \param[in] offset   Amount to add to the count (may be negative)
 * \param[in] migrate  If true, adjust the node's migration job count as well
 */
static void
te_update_job_count_on(const char *target, int offset, bool migrate)
{
    struct te_peer_s *r = NULL;

    if ((target == NULL) || (te_targets == NULL)) {
        return;
    }

    r = g_hash_table_lookup(te_targets, target);
    if (r == NULL) {
        /* Fail loudly on memory exhaustion instead of dereferencing NULL
         * below or inserting an entry with a NULL key into the table
         */
        r = calloc(1, sizeof(*r));
        CRM_ASSERT(r != NULL);

        r->name = strdup(target);
        CRM_ASSERT(r->name != NULL);

        // The entry's name doubles as the key; te_peer_free() releases both
        g_hash_table_insert(te_targets, r->name, r);
    }

    r->jobs += offset;
    if (migrate) {
        r->migrate_jobs += offset;
    }
    crm_trace("jobs[%s] = %d", target, r->jobs);
}
/*!
 * \internal
 * \brief Adjust job counts for the node(s) affected by a graph action
 *
 * Only resource actions with a target node are counted. If the action has a
 * router node, the count is kept against that node. Otherwise, a migration
 * action is counted against both its migration source and its migration
 * target, and any other action against its target node.
 *
 * \param[in] action  Graph action being started or confirmed
 * \param[in] offset  Amount to add to the relevant count(s)
 */
static void
te_update_job_count(pcmk__graph_action_t *action, int offset)
{
    const char *op = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    const char *exec_node = crm_element_value(action->xml,
                                              XML_LRM_ATTR_TARGET);
    const char *router = NULL;

    /* No limit on these */
    if (action->type != pcmk__rsc_graph_action) {
        return;
    }
    if (exec_node == NULL) {
        return;
    }

    /* if we have a router node, this means the action is performing
     * on a remote node. For now, we count all actions occurring on a
     * remote node against the job list on the cluster node hosting
     * the connection resources */
    router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
    if (router != NULL) {
        te_update_job_count_on(router, offset, FALSE);
        return;
    }

    if (pcmk__strcase_any_of(op, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED,
                             NULL)) {
        const char *source = crm_meta_value(action->params,
                                            XML_LRM_ATTR_MIGRATE_SOURCE);
        const char *dest = crm_meta_value(action->params,
                                          XML_LRM_ATTR_MIGRATE_TARGET);

        te_update_job_count_on(source, offset, TRUE);
        te_update_job_count_on(dest, offset, TRUE);
        return;
    }

    te_update_job_count_on(exec_node, offset, FALSE);
}
/*!
 * \internal
 * \brief Check whether a node has capacity to run another graph action
 *
 * \param[in] graph   Transition graph (supplies the migration limit)
 * \param[in] action  Graph action that would be run
 * \param[in] target  Name of node to check (NULL means no limit applies)
 *
 * \return TRUE if \p target is NULL or below its limits; FALSE if the
 *         tracking table has not been created, the node is at or over its
 *         job limit, or \p action is a migration and the node is at or over
 *         the graph's migration limit
 */
static gboolean
te_should_perform_action_on(pcmk__graph_t *graph, pcmk__graph_action_t *action,
                            const char *target)
{
    int limit = 0;
    struct te_peer_s *r = NULL;
    const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);

    if (target == NULL) {
        /* No limit on these */
        return TRUE;

    } else if (te_targets == NULL) {
        return FALSE;
    }

    r = g_hash_table_lookup(te_targets, target);
    limit = throttle_get_job_limit(target);

    if (r == NULL) {
        /* Fail loudly on memory exhaustion instead of dereferencing NULL
         * below or inserting an entry with a NULL key into the table
         */
        r = calloc(1, sizeof(*r));
        CRM_ASSERT(r != NULL);

        r->name = strdup(target);
        CRM_ASSERT(r->name != NULL);

        // The entry's name doubles as the key; te_peer_free() releases both
        g_hash_table_insert(te_targets, r->name, r);
    }

    if (limit <= r->jobs) {
        crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
                  target, limit, r->jobs, id);
        return FALSE;

    } else if ((graph->migration_limit > 0)
               && (r->migrate_jobs >= graph->migration_limit)) {
        // The migration limit only holds back migration actions
        if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
            crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
                      target, graph->migration_limit, r->migrate_jobs, id);
            return FALSE;
        }
    }

    crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit = %d",
              target, r->jobs, limit);
    return TRUE;
}
/*!
 * \internal
 * \brief Check whether a graph action may be started now
 *
 * Only resource actions are limited. The node checked is the action's router
 * node if it has one. Otherwise, a migration action must pass the check on
 * both its migration source and its migration target, and any other action
 * on its target node.
 *
 * \param[in] graph   Transition graph that \p action belongs to
 * \param[in] action  Graph action to check
 *
 * \return TRUE if the action may be started, otherwise FALSE
 */
static gboolean
te_should_perform_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
    const char *op = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    const char *node = NULL;

    if (action->type != pcmk__rsc_graph_action) {
        /* No limit on these */
        return TRUE;
    }

    /* if we have a router node, this means the action is performing
     * on a remote node. For now, we count all actions occurring on a
     * remote node against the job list on the cluster node hosting
     * the connection resources */
    node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);

    if (node == NULL) {
        if (pcmk__strcase_any_of(op, CRMD_ACTION_MIGRATE,
                                 CRMD_ACTION_MIGRATED, NULL)) {
            // The source is checked here; the target by the final return
            node = crm_meta_value(action->params,
                                  XML_LRM_ATTR_MIGRATE_SOURCE);
            if (te_should_perform_action_on(graph, action, node) == FALSE) {
                return FALSE;
            }
            node = crm_meta_value(action->params,
                                  XML_LRM_ATTR_MIGRATE_TARGET);

        } else {
            node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
        }
    }

    return te_should_perform_action_on(graph, action, node);
}
/*!
* \brief Confirm a graph action (and optionally update graph)
*
* If the action is not already flagged as confirmed, it is flagged now, and
* the job count is decremented for resource actions that have a target node.
* An action that is already confirmed is left untouched.
*
* \param[in] action Action to confirm
* \param[in] graph Update and trigger this graph (if non-NULL)
*/
void
te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
{
/* Only the first confirmation adjusts the job count and sets the flag */
if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
if ((action->type == pcmk__rsc_graph_action)
&& (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
te_update_job_count(action, -1);
}
- crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
}
/* Callers pass a NULL graph to skip the graph update and trigger */
if (graph) {
pcmk__update_graph(graph, action);
trigger_graph();
}
}
/* Table of the controller's handlers for transition graph actions.
 *
 * The initializer is positional, so the entries must stay in the member order
 * of pcmk__graph_functions_t (declared elsewhere -- check that declaration
 * before reordering or adding entries). te_fence_node is not defined in this
 * part of the file; the other four handlers are defined above.
 */
pcmk__graph_functions_t te_graph_fns = {
te_pseudo_action,
te_rsc_command,
te_crm_command,
te_fence_node,
te_should_perform_action,
};
/*!
 * \internal
 * \brief Tell the controller's state machine that a transition has finished
 *
 * The graph's completion action and the current FSA state determine which
 * FSA input (if any) gets registered. Afterward, the graph's abort reason and
 * completion action are reset, and the in-transition flag is cleared.
 *
 * \param[in] graph  Transition graph that completed
 */
void
notify_crmd(pcmk__graph_t *graph)
{
    enum crmd_fsa_input next_input = I_NULL;
    const char *outcome = "unknown";

    crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));

    // Callers should only get here with a completed graph; force it if not
    CRM_CHECK(graph->complete, graph->complete = TRUE);

    switch (graph->completion_action) {
        case tg_stop:
            outcome = "stop";
            if (fsa_state == S_TRANSITION_ENGINE) {
                next_input = I_TE_SUCCESS;
            }
            break;

        case tg_done:
            outcome = "done";
            if (fsa_state == S_TRANSITION_ENGINE) {
                next_input = I_TE_SUCCESS;
            }
            break;

        case tg_restart:
            outcome = "restart";
            if (fsa_state == S_POLICY_ENGINE) {
                controld_set_fsa_action_flags(A_PE_INVOKE);
                trigger_fsa();

            } else if (fsa_state == S_TRANSITION_ENGINE) {
                if (transition_timer->period_ms > 0) {
                    // Restart the transition timer rather than recalculating now
                    controld_stop_timer(transition_timer);
                    controld_start_timer(transition_timer);
                } else {
                    next_input = I_PE_CALC;
                }
            }
            break;

        case tg_shutdown:
            outcome = "shutdown";
            if (!pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
                crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
                next_input = I_TERMINATE;
            } else {
                next_input = I_STOP;
            }
            break;
    }

    crm_debug("Transition %d status: %s - %s", graph->id, outcome,
              pcmk__s(graph->abort_reason, "unspecified reason"));

    graph->abort_reason = NULL;
    graph->completion_action = tg_done;
    controld_clear_fsa_input_flags(R_IN_TRANSITION);

    if (next_input != I_NULL) {
        register_fsa_input(C_FSA_INTERNAL, next_input, NULL);

    } else if (fsa_source) {
        // Nothing to register; just poke the FSA trigger
        mainloop_set_trigger(fsa_source);
    }
}
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
index 99920c90a1..9f38c57301 100644
--- a/daemons/controld/controld_te_callbacks.c
+++ b/daemons/controld/controld_te_callbacks.c
@@ -1,674 +1,674 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/msg_xml.h>
#include <crm/cluster.h> /* For ONLINESTATUS etc */
#include <pacemaker-controld.h>
/* Prototype only; no definition appears in this part of the file */
void te_update_confirm(const char *event, xmlNode * msg);
/* Defined in another file */
extern char *te_uuid;
gboolean shuttingdown = FALSE;
/* Transition graph consulted by the CIB-diff handlers below (may be NULL) */
pcmk__graph_t *transition_graph;
crm_trigger_t *transition_trigger = NULL;
/* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
/* XPath format string (one %s: an operation ID) that te_update_diff_v1() uses
 * to look for an operation with that ID in the added section of a patchset
 */
#define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
/*!
 * \internal
 * \brief Check whether a resource history entry has a cleared shutdown lock
 *
 * A shutdown lock counts as cleared only when the attribute can be read
 * successfully and its value is explicitly 0.
 *
 * \param[in] lrm_resource  Resource history XML to check
 *
 * \return true if the shutdown lock has been cleared, otherwise false
 */
static bool
shutdown_lock_cleared(xmlNode *lrm_resource)
{
    time_t lock_time = 0;

    if (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
                                &lock_time) != pcmk_ok) {
        return false;
    }
    return (lock_time == 0);
}
/*!
 * \internal
 * \brief Process a format-1 CIB patchset for its effect on the transition
 *
 * The transition is aborted (to be restarted) for configuration changes,
 * ticket changes, removed transient attributes, history refreshes (more than
 * one resource updated while no actions are pending), a cleared shutdown
 * lock, and removed operations that do not correspond to a cancellation in
 * the graph. Added operation results are passed to process_graph_event().
 *
 * \param[in] event  CIB notification type (not used here)
 * \param[in] diff   Patchset to process
 */
static void
te_update_diff_v1(const char *event, xmlNode *diff)
{
    int lpc = 0;
    int max = 0;
    xmlXPathObject *xpathObj = NULL;

    CRM_CHECK(diff != NULL, return);

    xml_log_patchset(LOG_TRACE, __func__, diff);
    if (cib_config_changed(NULL, NULL, &diff)) {
        abort_transition(INFINITY, tg_restart, "Non-status change", diff);
        goto bail;              /* configuration changed */
    }

    /* Tickets Attributes - Added/Updated */
    xpathObj =
        xpath_search(diff,
                     "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS);
    if (numXpathResults(xpathObj) > 0) {
        xmlNode *aborted = getXpathResult(xpathObj, 0);

        abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted);
        goto bail;
    }
    freeXpathObject(xpathObj);

    /* Tickets Attributes - Removed */
    xpathObj =
        xpath_search(diff,
                     "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS);
    if (numXpathResults(xpathObj) > 0) {
        xmlNode *aborted = getXpathResult(xpathObj, 0);

        abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted);
        goto bail;
    }
    freeXpathObject(xpathObj);

    /* Transient Attributes - Removed */
    xpathObj =
        xpath_search(diff,
                     "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//"
                     XML_TAG_TRANSIENT_NODEATTRS);
    if (numXpathResults(xpathObj) > 0) {
        xmlNode *aborted = getXpathResult(xpathObj, 0);

        abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted);
        goto bail;
    }
    freeXpathObject(xpathObj);

    // Check for lrm_resource entries
    xpathObj = xpath_search(diff,
                            "//" F_CIB_UPDATE_RESULT
                            "//" XML_TAG_DIFF_ADDED
                            "//" XML_LRM_TAG_RESOURCE);
    max = numXpathResults(xpathObj);

    /*
     * Updates by, or in response to, graph actions will never affect more than
     * one resource at a time, so such updates indicate an LRM refresh. In that
     * case, start a new transition rather than check each result individually,
     * which can result in _huge_ speedups in large clusters.
     *
     * Unfortunately, we can only do so when there are no pending actions.
     * Otherwise, we could mistakenly throw away those results here, and
     * the cluster will stall waiting for them and time out the operation.
     */
    if ((transition_graph->pending == 0) && (max > 1)) {
        crm_debug("Ignoring resource operation updates due to history refresh of %d resources",
                  max);
        crm_log_xml_trace(diff, "lrm-refresh");
        abort_transition(INFINITY, tg_restart, "History refresh", NULL);
        goto bail;
    }

    if (max == 1) {
        xmlNode *lrm_resource = getXpathResult(xpathObj, 0);

        if (shutdown_lock_cleared(lrm_resource)) {
            // @TODO would be more efficient to abort once after transition done
            abort_transition(INFINITY, tg_restart, "Shutdown lock cleared",
                             lrm_resource);
            // Still process results, so we stop timers and update failcounts
        }
    }
    freeXpathObject(xpathObj);

    /* Process operation updates */
    xpathObj =
        xpath_search(diff,
                     "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP);
    max = numXpathResults(xpathObj);
    for (lpc = 0; lpc < max; lpc++) {
        xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
        const char *node = get_node_id(rsc_op);

        process_graph_event(rsc_op, node);
    }
    freeXpathObject(xpathObj);

    /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */
    xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP);
    max = numXpathResults(xpathObj);
    for (lpc = 0; lpc < max; lpc++) {
        int path_max = 0;
        const char *op_id = NULL;
        char *rsc_op_xpath = NULL;
        xmlXPathObject *op_match = NULL;
        xmlNode *match = getXpathResult(xpathObj, lpc);

        CRM_LOG_ASSERT(match != NULL);
        if (match == NULL) {
            continue;
        }

        /* A removed operation without an ID cannot be matched against
         * anything, and passing a NULL ID to strlen() would crash, so treat
         * it like any other malformed entry and skip it
         */
        op_id = ID(match);
        CRM_LOG_ASSERT(op_id != NULL);
        if (op_id == NULL) {
            continue;
        }

        path_max = strlen(RSC_OP_TEMPLATE) + strlen(op_id) + 1;
        rsc_op_xpath = calloc(1, path_max);

        /* Without the search string we cannot tell a deletion from a
         * replacement, so take the cautious path and restart the transition
         */
        CRM_CHECK(rsc_op_xpath != NULL,
                  abort_transition(INFINITY, tg_restart, "Resource op removal",
                                   match);
                  goto bail);

        snprintf(rsc_op_xpath, path_max, RSC_OP_TEMPLATE, op_id);

        // An operation that also appears as added was replaced, not deleted
        op_match = xpath_search(diff, rsc_op_xpath);
        if (numXpathResults(op_match) == 0) {
            /* Prevent false positives by matching cancelations too */
            const char *node = get_node_id(match);
            pcmk__graph_action_t *cancelled = get_cancel_action(op_id, node);

            if (cancelled == NULL) {
                crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id,
                          node);
                abort_transition(INFINITY, tg_restart, "Resource op removal", match);
                freeXpathObject(op_match);
                free(rsc_op_xpath);
                goto bail;

            } else {
                crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d",
                          op_id, node, cancelled->id);
            }
        }

        freeXpathObject(op_match);
        free(rsc_op_xpath);
    }

  bail:
    freeXpathObject(xpathObj);
}
/*!
 * \internal
 * \brief Process a changed resource history entry
 *
 * Every child of the entry is processed as a graph event, and then the
 * transition is aborted if the entry's shutdown lock has been cleared.
 *
 * \param[in] lrm_resource  Resource history XML that changed
 * \param[in] node          ID of the node that the history belongs to
 */
static void
process_lrm_resource_diff(xmlNode *lrm_resource, const char *node)
{
    xmlNode *entry = pcmk__xml_first_child(lrm_resource);

    while (entry != NULL) {
        process_graph_event(entry, node);
        entry = pcmk__xml_next(entry);
    }

    if (shutdown_lock_cleared(lrm_resource)) {
        // @TODO would be more efficient to abort once after transition done
        abort_transition(INFINITY, tg_restart, "Shutdown lock cleared",
                         lrm_resource);
    }
}
/*!
 * \internal
 * \brief Process resource history changes for one node
 *
 * \param[in] node    ID of the node that the history belongs to
 * \param[in] xml     Either an XML_CIB_TAG_LRM element or its
 *                    XML_LRM_TAG_RESOURCES child (NULL is ignored)
 * \param[in] change  Patchset change that \p xml came from (only logged)
 * \param[in] op      Patchset operation (not used here)
 * \param[in] xpath   XPath of the change (not used here)
 */
static void
process_resource_updates(const char *node, xmlNode *xml, xmlNode *change,
                         const char *op, const char *xpath)
{
    xmlNode *history = NULL;
    bool multiple_resources = false;

    if (xml == NULL) {
        return;
    }

    // Descend from the lrm element to its resources list if needed
    if (strcmp(TYPE(xml), XML_CIB_TAG_LRM) == 0) {
        xml = first_named_child(xml, XML_LRM_TAG_RESOURCES);
        CRM_CHECK(xml != NULL, return);
    }

    CRM_CHECK(strcmp(TYPE(xml), XML_LRM_TAG_RESOURCES) == 0, return);

    /*
     * Updates by, or in response to, TE actions will never contain updates
     * for more than one resource at a time, so such updates indicate an
     * LRM refresh.
     *
     * In that case, start a new transition rather than check each result
     * individually, which can result in _huge_ speedups in large clusters.
     *
     * Unfortunately, we can only do so when there are no pending actions.
     * Otherwise, we could mistakenly throw away those results here, and
     * the cluster will stall waiting for them and time out the operation.
     */
    multiple_resources = (xml->children != NULL)
                         && (xml->children->next != NULL);
    if ((transition_graph->pending == 0) && multiple_resources) {
        crm_log_xml_trace(change, "lrm-refresh");
        abort_transition(INFINITY, tg_restart, "History refresh", NULL);
        return;
    }

    history = pcmk__xml_first_child(xml);
    while (history != NULL) {
        crm_trace("Processing %s", ID(history));
        process_lrm_resource_diff(history, node);
        history = pcmk__xml_next(history);
    }
}
/*!
 * \internal
 * \brief Extract a node ID from the first node_state predicate in an XPath
 *
 * The ID is the text between the first occurrence of "node_state[@id='" and
 * the next single quote after it.
 *
 * \param[in] xpath  XPath to search (may be NULL)
 *
 * \return Newly allocated copy of the node ID, or NULL if \p xpath is NULL,
 *         has no node_state ID predicate, has no closing quote after the
 *         predicate's opening quote, or memory could not be allocated
 * \note The caller is responsible for freeing the return value using free().
 */
static char *extract_node_uuid(const char *xpath)
{
    static const char marker[] = "node_state[@id='";
    const char *start = NULL;
    const char *end = NULL;
    char *node_uuid = NULL;
    size_t len = 0;

    if (xpath == NULL) {
        return NULL;
    }

    start = strstr(xpath, marker);
    if (start == NULL) {
        return NULL;
    }
    start += sizeof(marker) - 1;    // Skip past the marker itself

    end = strchr(start, '\'');
    if (end == NULL) {
        return NULL;
    }

    /* Copy only the ID. The input is never modified, so no scratch copy of
     * the whole path is needed, and an allocation failure simply yields NULL.
     */
    len = (size_t) (end - start);
    node_uuid = malloc(len + 1);
    if (node_uuid != NULL) {
        memcpy(node_uuid, start, len);
        node_uuid[len] = '\0';
    }
    return node_uuid;
}
/*!
 * \internal
 * \brief Abort the transition unless a change is an expected node shutdown
 *
 * The transition is left alone only when the operation is a deletion, a node
 * ID can be extracted from the XPath, and a matching down event is found for
 * that node. In every other case it is aborted with the given reason.
 *
 * \param[in] xpath   XPath of the change
 * \param[in] op      Patchset operation
 * \param[in] change  Patchset change
 * \param[in] reason  Reason to give if the transition is aborted
 */
static void
abort_unless_down(const char *xpath, const char *op, xmlNode *change,
                  const char *reason)
{
    char *uuid = NULL;

    if (!pcmk__str_eq(op, "delete", pcmk__str_casei)) {
        abort_transition(INFINITY, tg_restart, reason, change);
        return;
    }

    uuid = extract_node_uuid(xpath);
    if (uuid == NULL) {
        crm_err("Could not extract node ID from %s", xpath);
        abort_transition(INFINITY, tg_restart, reason, change);
        return;
    }

    if (match_down_event(uuid) != NULL) {
        crm_trace("Expecting changes to %s (%s)", uuid, xpath);

    } else {
        crm_trace("Not expecting %s to be down (%s)", uuid, xpath);
        abort_transition(INFINITY, tg_restart, reason, change);
    }
    free(uuid);
}
/*!
 * \internal
 * \brief Process the deletion of an operation history entry
 *
 * The operation key is taken from between the last pair of single quotes in
 * the XPath. The transition is aborted unless the deletion is confirmed as a
 * cancellation by confirm_cancel_action().
 *
 * \param[in] xpath   XPath of the deleted entry
 * \param[in] change  Patchset change
 */
static void
process_op_deletion(const char *xpath, xmlNode *change)
{
    char *path_copy = strdup(xpath);
    char *op_key = NULL;
    char *uuid = NULL;

    // Cut the copy at the last quote, then find the quote that opens the key
    op_key = strrchr(path_copy, '\'');
    if (op_key != NULL) {
        *op_key = '\0';
        op_key = strrchr(path_copy, '\'');
    }

    if (op_key == NULL) {
        crm_warn("Ignoring malformed CIB update (resource deletion of %s)",
                 xpath);
        free(path_copy);
        return;
    }
    op_key++;   // Step over the opening quote

    uuid = extract_node_uuid(xpath);
    if (confirm_cancel_action(op_key, uuid) == FALSE) {
        abort_transition(INFINITY, tg_restart, "Resource operation removal",
                         change);
    }

    free(path_copy);
    free(uuid);
}
/*!
 * \internal
 * \brief Dispatch a patchset deletion according to what was deleted
 *
 * The XPath is checked, in order, for an operation history entry, a node's
 * resource history, and a node state entry. Other deletions are ignored.
 *
 * \param[in] xpath   XPath of the deleted element
 * \param[in] op      Patchset operation
 * \param[in] change  Patchset change
 */
static void
process_delete_diff(const char *xpath, const char *op, xmlNode *change)
{
    if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[") != NULL) {
        process_op_deletion(xpath, change);
        return;
    }

    if (strstr(xpath, "/" XML_CIB_TAG_LRM "[") != NULL) {
        abort_unless_down(xpath, op, change, "Resource state removal");
        return;
    }

    if (strstr(xpath, "/" XML_CIB_TAG_STATE "[") != NULL) {
        abort_unless_down(xpath, op, change, "Node state removal");
        return;
    }

    crm_trace("Ignoring delete of %s", xpath);
}
/*!
 * \internal
 * \brief Process the resource history within a changed node state entry
 *
 * \param[in] state   Node state XML that changed
 * \param[in] change  Patchset change
 * \param[in] op      Patchset operation
 * \param[in] xpath   XPath of the change
 */
static void
process_node_state_diff(xmlNode *state, xmlNode *change, const char *op,
                        const char *xpath)
{
    // The node state's own ID identifies the node for its history
    process_resource_updates(ID(state),
                             first_named_child(state, XML_CIB_TAG_LRM),
                             change, op, xpath);
}
/*!
 * \internal
 * \brief Process every node state entry within a changed status section
 *
 * \param[in] status  Status section XML that changed
 * \param[in] change  Patchset change
 * \param[in] op      Patchset operation
 * \param[in] xpath   XPath of the change
 */
static void
process_status_diff(xmlNode *status, xmlNode *change, const char *op,
                    const char *xpath)
{
    xmlNode *node_state = pcmk__xml_first_child(status);

    while (node_state != NULL) {
        process_node_state_diff(node_state, change, op, xpath);
        node_state = pcmk__xml_next(node_state);
    }
}
/*!
 * \internal
 * \brief Process a change that replaced or created the whole CIB
 *
 * The status section (if present) is processed first, and then the transition
 * is aborted if the change also carries a configuration section.
 *
 * \param[in] cib     CIB XML that changed
 * \param[in] change  Patchset change
 * \param[in] op      Patchset operation
 * \param[in] xpath   XPath of the change
 */
static void
process_cib_diff(xmlNode *cib, xmlNode *change, const char *op,
                 const char *xpath)
{
    xmlNode *status_section = first_named_child(cib, XML_CIB_TAG_STATUS);
    xmlNode *config_section = first_named_child(cib,
                                                XML_CIB_TAG_CONFIGURATION);

    if (status_section != NULL) {
        process_status_diff(status_section, change, op, xpath);
    }

    if (config_section != NULL) {
        abort_transition(INFINITY, tg_restart,
                         "Non-status-only change", change);
    }
}
/*!
 * \internal
 * \brief Process a format-2 CIB patchset for its effect on the transition
 *
 * Each change in the patchset is examined in turn. Configuration, ticket, and
 * transient attribute changes end processing of the patchset after being
 * handled; all other changes are dispatched by the name of the element that
 * the change produced (or by XPath, for deletions).
 *
 * \param[in] diff  Patchset to process
 */
static void
te_update_diff_v2(xmlNode *diff)
{
    crm_log_xml_trace(diff, "Patch:Raw");

    for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL;
         change = pcmk__xml_next(change)) {

        xmlNode *result = NULL;
        const char *result_name = NULL;
        const char *xpath = crm_element_value(change, XML_DIFF_PATH);

        // Possible ops: create, modify, delete, move
        const char *op = crm_element_value(change, XML_DIFF_OP);

        // Ignore uninteresting updates
        if (op == NULL) {
            continue;
        }
        if (xpath == NULL) {
            crm_trace("Ignoring %s change for version field", op);
            continue;
        }
        if (strcmp(op, "move") == 0) {
            crm_trace("Ignoring move change at %s", xpath);
            continue;
        }

        // Find the result of create/modify ops
        if (strcmp(op, "create") == 0) {
            result = change->children;

        } else if (strcmp(op, "modify") == 0) {
            xmlNode *wrapper = first_named_child(change, XML_DIFF_RESULT);

            if (wrapper != NULL) {
                result = wrapper->children;
            }

        } else if (strcmp(op, "delete") != 0) {
            crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)",
                     op, xpath);
            continue;
        }

        if (result != NULL) {
            if (result->type == XML_COMMENT_NODE) {
                crm_trace("Ignoring %s operation for comment at %s", op, xpath);
                continue;
            }
            result_name = (const char *) result->name;
        }

        crm_trace("Handling %s operation for %s%s%s",
                  op, (xpath? xpath : "CIB"),
                  (result_name? " matched by " : ""),
                  (result_name? result_name : ""));

        /* The order of the checks below matters: the first three take
         * precedence over the generic deletion and per-element handling
         */
        if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) {
            abort_transition(INFINITY, tg_restart, "Configuration change",
                             change);
            break; // Won't be packaged with operation results we may be waiting for

        } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS)
                   || pcmk__str_eq(result_name, XML_CIB_TAG_TICKETS,
                                   pcmk__str_casei)) {
            abort_transition(INFINITY, tg_restart, "Ticket attribute change", change);
            break; // Won't be packaged with operation results we may be waiting for

        } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[")
                   || pcmk__str_eq(result_name, XML_TAG_TRANSIENT_NODEATTRS,
                                   pcmk__str_casei)) {
            abort_unless_down(xpath, op, change, "Transient attribute change");
            break; // Won't be packaged with operation results we may be waiting for

        } else if (strcmp(op, "delete") == 0) {
            process_delete_diff(xpath, op, change);

        } else if (result_name == NULL) {
            crm_warn("Ignoring malformed CIB update (%s at %s has no result)",
                     op, xpath);

        } else if (strcmp(result_name, XML_TAG_CIB) == 0) {
            process_cib_diff(result, change, op, xpath);

        } else if (strcmp(result_name, XML_CIB_TAG_STATUS) == 0) {
            process_status_diff(result, change, op, xpath);

        } else if (strcmp(result_name, XML_CIB_TAG_STATE) == 0) {
            process_node_state_diff(result, change, op, xpath);

        } else if (strcmp(result_name, XML_CIB_TAG_LRM) == 0) {
            process_resource_updates(ID(result), result, change, op, xpath);

        } else if (strcmp(result_name, XML_LRM_TAG_RESOURCES) == 0) {
            // Below the lrm element, the node ID has to come from the XPath
            char *node_id = pcmk__xpath_node_id(xpath, "lrm");

            process_resource_updates(node_id, result, change, op, xpath);
            free(node_id);

        } else if (strcmp(result_name, XML_LRM_TAG_RESOURCE) == 0) {
            char *node_id = pcmk__xpath_node_id(xpath, "lrm");

            process_lrm_resource_diff(result, node_id);
            free(node_id);

        } else if (strcmp(result_name, XML_LRM_TAG_RSC_OP) == 0) {
            char *node_id = pcmk__xpath_node_id(xpath, "lrm");

            process_graph_event(result, node_id);
            free(node_id);

        } else {
            crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)",
                     op, xpath, result_name);
        }
    }
}
/*!
 * \internal
 * \brief Handle a CIB diff notification on behalf of the transition engine
 *
 * The notification is ignored if there is no transition graph, if the CIB
 * operation's return code is negative, or if the graph is complete while the
 * FSA is in a state other than idle, transition engine, or policy engine.
 * Otherwise the patchset is handed to the handler for its format.
 *
 * \param[in] event  CIB notification type
 * \param[in] msg    CIB notification message
 */
void
te_update_diff(const char *event, xmlNode * msg)
{
    int cib_rc = -EINVAL;
    int patch_format = 1;
    int added[] = { 0, 0, 0 };
    int removed[] = { 0, 0, 0 };
    const char *cib_op = NULL;
    xmlNode *patchset = NULL;

    CRM_CHECK(msg != NULL, return);
    crm_element_value_int(msg, F_CIB_RC, &cib_rc);

    if (transition_graph == NULL) {
        crm_trace("No graph");
        return;
    }

    if (cib_rc < pcmk_ok) {
        crm_trace("Filter rc=%d (%s)", cib_rc, pcmk_strerror(cib_rc));
        return;
    }

    if (transition_graph->complete
        && (fsa_state != S_IDLE)
        && (fsa_state != S_TRANSITION_ENGINE)
        && (fsa_state != S_POLICY_ENGINE)) {
        crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state),
                  transition_graph->complete);
        return;
    }

    cib_op = crm_element_value(msg, F_CIB_OPERATION);
    patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    xml_patch_versions(patchset, added, removed);
    crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", cib_op,
              removed[0], removed[1], removed[2],
              added[0], added[1], added[2],
              fsa_state2string(fsa_state));

    // The format stays at 1 if the patchset does not specify one
    crm_element_value_int(patchset, "format", &patch_format);

    if (patch_format == 1) {
        te_update_diff_v1(event, patchset);

    } else if (patch_format == 2) {
        te_update_diff_v2(patchset);

    } else {
        crm_warn("Ignoring malformed CIB update (unknown patch format %d)",
                 patch_format);
    }
}
/*!
 * \internal
 * \brief Process a message addressed to the transition engine
 *
 * The only message acted upon is a CRM_OP_INVOKE_LRM message from
 * CRM_SYSTEM_LRMD, whose operation history entries are processed as graph
 * events.
 *
 * \param[in] msg       Message to process
 * \param[in] xml_data  Message payload, searched for operation history
 *
 * \return FALSE if the message is not addressed to the transition engine or
 *         is an executor reply without any operation history, otherwise TRUE
 *         (including for messages without a task or with an unknown command)
 */
gboolean
process_te_message(xmlNode * msg, xmlNode * xml_data)
{
    const char *from = crm_element_value(msg, F_ORIG);
    const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
    const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
    const char *ref = crm_element_value(msg, F_CRM_REFERENCE);
    const char *op = crm_element_value(msg, F_CRM_TASK);
    const char *type = crm_element_value(msg, F_CRM_MSG_TYPE);

    crm_trace("Processing %s (%s) message", op, ref);
    crm_log_xml_trace(msg, "ipc");

    if (op == NULL) {
        /* error */

    } else if ((sys_to == NULL)
               || (strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0)) {
        crm_trace("Bad sys-to: %s", pcmk__s(sys_to, "missing"));
        return FALSE;

    } else if (pcmk__str_eq(op, CRM_OP_INVOKE_LRM, pcmk__str_casei)
               && pcmk__str_eq(sys_from, CRM_SYSTEM_LRMD, pcmk__str_casei)
               /* && pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_casei) */
        ) {
        xmlXPathObject *history = NULL;
        int n_ops = 0;

        crm_log_xml_trace(msg, "Processing (N)ACK");
        crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from);

        history = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP);
        n_ops = numXpathResults(history);

        if (n_ops == 0) {
            crm_log_xml_err(msg, "Invalid (N)ACK");
            freeXpathObject(history);
            return FALSE;
        }

        for (int i = 0; i < n_ops; i++) {
            xmlNode *rsc_op = getXpathResult(history, i);
            const char *node = get_node_id(rsc_op);

            process_graph_event(rsc_op, node);
        }
        freeXpathObject(history);

    } else {
        crm_err("Unknown command: %s::%s from %s", type, op, sys_from);
    }

    crm_trace("finished processing message");

    return TRUE;
}
/*!
 * \brief CIB update callback that only reports failures
 *
 * \param[in] msg        Unused
 * \param[in] call_id    ID of the CIB call that completed
 * \param[in] rc         Result of the CIB call
 * \param[in] output     Unused
 * \param[in] user_data  Unused
 */
void
cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    if (rc >= pcmk_ok) {
        return; // Nothing to report
    }
    crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
}
/*!
* \brief Handle a timeout in node-to-node communication
*
* Stops the action's timer. If the transition graph is already complete, the
* timeout is only logged. Otherwise the action is marked as failed and
* confirmed, the transition is aborted, and (for recordable resource actions)
* the timeout is recorded via controld_record_action_timeout().
*
* \param[in] data Pointer to graph action
*
* \return FALSE (indicating that source should not be re-added)
*/
gboolean
action_timer_callback(gpointer data)
{
pcmk__graph_action_t *action = (pcmk__graph_action_t *) data;
const char *task = NULL;
const char *on_node = NULL;
const char *via_node = NULL;
CRM_CHECK(data != NULL, return FALSE);
stop_te_timer(action);
// These are used for logging; task also decides whether to record the timeout
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
via_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (transition_graph->complete) {
// Nothing is waiting on this result any more, so just mention it
crm_notice("Node %s did not send %s result (via %s) within %dms "
"(ignoring because transition not in progress)",
(on_node? on_node : ""), (task? task : "unknown action"),
(via_node? via_node : "controller"), action->timeout);
} else {
/* fail the action */
crm_err("Node %s did not send %s result (via %s) within %dms "
"(action timeout plus cluster-delay)",
(on_node? on_node : ""), (task? task : "unknown action"),
(via_node? via_node : "controller"),
action->timeout + transition_graph->network_delay);
pcmk__log_graph_action(LOG_ERR, action);
- crm__set_graph_action_flags(action, pcmk__graph_action_failed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
te_action_confirmed(action, transition_graph);
abort_transition(INFINITY, tg_restart, "Action lost", NULL);
// Record timeout in the CIB if appropriate
if ((action->type == pcmk__rsc_graph_action)
&& controld_action_is_recordable(task)) {
controld_record_action_timeout(action);
}
}
return FALSE;
}
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
index 63427f9ee5..fdf1e9bc76 100644
--- a/daemons/controld/controld_te_events.c
+++ b/daemons/controld/controld_te_events.c
@@ -1,508 +1,508 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
// Fail count value used by update_failcount() for a failed stop (set to CRM_INFINITY_S there if still NULL)
char *failed_stop_offset = NULL;
// Fail count value used by update_failcount() for a failed start (set to CRM_INFINITY_S there if still NULL)
char *failed_start_offset = NULL;
/*!
* \brief Fail unconfirmed graph actions that involve a node that is down
*
* Every unconfirmed action (other than pseudo-actions and fencing) whose
* target UUID or router node UUID matches \p down_node is marked as failed
* together with its synapse, its timer is stopped, and the graph is updated.
* If at least one action was failed, the transition is aborted.
*
* \param[in] graph      Transition graph to check
* \param[in] down_node  Node to match (compared against action target UUIDs)
*
* \return TRUE if any action was failed, FALSE otherwise (including when
*         \p graph is NULL or already complete)
*/
gboolean
fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node)
{
const char *target_uuid = NULL;
const char *router = NULL;
// NOTE(review): never reset inside the loops below, so an action without a
// known router appears to inherit the previous action's value -- confirm
const char *router_uuid = NULL;
xmlNode *last_action = NULL;
GList *gIter = NULL;
GList *gIter2 = NULL;
if (graph == NULL || graph->complete) {
return FALSE;
}
gIter = graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
/* We've already been here */
continue;
}
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
// Pseudo-actions, confirmed actions and fencing are never failed here
if ((action->type == pcmk__pseudo_graph_action)
|| pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
} else if (action->type == pcmk__cluster_graph_action) {
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
continue;
}
}
target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (router) {
// The router is given by name; look up the peer to get its UUID
crm_node_t *node = crm_get_peer(0, router);
if (node) {
router_uuid = node->uuid;
}
}
if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
- crm__set_graph_action_flags(action, pcmk__graph_action_failed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
// Remembered so the abort below can reference the last failed action
last_action = action->xml;
stop_te_timer(action);
pcmk__update_graph(graph, action);
if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
crm_notice("Action %d (%s) was pending on %s (offline)",
action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
} else {
crm_info("Action %d (%s) is scheduled for %s (offline)",
action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
}
}
}
}
if (last_action != NULL) {
crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
abort_transition(INFINITY, tg_restart, "Node failure", last_action);
return TRUE;
}
return FALSE;
}
/*!
 * \internal
 * \brief Record a failed operation in the failure-related node attributes
 *
 * \param[in] event            XML describing operation that (maybe) failed
 * \param[in] event_node_uuid  Node that event occurred on
 * \param[in] rc               Actual operation return code
 * \param[in] target_rc        Expected operation return code
 * \param[in] do_update        If TRUE, do update regardless of operation type
 * \param[in] ignore_failures  If TRUE, update last failure but not fail count
 *
 * \return FALSE if the result was the expected one or came from an executor
 *         status refresh, otherwise TRUE
 */
static gboolean
update_failcount(xmlNode * event, const char *event_node_uuid, int rc,
                 int target_rc, gboolean do_update, gboolean ignore_failures)
{
    guint interval_ms = 0;
    char *task = NULL;
    char *rsc_id = NULL;
    char *timestamp = NULL;
    char *attr_name = NULL;
    char **offset = NULL;   // Which configured offset (if any) applies
    gboolean on_remote = FALSE;
    const char *value = NULL;
    const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
    const char *on_uname = crm_peer_uname(event_node_uuid);
    const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);

    // Expected results and replayed old failures need no bookkeeping
    if (rc == target_rc) {
        return FALSE;
    }
    if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
        crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
                  id, rc, on_uname);
        return FALSE;
    }

    /* Sanity check */
    CRM_CHECK(on_uname != NULL, return TRUE);

    CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
              crm_err("Couldn't parse: %s", ID(event)); goto bail);

    /* Recurring operations, promote and demote always update (by increment);
     * start and stop always update using their own offset value.
     */
    if ((interval_ms > 0)
        || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)
        || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
        do_update = TRUE;

    } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
        offset = &failed_start_offset;

    } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
        offset = &failed_stop_offset;
    }

    if (offset != NULL) {
        do_update = TRUE;
        if (*offset == NULL) {
            *offset = strdup(CRM_INFINITY_S);
        }
        value = *offset;
    }

    /* Fail count will be either incremented or set to infinity */
    if (!pcmk_str_is_infinity(value)) {
        value = XML_NVPAIR_ATTR_VALUE "++";
    }

    if (!do_update) {
        goto bail;
    }

    timestamp = pcmk__ttoa(time(NULL));
    on_remote = (g_hash_table_lookup(crm_remote_peer_cache,
                                     event_node_uuid) != NULL);

    crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
             (ignore_failures? "last failure" : "failcount"),
             rsc_id, on_uname, task, rc, value, timestamp);

    /* Update the fail count, if we're not ignoring failures */
    if (!ignore_failures) {
        attr_name = pcmk__failcount_name(rsc_id, task, interval_ms);
        update_attrd(on_uname, attr_name, value, NULL, on_remote);
        free(attr_name);
    }

    /* The last failure time is updated even when failures are ignored, so the
     * failure can still be detected and shown (e.g. by crm_mon)
     */
    attr_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
    update_attrd(on_uname, attr_name, timestamp, NULL, on_remote);
    free(attr_name);

    free(timestamp);

bail:
    free(rsc_id);
    free(task);
    return TRUE;
}
pcmk__graph_action_t *
controld_get_action(int id)
{
for (GList *item = transition_graph->synapses; item; item = item->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data;
for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data;
if (action->id == id) {
return action;
}
}
}
return NULL;
}
/*!
 * \brief Find a cancel action in the current transition graph
 *
 * \param[in] id    Operation key that the cancel action must have
 * \param[in] node  If not NULL, target UUID that the cancel action must have
 *
 * \return First matching cancel action, or NULL if there is none
 */
pcmk__graph_action_t *
get_cancel_action(const char *id, const char *node)
{
    for (GList *s = transition_graph->synapses; s != NULL; s = s->next) {
        pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) s->data;

        for (GList *a = synapse->actions; a != NULL; a = a->next) {
            pcmk__graph_action_t *action = (pcmk__graph_action_t *) a->data;
            const char *op = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
            const char *key = NULL;
            const char *target_uuid = NULL;

            // Only cancellations are of interest
            if (!pcmk__str_eq(CRMD_ACTION_CANCEL, op, pcmk__str_casei)) {
                continue;
            }

            key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
            if (!pcmk__str_eq(key, id, pcmk__str_casei)) {
                crm_trace("Wrong key %s for %s on %s", key, id, node);
                continue;
            }

            target_uuid = crm_element_value(action->xml,
                                            XML_LRM_ATTR_TARGET_UUID);
            if ((node != NULL)
                && !pcmk__str_eq(target_uuid, node, pcmk__str_casei)) {
                crm_trace("Wrong node %s for %s on %s", target_uuid, id, node);
                continue;
            }

            crm_trace("Found %s on %s", id, node);
            return action;
        }
    }
    return NULL;
}
/*!
 * \brief Confirm the cancel action matching an operation key, if any
 *
 * \param[in] id       Operation key of the cancelled operation
 * \param[in] node_id  Passed to get_cancel_action() as the node to match
 *
 * \return TRUE if a matching cancel action was found and confirmed,
 *         otherwise FALSE
 */
bool
confirm_cancel_action(const char *id, const char *node_id)
{
    pcmk__graph_action_t *cancel = get_cancel_action(id, node_id);

    if (cancel != NULL) {
        const char *op_key = crm_element_value(cancel->xml,
                                               XML_LRM_ATTR_TASK_KEY);
        const char *node_name = crm_element_value(cancel->xml,
                                                  XML_LRM_ATTR_TARGET);

        stop_te_timer(cancel);
        te_action_confirmed(cancel, transition_graph);

        crm_info("Cancellation of %s on %s confirmed (action %d)",
                 op_key, node_name, cancel->id);
        return TRUE;
    }
    return FALSE;
}
/* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
#define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
"/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']"
/*!
* \brief Find a transition event that would have made a specified node down
*
* \param[in] target UUID of node to match
*
* \return Matching event if found, NULL otherwise
*/
pcmk__graph_action_t *
match_down_event(const char *target)
{
pcmk__graph_action_t *match = NULL;
xmlXPathObjectPtr xpath_ret = NULL;
GList *gIter, *gIter2;
char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
for (gIter = transition_graph->synapses;
gIter != NULL && match == NULL;
gIter = gIter->next) {
for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions;
gIter2 != NULL && match == NULL;
gIter2 = gIter2->next) {
match = (pcmk__graph_action_t *) gIter2->data;
if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
xpath_ret = xpath_search(match->xml, xpath);
if (numXpathResults(xpath_ret) < 1) {
match = NULL;
}
freeXpathObject(xpath_ret);
} else {
// Only actions that were actually started can match
match = NULL;
}
}
}
free(xpath);
if (match != NULL) {
crm_debug("Shutdown action %d (%s) found for node %s", match->id,
crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target);
} else {
crm_debug("No reason to expect node %s to be down", target);
}
return match;
}
/*!
* \brief Process an operation result against the current transition graph
*
* Events without a transition key, or still pending, are ignored. Otherwise
* the transition key is decoded and the event is classified: results that do
* not belong to the currently active transition abort it, while a result for
* an unconfirmed action of the active transition confirms that action (and
* marks it failed if the return code is unexpected and failures are not
* ignored). Finally the outcome is logged and, where warranted, failure
* attributes are updated via update_failcount().
*
* \param[in] event       Operation result XML (must not be NULL)
* \param[in] event_node  Passed to update_failcount() as the UUID of the node
*                        the event occurred on
*/
void
process_graph_event(xmlNode *event, const char *event_node)
{
int rc = -1; // Actual result
int target_rc = -1; // Expected result
int status = -1; // Executor status
int callid = -1; // Executor call ID
int transition_num = -1; // Transition number
int action_num = -1; // Action number within transition
char *update_te_uuid = NULL;
bool ignore_failures = FALSE;
const char *id = NULL;
// Stays NULL only when the event confirms an action of the active transition
const char *desc = NULL;
const char *magic = NULL;
const char *uname = NULL;
CRM_ASSERT(event != NULL);
/*
<lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/>
*/
magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
if (magic == NULL) {
/* non-change */
return;
}
crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
if (status == PCMK_EXEC_PENDING) {
// No result yet, so there is nothing to process
return;
}
id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
rc = pcmk__effective_rc(rc);
if (decode_transition_key(magic, &update_te_uuid, &transition_num,
&action_num, &target_rc) == FALSE) {
// decode_transition_key() already logged the bad key
crm_err("Can't process action %s result: Incompatible versions? "
CRM_XS " call-id=%d", id, callid);
abort_transition(INFINITY, tg_restart, "Bad event", event);
return;
}
if (transition_num == -1) {
// E.g. crm_resource --fail
desc = "initiated outside of the cluster";
abort_transition(INFINITY, tg_restart, "Unexpected event", event);
} else if ((action_num < 0) || !pcmk__str_eq(update_te_uuid, te_uuid, pcmk__str_none)) {
desc = "initiated by a different DC";
abort_transition(INFINITY, tg_restart, "Foreign event", event);
} else if ((transition_graph->id != transition_num)
|| (transition_graph->complete)) {
// Action is not from currently active transition
guint interval_ms = 0;
if (parse_op_key(id, NULL, NULL, &interval_ms)
&& (interval_ms != 0)) {
/* Recurring actions have the transition number they were first
* scheduled in.
*/
if (status == PCMK_EXEC_CANCELLED) {
confirm_cancel_action(id, get_node_id(event));
goto bail;
}
desc = "arrived after initial scheduling";
abort_transition(INFINITY, tg_restart, "Change in recurring result",
event);
} else if (transition_graph->id != transition_num) {
desc = "arrived really late";
abort_transition(INFINITY, tg_restart, "Old event", event);
} else {
desc = "arrived late";
abort_transition(INFINITY, tg_restart, "Inactive graph", event);
}
} else {
// Event is result of an action from currently active transition
pcmk__graph_action_t *action = controld_get_action(action_num);
if (action == NULL) {
// Should never happen
desc = "unknown";
abort_transition(INFINITY, tg_restart, "Unknown event", event);
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
/* Nothing further needs to be done if the action has already been
* confirmed. This can happen e.g. when processing both an
* "xxx_last_0" or "xxx_last_failure_0" record as well as the main
* history record, which would otherwise result in incorrectly
* bumping the fail count twice.
*/
crm_log_xml_debug(event, "Event already confirmed:");
goto bail;
} else {
/* An action result needs to be confirmed.
* (This is the only case where desc == NULL.)
*/
if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) {
ignore_failures = TRUE;
} else if (rc != target_rc) {
- crm__set_graph_action_flags(action, pcmk__graph_action_failed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
}
stop_te_timer(action);
te_action_confirmed(action, transition_graph);
if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
// Abort at one above the failed action's synapse priority
abort_transition(action->synapse->priority + 1, tg_restart,
"Event failed", event);
}
}
}
// Substitute placeholders so the log messages below never get NULL
if (id == NULL) {
id = "unknown action";
}
uname = crm_element_value(event, XML_LRM_ATTR_TARGET);
if (uname == NULL) {
uname = "unknown node";
}
if (status == PCMK_EXEC_INVALID) {
// We couldn't attempt the action
crm_info("Transition %d action %d (%s on %s): %s",
transition_num, action_num, id, uname,
pcmk_exec_status_str(status));
} else if (desc && update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), FALSE)) {
// Event from outside the active transition that counted as a failure
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(target_rc),
services_ocf_exitcode_str(rc),
target_rc, rc, callid, desc);
} else if (desc) {
crm_info("Transition %d action %d (%s on %s): %s "
CRM_XS " rc=%d target-rc=%d call-id=%d",
transition_num, action_num, id, uname,
desc, rc, target_rc, callid);
} else if (rc == target_rc) {
crm_info("Transition %d action %d (%s on %s) confirmed: %s "
CRM_XS " rc=%d call-id=%d",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(rc), rc, callid);
} else {
// Confirmed action of the active transition with an unexpected result
update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), ignore_failures);
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
CRM_XS " target-rc=%d rc=%d call-id=%d",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(target_rc),
services_ocf_exitcode_str(rc),
target_rc, rc, callid);
}
bail:
free(update_te_uuid);
}
diff --git a/include/pcmki/pcmki_transition.h b/include/pcmki/pcmki_transition.h
index f71c39658d..7c4f77ad50 100644
--- a/include/pcmki/pcmki_transition.h
+++ b/include/pcmki/pcmki_transition.h
@@ -1,164 +1,164 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__PCMKI_PCMKI_TRANSITION__H
# define PCMK__PCMKI_PCMKI_TRANSITION__H
# include <glib.h>
# include <crm/crm.h>
# include <crm/msg_xml.h>
# include <crm/common/xml.h>
#ifdef __cplusplus
extern "C" {
#endif
// Kinds of action in a transition graph; each kind is dispatched to its own execution function
enum pcmk__graph_action_type {
pcmk__pseudo_graph_action,
pcmk__rsc_graph_action,
pcmk__cluster_graph_action, // Covers both fencing and other controller actions
};
// Bit flags describing the state of a synapse
enum pcmk__synapse_flags {
pcmk__synapse_ready = (1 << 0), // All inputs are confirmed
pcmk__synapse_failed = (1 << 1),
pcmk__synapse_executed = (1 << 2), // Actions have been initiated
pcmk__synapse_confirmed = (1 << 3), // All actions are confirmed
};
// A synapse groups actions with the inputs that must be confirmed before they run
typedef struct {
int id;
int priority; // Compared with the graph's abort priority before firing
uint32_t flags; // Group of pcmk__synapse_flags
GList *actions; /* pcmk__graph_action_t* */
GList *inputs; /* pcmk__graph_action_t* */
} pcmk__graph_synapse_t;
// Set flags_to_set in (synapse)->flags via pcmk__set_flags_as(), passing the caller's function and line
#define pcmk__set_synapse_flags(synapse, flags_to_set) do { \
(synapse)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Synapse", "synapse", \
(synapse)->flags, (flags_to_set), #flags_to_set); \
} while (0)
// Clear flags_to_clear in (synapse)->flags via pcmk__clear_flags_as(), passing the caller's function and line
#define pcmk__clear_synapse_flags(synapse, flags_to_clear) do { \
(synapse)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Synapse", "synapse", \
(synapse)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
// Bit flags describing the state of a graph action
enum pcmk__graph_action_flags {
pcmk__graph_action_sent_update = (1 << 0), /* sent to the CIB */
pcmk__graph_action_executed = (1 << 1), /* sent to the CRM */
pcmk__graph_action_confirmed = (1 << 2),
pcmk__graph_action_failed = (1 << 3),
pcmk__graph_action_can_fail = (1 << 4), //! \deprecated Will be removed in a future release
};
// A single action within a transition graph synapse
typedef struct {
int id;
int timeout; // Logged in milliseconds by the controller's timeout handler
int timer;
guint interval_ms;
GHashTable *params; // Queried with crm_meta_value() for meta-attributes such as on-fail
enum pcmk__graph_action_type type;
pcmk__graph_synapse_t *synapse; // Synapse the action belongs to
uint32_t flags; // Group of pcmk__graph_action_flags
xmlNode *xml; // Source of attributes such as task, target and operation key
} pcmk__graph_action_t;
// Set flags_to_set in (action)->flags via pcmk__set_flags_as(), passing the caller's function and line
-#define crm__set_graph_action_flags(action, flags_to_set) do { \
+#define pcmk__set_graph_action_flags(action, flags_to_set) do { \
(action)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Action", "action", \
(action)->flags, (flags_to_set), #flags_to_set); \
} while (0)
// Clear flags_to_clear in (action)->flags via pcmk__clear_flags_as(), passing the caller's function and line
-#define crm__clear_graph_action_flags(action, flags_to_clear) do { \
+#define pcmk__clear_graph_action_flags(action, flags_to_clear) do { \
(action)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Action", "action", \
(action)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
/* order matters here */
enum transition_action {
tg_done,
tg_stop,
tg_restart,
tg_shutdown,
};
// A transition graph together with its execution bookkeeping
typedef struct {
int id; // Transition number; compared with the number decoded from event transition keys
char *source;
int abort_priority; // Synapses with a lower priority are skipped instead of fired
gboolean complete; // When set, callers treat the transition as not in progress
const char *abort_reason;
enum transition_action completion_action;
int num_actions;
int num_synapses;
int batch_limit; // If positive, no more synapses are fired once this many are pending
guint network_delay; // Added to an action's timeout when reporting a lost action
guint stonith_timeout;
// The five counters below are reset at the start of each pcmk__execute_graph() call
int fired;
int pending;
int skipped;
int completed;
int incomplete;
GList *synapses; /* pcmk__graph_synapse_t* */
int migration_limit;
} pcmk__graph_t;
// Callbacks supplied by the library caller for executing each kind of graph action
typedef struct {
gboolean (*pseudo) (pcmk__graph_t *graph, pcmk__graph_action_t *action); // Pseudo-actions
gboolean (*rsc) (pcmk__graph_t *graph, pcmk__graph_action_t *action); // Resource actions
gboolean (*crmd) (pcmk__graph_t *graph, pcmk__graph_action_t *action); // Cluster actions other than fencing
gboolean (*stonith) (pcmk__graph_t *graph, pcmk__graph_action_t *action); // Fencing actions
gboolean (*allowed) (pcmk__graph_t *graph, pcmk__graph_action_t *action); // Optional; a FALSE result defers the synapse
} pcmk__graph_functions_t;
// Result of one pcmk__execute_graph() pass
enum transition_status {
transition_active,
transition_pending, /* active but no actions performed this time */
transition_complete,
transition_terminated,
};
void pcmk__set_graph_functions(pcmk__graph_functions_t *fns);
pcmk__graph_t *pcmk__unpack_graph(xmlNode *xml_graph, const char *reference);
enum transition_status pcmk__execute_graph(pcmk__graph_t *graph);
void pcmk__update_graph(pcmk__graph_t *graph, pcmk__graph_action_t *action);
void pcmk__free_graph(pcmk__graph_t *graph);
const char *pcmk__graph_status2text(enum transition_status state);
void pcmk__log_graph(unsigned int log_level, pcmk__graph_t *graph);
void pcmk__log_graph_action(int log_level, pcmk__graph_action_t *action);
lrmd_event_data_t *pcmk__event_from_graph_action(xmlNode *resource,
pcmk__graph_action_t *action,
int status, int rc,
const char *exit_reason);
#ifdef __cplusplus
}
#endif
#endif
diff --git a/lib/pacemaker/pcmk_graph_consumer.c b/lib/pacemaker/pcmk_graph_consumer.c
index ab1c87c604..382932e9da 100644
--- a/lib/pacemaker/pcmk_graph_consumer.c
+++ b/lib/pacemaker/pcmk_graph_consumer.c
@@ -1,856 +1,858 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-internal.h>
/*
* Functions for updating graph
*/
/*!
* \internal
* \brief Update synapse after completed prerequisite
*
* A synapse is ready to be executed once all its prerequisite actions (inputs)
* complete. Given a completed action, check whether it is an input for a given
* synapse, and if so, mark the input as confirmed, and mark the synapse as
* ready if appropriate.
*
* \param[in,out] synapse    Transition graph synapse to update
* \param[in]     action_id  ID of an action that completed
*
* \note The only substantial effect here is confirming synapse inputs.
* should_fire_synapse() will recalculate pcmk__synapse_ready, so the only
* thing that uses the pcmk__synapse_ready from here is
* synapse_state_str().
*/
static void
update_synapse_ready(pcmk__graph_synapse_t *synapse, int action_id)
{
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
return; // All inputs have already been confirmed
}
pcmk__set_synapse_flags(synapse, pcmk__synapse_ready); // Presume ready until proven otherwise
// Every input is visited (no early exit) so any input matching action_id gets confirmed
for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
pcmk__graph_action_t *prereq = (pcmk__graph_action_t *) lpc->data;
if (prereq->id == action_id) {
crm_trace("Confirming input %d of synapse %d",
action_id, synapse->id);
- crm__set_graph_action_flags(prereq, pcmk__graph_action_confirmed);
+ pcmk__set_graph_action_flags(prereq, pcmk__graph_action_confirmed);
} else if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) {
// Some other input is still outstanding
pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
crm_trace("Synapse %d still not ready after action %d",
synapse->id, action_id);
}
}
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
crm_trace("Synapse %d is now ready to execute", synapse->id);
}
}
/*!
* \internal
* \brief Update action and synapse confirmation after action completion
*
* The action with the given ID is marked as confirmed; if no other action in
* the synapse is unconfirmed, the synapse itself is marked as confirmed too.
*
* \param[in,out] synapse    Transition graph synapse that action belongs to
* \param[in]     action_id  ID of action that completed
*/
static void
update_synapse_confirmed(pcmk__graph_synapse_t *synapse, int action_id)
{
bool all_confirmed = true;
for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc->data;
if (action->id == action_id) {
crm_trace("Confirmed action %d of synapse %d",
action_id, synapse->id);
- crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
} else if (all_confirmed && !(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
// Checking all_confirmed first keeps the trace below to once per call
all_confirmed = false;
crm_trace("Synapse %d still not confirmed after action %d",
synapse->id, action_id);
}
}
if (all_confirmed && !(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) {
crm_trace("Confirmed synapse %d", synapse->id);
pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed);
}
}
/*!
 * \internal
 * \brief Propagate a completed action result through a transition graph
 *
 * Synapses that are already confirmed or failed are left alone. For a synapse
 * that has been executed, the action is confirmed within it; for any other
 * synapse, the action is confirmed as an input unless the action failed and
 * the synapse does not have INFINITY priority.
 *
 * \param[in,out] graph   Transition graph to update
 * \param[in]     action  Action that completed
 */
void
pcmk__update_graph(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
    for (GList *iter = graph->synapses; iter != NULL; iter = iter->next) {
        pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) iter->data;

        if (pcmk_any_flags_set(synapse->flags,
                               pcmk__synapse_confirmed|pcmk__synapse_failed)) {
            continue; // This synapse already completed
        }

        if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
            update_synapse_confirmed(synapse, action->id);
            continue;
        }

        if ((synapse->priority == INFINITY)
            || !pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
            update_synapse_ready(synapse, action->id);
        }
    }
}
/*
* Functions for executing graph
*/
/* A transition graph consists of various types of actions. The library caller
* registers execution functions for each action type, which will be stored
* here.
*/
static pcmk__graph_functions_t *graph_fns = NULL;
/*!
* \internal
* \brief Set transition graph execution functions
*
* \param[in] fns  Execution functions to use (the rsc, crmd, pseudo and
*                 stonith members must be set; allowed is not checked here)
*/
void
pcmk__set_graph_functions(pcmk__graph_functions_t *fns)
{
crm_debug("Setting custom functions for executing transition graphs");
graph_fns = fns;
// The pointer is stored as given (not copied), then validated
CRM_ASSERT(graph_fns != NULL);
CRM_ASSERT(graph_fns->rsc != NULL);
CRM_ASSERT(graph_fns->crmd != NULL);
CRM_ASSERT(graph_fns->pseudo != NULL);
CRM_ASSERT(graph_fns->stonith != NULL);
}
/*!
 * \internal
 * \brief Decide whether a graph synapse can be executed now
 *
 * The synapse's pcmk__synapse_ready flag is recalculated from its inputs: it
 * is cleared if any input is unconfirmed, or failed without being allowed to
 * fail. A ready synapse is still held back if it contains a non-pseudo action
 * and either its priority is below the graph's abort priority (counted as
 * skipped) or the registered "allowed" function rejects an action.
 *
 * \param[in] graph    Transition graph that synapse is part of
 * \param[in] synapse  Synapse to check
 *
 * \return true if synapse is ready, false otherwise
 */
static bool
should_fire_synapse(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse)
{
    // Presume ready until an input proves otherwise
    pcmk__set_synapse_flags(synapse, pcmk__synapse_ready);

    for (GList *in = synapse->inputs; in != NULL; in = in->next) {
        pcmk__graph_action_t *input = (pcmk__graph_action_t *) in->data;

        if (!pcmk_is_set(input->flags, pcmk__graph_action_confirmed)) {
            crm_trace("Input %d for synapse %d not yet confirmed",
                      input->id, synapse->id);
            pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
            return false;
        }

        if (pcmk_is_set(input->flags, pcmk__graph_action_failed)
            && !pcmk_is_set(input->flags, pcmk__graph_action_can_fail)) {
            crm_trace("Input %d for synapse %d confirmed but failed",
                      input->id, synapse->id);
            pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
            return false;
        }
    }

    crm_trace("Synapse %d is ready to execute", synapse->id);

    for (GList *iter = synapse->actions; iter != NULL; iter = iter->next) {
        pcmk__graph_action_t *a = (pcmk__graph_action_t *) iter->data;

        if (a->type == pcmk__pseudo_graph_action) {
            continue; // None of the checks below apply to pseudo ops
        }

        if (synapse->priority < graph->abort_priority) {
            crm_trace("Skipping synapse %d: priority %d is less than "
                      "abort priority %d",
                      synapse->id, synapse->priority, graph->abort_priority);
            graph->skipped++;
            return false;
        }

        if ((graph_fns->allowed != NULL) && !graph_fns->allowed(graph, a)) {
            crm_trace("Deferring synapse %d: not allowed", synapse->id);
            return false;
        }
    }
    return true;
}
/*!
* \internal
* \brief Initiate an action from a transition graph
*
* The action is marked as executed and then handed to the registered
* execution function for its type; cluster actions whose task is
* CRM_OP_FENCE go to the fencing function, all others to the controller one.
*
* \param[in] graph Transition graph containing action
* \param[in] action Action to execute
*
* \return Standard Pacemaker return code (EINVAL if the action has no ID or
*         an unsupported type, pcmk_rc_already if it was already executed,
*         pcmk_rc_error if the execution function returned false)
*/
static int
initiate_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
const char *id = ID(action->xml);
CRM_CHECK(id != NULL, return EINVAL);
CRM_CHECK(!pcmk_is_set(action->flags, pcmk__graph_action_executed),
return pcmk_rc_already);
// Flagged before dispatch, so the action counts as executed even if the handler fails
- crm__set_graph_action_flags(action, pcmk__graph_action_executed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
switch (action->type) {
case pcmk__pseudo_graph_action:
crm_trace("Executing pseudo-action %d (%s)", action->id, id);
return graph_fns->pseudo(graph, action)? pcmk_rc_ok : pcmk_rc_error;
case pcmk__rsc_graph_action:
crm_trace("Executing resource action %d (%s)", action->id, id);
return graph_fns->rsc(graph, action)? pcmk_rc_ok : pcmk_rc_error;
case pcmk__cluster_graph_action:
// Fencing is a cluster action with its own execution function
if (pcmk__str_eq(crm_element_value(action->xml, XML_LRM_ATTR_TASK),
CRM_OP_FENCE, pcmk__str_casei)) {
crm_trace("Executing fencing action %d (%s)",
action->id, id);
return graph_fns->stonith(graph, action)? pcmk_rc_ok : pcmk_rc_error;
}
crm_trace("Executing control action %d (%s)", action->id, id);
return graph_fns->crmd(graph, action)? pcmk_rc_ok : pcmk_rc_error;
default:
crm_err("Unsupported graph action type <%s id='%s'> (bug?)",
crm_element_name(action->xml), id);
return EINVAL;
}
}
/*!
* \internal
* \brief Execute a graph synapse
*
* Marks the synapse as executed and initiates its actions in list order,
* stopping at the first action that cannot be initiated.
*
* \param[in] graph Transition graph with synapse to execute
* \param[in] synapse Synapse to execute
*
* \return pcmk_rc_ok if every action was initiated, otherwise pcmk_rc_error
*/
static int
fire_synapse(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse)
{
pcmk__set_synapse_flags(synapse, pcmk__synapse_executed);
for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc->data;
if (initiate_action(graph, action) != pcmk_rc_ok) {
crm_err("Failed initiating <%s id=%d> in synapse %d",
crm_element_name(action->xml), action->id, synapse->id);
// The synapse and the offending action are both closed out; remaining actions are not initiated
pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed);
- crm__set_graph_action_flags(action, pcmk__graph_action_confirmed | pcmk__graph_action_failed);
+ pcmk__set_graph_action_flags(action,
+ pcmk__graph_action_confirmed
+ |pcmk__graph_action_failed);
return pcmk_rc_error;
}
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Dummy graph method that can be used with simulations
*
* The PE_fail environment variable is read once, on the first call; if it
* holds a positive integer no larger than INT_MAX, the action with that ID is
* marked as failed and the graph's abort priority is set to INFINITY. Every
* action is marked as confirmed and the graph is updated.
*
* \param[in] graph Transition graph containing action
* \param[in] action Action to be initiated
*
* \return TRUE (always, in the code below)
*/
static gboolean
pseudo_action_dummy(pcmk__graph_t * graph, pcmk__graph_action_t *action)
{
// Negative means PE_fail has not been parsed yet; 0 means no action should fail
static int fail = -1;
if (fail < 0) {
long long fail_ll;
if ((pcmk__scan_ll(getenv("PE_fail"), &fail_ll, 0LL) == pcmk_rc_ok)
&& (fail_ll > 0LL) && (fail_ll <= INT_MAX)) {
fail = (int) fail_ll;
} else {
fail = 0;
}
}
if (action->id == fail) {
crm_err("Dummy event handler: pretending action %d failed", action->id);
- crm__set_graph_action_flags(action, pcmk__graph_action_failed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
graph->abort_priority = INFINITY;
} else {
crm_trace("Dummy event handler: action %d initiated", action->id);
}
// Confirmed immediately, whether or not it was made to fail
- crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(graph, action);
return TRUE;
}
/* Fallback graph function table in which every handler is
 * pseudo_action_dummy(). pcmk__execute_graph() installs it when graph_fns is
 * NULL. NOTE(review): the initializer is positional, so which action type each
 * slot serves depends on the field order of pcmk__graph_functions_t (not
 * visible here) -- confirm before reordering or adding fields.
 */
static pcmk__graph_functions_t default_fns = {
pseudo_action_dummy,
pseudo_action_dummy,
pseudo_action_dummy,
pseudo_action_dummy
};
/*!
 * \internal
 * \brief Run one execution pass over a transition graph
 *
 * Recounts the graph's completed and pending synapses, fires every synapse
 * that is ready (until the batch limit, if any, is reached), and logs a
 * one-line summary of the resulting counters.
 *
 * \param[in] graph  Transition graph to execute (NULL is treated as complete)
 *
 * \return Status of transition after execution
 */
enum transition_status
pcmk__execute_graph(pcmk__graph_t *graph)
{
    GList *iter = NULL;
    int summary_level = LOG_DEBUG;
    enum transition_status result = transition_active;
    const char *summary = "In progress";

    // Fall back to the dummy handlers if no graph functions were registered
    if (graph_fns == NULL) {
        graph_fns = &default_fns;
    }

    if (graph == NULL) {
        return transition_complete;
    }

    // All counters are recomputed from scratch on every pass
    graph->fired = 0;
    graph->pending = 0;
    graph->skipped = 0;
    graph->completed = 0;
    graph->incomplete = 0;

    // Count completed and in-flight synapses
    for (iter = graph->synapses; iter != NULL; iter = iter->next) {
        pcmk__graph_synapse_t *synapse = iter->data;

        if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
            graph->completed++;
            continue;
        }
        if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)
            && !pcmk_is_set(synapse->flags, pcmk__synapse_failed)) {
            graph->pending++;
        }
    }

    crm_trace("Executing graph %d (%d synapses already completed, %d pending)",
              graph->id, graph->completed, graph->pending);

    // Execute any synapses that are ready
    for (iter = graph->synapses; iter != NULL; iter = iter->next) {
        pcmk__graph_synapse_t *synapse = iter->data;

        if ((graph->batch_limit > 0)
            && (graph->pending >= graph->batch_limit)) {
            crm_debug("Throttling graph execution: batch limit (%d) reached",
                      graph->batch_limit);
            break;
        }

        if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) {
            graph->skipped++;
            continue;
        }

        if (pcmk_any_flags_set(synapse->flags,
                               pcmk__synapse_confirmed
                               |pcmk__synapse_executed)) {
            continue; // Already handled
        }

        if (!should_fire_synapse(graph, synapse)) {
            crm_trace("Synapse %d cannot fire", synapse->id);
            graph->incomplete++;
            continue;
        }

        graph->fired++;
        if (fire_synapse(graph, synapse) != pcmk_rc_ok) {
            // A synapse that could not be fired aborts the transition
            crm_err("Synapse %d failed to fire", synapse->id);
            summary_level = LOG_ERR;
            graph->abort_priority = INFINITY;
            graph->incomplete++;
            graph->fired--;
        }
        if (!pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
            graph->pending++;
        }
    }

    // Nothing fired this pass: either still waiting on results, or finished
    if (graph->fired == 0) {
        if (graph->pending != 0) {
            result = transition_pending;

        } else {
            graph->complete = TRUE;

            if ((graph->incomplete != 0) && (graph->abort_priority <= 0)) {
                summary_level = LOG_WARNING;
                result = transition_terminated;
                summary = "Terminated";

            } else {
                summary_level = LOG_NOTICE;
                result = transition_complete;
                summary = (graph->skipped != 0)? "Stopped" : "Complete";
            }
        }
    }

    do_crm_log(summary_level,
               "Transition %d (Complete=%d, Pending=%d,"
               " Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s",
               graph->id, graph->completed, graph->pending, graph->fired,
               graph->skipped, graph->incomplete, graph->source, summary);
    return result;
}
/*
* Functions for unpacking transition graph XML into structs
*/
/*!
* \internal
* \brief Unpack a transition graph action from XML
*
* The action's type is derived from the XML element name, its ID from the
* element's ID, and its timeout, interval, and can-fail flag from the
* parameters obtained from the (copied) XML.
*
* \param[in] parent Synapse that action is part of
* \param[in] xml_action Action XML to unparse
*
* \return Newly allocated action on success, or NULL otherwise (missing ID,
* unrecognized element name, or allocation failure)
*/
static pcmk__graph_action_t *
unpack_action(pcmk__graph_synapse_t *parent, xmlNode *xml_action)
{
enum pcmk__graph_action_type action_type;
pcmk__graph_action_t *action = NULL;
const char *element = TYPE(xml_action);
const char *value = ID(xml_action);
if (value == NULL) {
crm_err("Ignoring transition graph action without id (bug?)");
crm_log_xml_trace(xml_action, "invalid");
return NULL;
}
// Map the XML element name (compared case-insensitively) to an action type
if (pcmk__str_eq(element, XML_GRAPH_TAG_RSC_OP, pcmk__str_casei)) {
action_type = pcmk__rsc_graph_action;
} else if (pcmk__str_eq(element, XML_GRAPH_TAG_PSEUDO_EVENT,
pcmk__str_casei)) {
action_type = pcmk__pseudo_graph_action;
} else if (pcmk__str_eq(element, XML_GRAPH_TAG_CRM_EVENT,
pcmk__str_casei)) {
action_type = pcmk__cluster_graph_action;
} else {
crm_err("Ignoring transition graph action of unknown type '%s' (bug?)",
element);
crm_log_xml_trace(xml_action, "invalid");
return NULL;
}
action = calloc(1, sizeof(pcmk__graph_action_t));
if (action == NULL) {
crm_perror(LOG_CRIT, "Cannot unpack transition graph action");
crm_log_xml_trace(xml_action, "lost");
return NULL;
}
pcmk__scan_min_int(value, &(action->id), -1);
// NOTE(review): redundant; action->type is overwritten with action_type below
action->type = pcmk__rsc_graph_action;
// The action keeps its own copy of the XML rather than referencing the input
action->xml = copy_xml(xml_action);
action->synapse = parent;
action->type = action_type;
action->params = xml2list(action->xml);
value = g_hash_table_lookup(action->params, "CRM_meta_timeout");
pcmk__scan_min_int(value, &(action->timeout), 0);
/* Take start-delay into account for the timeout of the action timer */
value = g_hash_table_lookup(action->params, "CRM_meta_start_delay");
{
int start_delay;
pcmk__scan_min_int(value, &start_delay, 0);
action->timeout += start_delay;
}
// If the interval cannot be obtained from the parameters, use 0
if (pcmk__guint_from_hash(action->params,
CRM_META "_" XML_LRM_ATTR_INTERVAL, 0,
&(action->interval_ms)) != pcmk_rc_ok) {
action->interval_ms = 0;
}
value = g_hash_table_lookup(action->params, "CRM_meta_can_fail");
if (value != NULL) {
gboolean can_fail = FALSE;
crm_str_to_boolean(value, &can_fail);
if (can_fail) {
- crm__set_graph_action_flags(action, pcmk__graph_action_can_fail);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_can_fail);
} else {
- crm__clear_graph_action_flags(action, pcmk__graph_action_can_fail);
+ pcmk__clear_graph_action_flags(action, pcmk__graph_action_can_fail);
}
// The deprecation warning is compiled out when PCMK__COMPAT_2_0 is defined
#ifndef PCMK__COMPAT_2_0
if (pcmk_is_set(action->flags, pcmk__graph_action_can_fail)) {
crm_warn("Support for the can_fail meta-attribute is deprecated"
" and will be removed in a future release");
}
#endif
}
crm_trace("Action %d has timer set to %dms", action->id, action->timeout);
return action;
}
/*!
 * \internal
 * \brief Build a synapse object from its transition graph XML
 *
 * Every child of each <action_set> becomes one of the synapse's actions, and
 * every child of each <inputs>/<trigger> becomes one of its inputs. Children
 * that cannot be unpacked are skipped. The graph's synapse and action
 * counters are incremented along the way.
 *
 * \param[in] new_graph    Transition graph that synapse is part of
 * \param[in] xml_synapse  Synapse XML
 *
 * \return Newly allocated synapse on success, or NULL otherwise
 */
static pcmk__graph_synapse_t *
unpack_synapse(pcmk__graph_t *new_graph, xmlNode *xml_synapse)
{
    const char *priority_s = NULL;
    xmlNode *set_xml = NULL;
    xmlNode *inputs_xml = NULL;
    pcmk__graph_synapse_t *new_synapse = NULL;

    crm_trace("Unpacking synapse %s", ID(xml_synapse));

    new_synapse = calloc(1, sizeof(*new_synapse));
    if (new_synapse == NULL) {
        return NULL;
    }

    pcmk__scan_min_int(ID(xml_synapse), &(new_synapse->id), 0);

    priority_s = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY);
    pcmk__scan_min_int(priority_s, &(new_synapse->priority), 0);

    CRM_CHECK(new_synapse->id >= 0, free(new_synapse);
                                    return NULL);

    new_graph->num_synapses++;

    crm_trace("Unpacking synapse %s action sets",
              crm_element_value(xml_synapse, XML_ATTR_ID));

    for (set_xml = first_named_child(xml_synapse, "action_set");
         set_xml != NULL; set_xml = crm_next_same_xml(set_xml)) {

        for (xmlNode *child = pcmk__xml_first_child(set_xml);
             child != NULL; child = pcmk__xml_next(child)) {

            pcmk__graph_action_t *new_action = unpack_action(new_synapse,
                                                             child);

            if (new_action != NULL) {
                crm_trace("Adding action %d to synapse %d",
                          new_action->id, new_synapse->id);
                new_graph->num_actions++;
                new_synapse->actions = g_list_append(new_synapse->actions,
                                                     new_action);
            }
        }
    }

    crm_trace("Unpacking synapse %s inputs", ID(xml_synapse));

    for (inputs_xml = first_named_child(xml_synapse, "inputs");
         inputs_xml != NULL; inputs_xml = crm_next_same_xml(inputs_xml)) {

        for (xmlNode *trigger = first_named_child(inputs_xml, "trigger");
             trigger != NULL; trigger = crm_next_same_xml(trigger)) {

            for (xmlNode *child = pcmk__xml_first_child(trigger);
                 child != NULL; child = pcmk__xml_next(child)) {

                pcmk__graph_action_t *new_input = unpack_action(new_synapse,
                                                                child);

                // Inputs do not count toward the graph's action total
                if (new_input != NULL) {
                    crm_trace("Adding input %d to synapse %d",
                              new_input->id, new_synapse->id);
                    new_synapse->inputs = g_list_append(new_synapse->inputs,
                                                        new_input);
                }
            }
        }
    }

    return new_synapse;
}
/*!
 * \internal
 * \brief Unpack transition graph XML
 *
 * \param[in] xml_graph  Transition graph XML to unpack (if NULL, an empty
 *                       graph with default settings is returned)
 * \param[in] reference  Where the XML came from (for logging); "unknown" is
 *                       used if this is NULL
 *
 * \return Newly allocated transition graph on success, NULL otherwise
 *         (allocation failure, or XML lacking a transition_id or
 *         cluster-delay attribute)
 * \note The caller is responsible for freeing the return value using
 *       pcmk__free_graph().
 * \note The XML is expected to be structured like:
         <transition_graph ...>
           <synapse id="0">
             <action_set>
               <rsc_op id="2" ...>
               ...
             </action_set>
             <inputs>
               <trigger>
                 <rsc_op id="1" ...
                 ...
               </trigger>
             </inputs>
           </synapse>
           ...
         </transition_graph>
 */
pcmk__graph_t *
pcmk__unpack_graph(xmlNode *xml_graph, const char *reference)
{
    pcmk__graph_t *new_graph = NULL;
    const char *t_id = NULL;
    const char *time = NULL;

    new_graph = calloc(1, sizeof(pcmk__graph_t));
    if (new_graph == NULL) {
        return NULL;
    }

    new_graph->source = strdup((reference == NULL)? "unknown" : reference);
    if (new_graph->source == NULL) {
        free(new_graph);
        return NULL;
    }

    new_graph->id = -1;
    new_graph->abort_priority = 0;
    new_graph->network_delay = 0;
    new_graph->stonith_timeout = 0;
    new_graph->completion_action = tg_done;

    // Parse top-level attributes from <transition_graph>
    if (xml_graph != NULL) {
        /* On the failure paths below, the source string must be released
         * along with the graph itself, otherwise it would be leaked.
         */
        t_id = crm_element_value(xml_graph, "transition_id");
        CRM_CHECK(t_id != NULL, free(new_graph->source);
                                free(new_graph);
                                return NULL);
        pcmk__scan_min_int(t_id, &(new_graph->id), -1);

        time = crm_element_value(xml_graph, "cluster-delay");
        CRM_CHECK(time != NULL, free(new_graph->source);
                                free(new_graph);
                                return NULL);
        new_graph->network_delay = crm_parse_interval_spec(time);

        // The fencing timeout defaults to the cluster delay
        time = crm_element_value(xml_graph, "stonith-timeout");
        if (time == NULL) {
            new_graph->stonith_timeout = new_graph->network_delay;
        } else {
            new_graph->stonith_timeout = crm_parse_interval_spec(time);
        }

        // Use 0 (dynamic limit) as default/invalid, -1 (no limit) as minimum
        t_id = crm_element_value(xml_graph, "batch-limit");
        if ((t_id == NULL)
            || (pcmk__scan_min_int(t_id, &(new_graph->batch_limit),
                                   -1) != pcmk_rc_ok)) {
            new_graph->batch_limit = 0;
        }

        t_id = crm_element_value(xml_graph, "migration-limit");
        pcmk__scan_min_int(t_id, &(new_graph->migration_limit), -1);
    }

    // Unpack each child <synapse> element
    for (xmlNode *synapse_xml = first_named_child(xml_graph, "synapse");
         synapse_xml != NULL; synapse_xml = crm_next_same_xml(synapse_xml)) {

        pcmk__graph_synapse_t *new_synapse = unpack_synapse(new_graph,
                                                            synapse_xml);

        // Synapses that could not be unpacked are left out of the graph
        if (new_synapse != NULL) {
            new_graph->synapses = g_list_append(new_graph->synapses,
                                                new_synapse);
        }
    }

    crm_debug("Unpacked transition %d from %s: %d actions in %d synapses",
              new_graph->id, new_graph->source, new_graph->num_actions,
              new_graph->num_synapses);

    return new_graph;
}
/*
* Functions for freeing transition graph objects
*/
/*!
 * \internal
 * \brief Release a transition graph action and everything it owns
 *
 * Removes the action's timer source if one is set (logging a warning), then
 * destroys its parameter table, XML, and the action object itself.
 *
 * \param[in] user_data  Action to free
 */
static void
free_graph_action(gpointer user_data)
{
    pcmk__graph_action_t *doomed = (pcmk__graph_action_t *) user_data;

    if (doomed->timer != 0) {
        crm_warn("Cancelling timer for graph action %d", doomed->id);
        g_source_remove(doomed->timer);
    }

    if (doomed->params != NULL) {
        g_hash_table_destroy(doomed->params);
    }

    free_xml(doomed->xml);
    free(doomed);
}
/*!
 * \internal
 * \brief Release a transition graph synapse along with its actions and inputs
 *
 * \param[in] user_data  Synapse to free
 */
static void
free_graph_synapse(gpointer user_data)
{
    pcmk__graph_synapse_t *doomed = (pcmk__graph_synapse_t *) user_data;

    // Actions and inputs are both lists of graph actions
    g_list_free_full(doomed->actions, free_graph_action);
    g_list_free_full(doomed->inputs, free_graph_action);

    free(doomed);
}
/*!
 * \internal
 * \brief Release a transition graph, its synapses, and its source string
 *
 * \param[in] graph  Transition graph to free (NULL is ignored)
 */
void
pcmk__free_graph(pcmk__graph_t *graph)
{
    if (graph == NULL) {
        return;
    }

    g_list_free_full(graph->synapses, free_graph_synapse);
    free(graph->source);
    free(graph);
}
/*
* Other transition graph utilities
*/
/*!
 * \internal
 * \brief Build an executor event describing the result of a graph action
 *
 * The event takes its resource ID, task, and interval from the action, copies
 * the action's parameters, and is stamped with the current time. Its call ID
 * is one more than the largest call ID found among the children of
 * \p resource.
 *
 * \param[in] resource     If not NULL, use greater call ID than in this XML
 * \param[in] action       Graph action (must be a resource action)
 * \param[in] status       What to use as event execution status
 * \param[in] rc           What to use as event exit status
 * \param[in] exit_reason  What to use as event exit reason
 *
 * \return Newly allocated executor event on success, or NULL otherwise
 */
lrmd_event_data_t *
pcmk__event_from_graph_action(xmlNode *resource, pcmk__graph_action_t *action,
                              int status, int rc, const char *exit_reason)
{
    lrmd_event_data_t *op = NULL;
    GHashTableIter param_iter;
    const char *param_name = NULL;
    const char *param_value = NULL;
    xmlNode *action_resource = NULL;
    xmlNode *history = NULL;

    CRM_CHECK(action != NULL, return NULL);
    CRM_CHECK(action->type == pcmk__rsc_graph_action, return NULL);

    action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
    CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "invalid");
                                       return NULL);

    op = lrmd_new_event(ID(action_resource),
                        crm_element_value(action->xml, XML_LRM_ATTR_TASK),
                        action->interval_ms);
    lrmd__set_result(op, rc, status, exit_reason);

    // Run time and last-change time are both "now"
    op->t_run = time(NULL);
    op->t_rcchange = op->t_run;

    // The event gets its own copies of the action's parameters
    op->params = pcmk__strkey_table(free, free);
    g_hash_table_iter_init(&param_iter, action->params);
    while (g_hash_table_iter_next(&param_iter, (void **) &param_name,
                                  (void **) &param_value)) {
        g_hash_table_insert(op->params, strdup(param_name),
                            strdup(param_value));
    }

    // Find the largest call ID already present, then go one past it
    history = pcmk__xml_first_child(resource);
    while (history != NULL) {
        int call_id = 0;

        crm_element_value_int(history, XML_LRM_ATTR_CALLID, &call_id);
        crm_debug("Got call_id=%d for %s", call_id, ID(resource));
        if (call_id > op->call_id) {
            op->call_id = call_id;
        }
        history = pcmk__xml_next(history);
    }
    op->call_id++;

    return op;
}
diff --git a/lib/pacemaker/pcmk_simulate.c b/lib/pacemaker/pcmk_simulate.c
index 3b2cb77298..88bad9dbe6 100644
--- a/lib/pacemaker/pcmk_simulate.c
+++ b/lib/pacemaker/pcmk_simulate.c
@@ -1,982 +1,982 @@
/*
* Copyright 2021-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/cib/internal.h>
#include <crm/common/output.h>
#include <crm/common/results.h>
#include <crm/pengine/pe_types.h>
#include <pacemaker-internal.h>
#include <pacemaker.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include "libpacemaker_private.h"
#define STATUS_PATH_MAX 512
static pcmk__output_t *out = NULL;
static cib_t *fake_cib = NULL;
static GList *fake_resource_list = NULL;
static GList *fake_op_fail_list = NULL;
static void set_effective_date(pe_working_set_t *data_set, bool print_original,
char *use_date);
/*!
 * \internal
 * \brief Build the label used for an action in a dot graph
 *
 * The label is built from the action's operation key (for clone resources),
 * UUID, or task, followed by the host name where one applies. Cancel actions
 * get a "Cancel " prefix and are named after the task being cancelled.
 *
 * \param[in] action   Action to create name for
 * \param[in] verbose  If true, add action ID to name
 *
 * \return Newly allocated string with action name
 * \note It is the caller's responsibility to free the result.
 */
static char *
create_action_name(pe_action_t *action, bool verbose)
{
    char *label = NULL;
    const char *prefix = "";
    const char *host = NULL;
    const char *clone_name = NULL;
    const char *task = action->task;

    // Non-pseudo actions without a node are shown on a placeholder host
    if (action->node != NULL) {
        host = action->node->details->uname;
    } else if (!pcmk_is_set(action->flags, pe_action_pseudo)) {
        host = "<none>";
    }

    if (pcmk__str_eq(action->task, RSC_CANCEL, pcmk__str_none)) {
        prefix = "Cancel ";
        task = action->cancel_task;
    }

    if (action->rsc != NULL) {
        clone_name = action->rsc->clone_name;
    }

    if (clone_name != NULL) {
        char *key = NULL;
        guint interval_ms = 0;

        if (pcmk__guint_from_hash(action->meta,
                                  XML_LRM_ATTR_INTERVAL_MS, 0,
                                  &interval_ms) != pcmk_rc_ok) {
            interval_ms = 0;
        }

        // Notifications use a notify key; everything else uses an op key
        if (pcmk__strcase_any_of(action->task, RSC_NOTIFY, RSC_NOTIFIED,
                                 NULL)) {
            const char *n_type = g_hash_table_lookup(action->meta,
                                                     "notify_key_type");
            const char *n_task = g_hash_table_lookup(action->meta,
                                                     "notify_key_operation");

            CRM_ASSERT(n_type != NULL);
            CRM_ASSERT(n_task != NULL);
            key = pcmk__notify_key(clone_name, n_type, n_task);
        } else {
            key = pcmk__op_key(clone_name, task, interval_ms);
        }

        if (host == NULL) {
            label = crm_strdup_printf("%s%s", prefix, key);
        } else {
            label = crm_strdup_printf("%s%s %s", prefix, key, host);
        }
        free(key);

    } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
        const char *op = g_hash_table_lookup(action->meta, "stonith_action");

        label = crm_strdup_printf("%s%s '%s' %s",
                                  prefix, action->task, op, host);

    } else if (action->rsc && host) {
        label = crm_strdup_printf("%s%s %s", prefix, action->uuid, host);

    } else if (host) {
        label = crm_strdup_printf("%s%s %s", prefix, action->task, host);

    } else {
        label = crm_strdup_printf("%s", action->uuid);
    }

    if (verbose) {
        // Append the action ID, replacing the shorter label
        char *with_id = crm_strdup_printf("%s (%d)", label, action->id);

        free(label);
        label = with_id;
    }
    return label;
}
/*!
 * \internal
 * \brief Output a titled list containing the cluster status
 *
 * Node and resource sections are always included, as are inactive resources
 * and failure details, in addition to whatever the caller requested.
 *
 * \param[in] data_set      Cluster working set
 * \param[in] show_opts     How to modify display (as pcmk_show_opt_e flags)
 * \param[in] section_opts  Sections to display (as pcmk_section_e flags)
 * \param[in] title         What to use as list title
 * \param[in] print_spacer  Whether to display a spacer first
 */
static void
print_cluster_status(pe_working_set_t *data_set, uint32_t show_opts,
                     uint32_t section_opts, const char *title, bool print_spacer)
{
    pcmk__output_t *out = data_set->priv;
    crm_exit_t stonith_rc = 0;

    // Single-element list holding "*", passed for both list arguments below
    GList *all = g_list_prepend(NULL, (gpointer) "*");

    section_opts |= pcmk_section_nodes | pcmk_section_resources;
    show_opts |= pcmk_show_inactive_rscs | pcmk_show_failed_detail;

    PCMK__OUTPUT_SPACER_IF(out, print_spacer);

    out->begin_list(out, NULL, NULL, "%s", title);
    out->message(out, "cluster-status", data_set, stonith_rc, NULL, FALSE,
                 section_opts, show_opts, NULL, all, all);
    out->end_list(out);

    // Only the list cell is freed; its element is a string literal
    g_list_free(all);
}
/*!
 * \internal
 * \brief Output the scheduled actions as a "Transition Summary" list
 *
 * \param[in] data_set      Cluster working set (fully scheduled)
 * \param[in] print_spacer  Whether to display a spacer first
 */
static void
print_transition_summary(pe_working_set_t *data_set, bool print_spacer)
{
    pcmk__output_t *out = data_set->priv;

    PCMK__OUTPUT_SPACER_IF(out, print_spacer);

    out->begin_list(out, NULL, NULL, "Transition Summary");
    pcmk__output_actions(data_set);
    out->end_list(out);
}
/*!
 * \internal
 * \brief Point a working set at new input and output, then set date and flags
 *
 * Each simulation flag present in \p flags is translated to the matching
 * working set flag; flags are only ever added, never cleared.
 *
 * \param[in] data_set  Cluster working set
 * \param[in] input     What to set as cluster input
 * \param[in] out       What to set as cluster output object
 * \param[in] use_date  What to set as cluster's current timestamp
 * \param[in] flags     Simulation flags (pcmk_sim_*) selecting which working
 *                      set flags to add
 */
static void
reset(pe_working_set_t *data_set, xmlNodePtr input, pcmk__output_t *out,
      char *use_date, unsigned int flags)
{
    data_set->input = input;
    data_set->priv = out;

    // Input and output must be in place before the date is set
    set_effective_date(data_set, true, use_date);

    if (pcmk_is_set(flags, pcmk_sim_sanitized)) {
        pe__set_working_set_flags(data_set, pe_flag_sanitized);
    }

    if (pcmk_is_set(flags, pcmk_sim_show_scores)) {
        pe__set_working_set_flags(data_set, pe_flag_show_scores);
    }

    if (pcmk_is_set(flags, pcmk_sim_show_utilization)) {
        pe__set_working_set_flags(data_set, pe_flag_show_utilization);
    }
}
/*!
 * \brief Write the scheduler's actions and their orderings as a dot(1) graph
 *
 * A first pass writes one node per action and flags each written action as
 * dumped; a second pass writes one edge per ordering between actions.
 *
 * \param[in] data_set     Working set for the cluster
 * \param[in] dot_file     The filename to write
 * \param[in] all_actions  Write all actions, even those that are optional or
 *                         are on unmanaged resources
 * \param[in] verbose      Add extra information, such as action IDs, to the
 *                         output
 *
 * \return Standard Pacemaker return code
 */
static int
write_sim_dotfile(pe_working_set_t *data_set, const char *dot_file,
                  bool all_actions, bool verbose)
{
    FILE *dot_strm = fopen(dot_file, "w");

    if (dot_strm == NULL) {
        return errno;
    }

    fprintf(dot_strm, " digraph \"g\" {\n");

    // First pass: one node per action
    for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) {
        pe_action_t *action = (pe_action_t *) iter->data;
        const char *style = "dashed";
        const char *font = "black";
        const char *color = "black";
        bool write_node = true;
        char *action_name = create_action_name(action, verbose);

        if (pcmk_is_set(action->flags, pe_action_pseudo)) {
            font = "orange";
        }

        if (pcmk_is_set(action->flags, pe_action_dumped)) {
            style = "bold";
            color = "green";

        } else if ((action->rsc != NULL)
                   && !pcmk_is_set(action->rsc->flags, pe_rsc_managed)) {
            // Unmanaged resources are shown only on request
            color = "red";
            font = "purple";
            write_node = all_actions;

        } else if (pcmk_is_set(action->flags, pe_action_optional)) {
            // Optional actions are shown only on request
            color = "blue";
            write_node = all_actions;

        } else {
            color = "red";
            CRM_LOG_ASSERT(!pcmk_is_set(action->flags, pe_action_runnable));
        }

        if (write_node) {
            pe__set_action_flags(action, pe_action_dumped);
            fprintf(dot_strm,
                    "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n",
                    action_name, style, color, font);
        }
        free(action_name);
    }

    // Second pass: one edge per ordering
    for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) {
        pe_action_t *action = (pe_action_t *) iter->data;

        for (GList *before_iter = action->actions_before;
             before_iter != NULL; before_iter = before_iter->next) {

            pe_action_wrapper_t *before = before_iter->data;
            char *before_name = NULL;
            char *after_name = NULL;
            const char *style = "dashed";
            bool optional = true;

            if (before->state == pe_link_dumped) {
                optional = false;
                style = "bold";

            } else if (before->type == pe_order_none) {
                continue;

            } else if (pcmk_is_set(before->action->flags, pe_action_dumped)
                       && pcmk_is_set(action->flags, pe_action_dumped)
                       && before->type != pe_order_load) {
                optional = false;
            }

            if (!all_actions && optional) {
                continue;
            }

            before_name = create_action_name(before->action, verbose);
            after_name = create_action_name(action, verbose);
            fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n",
                    before_name, after_name, style);
            free(before_name);
            free(after_name);
        }
    }

    fprintf(dot_strm, "}\n");
    fflush(dot_strm);
    fclose(dot_strm);
    return pcmk_rc_ok;
}
/*!
 * \brief Profile the configuration updates and scheduler actions in a single
 *        CIB file, printing the profiling timings.
 *
 * The timed region covers the configuration update, schema validation, and
 * \p repeat rounds of scheduling. If the update or validation fails, the
 * function returns without printing any timing.
 *
 * \note \p data_set->priv must have been set to a valid \p pcmk__output_t
 *       object before this function is called.
 *
 * \param[in] xml_file  The CIB file to profile
 * \param[in] repeat    Number of times to run
 * \param[in] data_set  Working set for the cluster
 * \param[in] use_date  The date to set the cluster's time to (may be NULL)
 */
static void
profile_file(const char *xml_file, long long repeat, pe_working_set_t *data_set,
             char *use_date)
{
    pcmk__output_t *out = data_set->priv;
    xmlNode *cib_object = NULL;
    clock_t start = 0;
    clock_t end;
    unsigned long long data_set_flags = pe_flag_no_compat;

    CRM_ASSERT(out != NULL);

    cib_object = filename2xml(xml_file);
    start = clock();

    // Make sure the CIB has a status section before updating and validating
    if (pcmk_find_cib_element(cib_object, XML_CIB_TAG_STATUS) == NULL) {
        create_xml_node(cib_object, XML_CIB_TAG_STATUS);
    }

    if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) {
        free_xml(cib_object);
        return;
    }

    if (validate_xml(cib_object, NULL, FALSE) != TRUE) {
        free_xml(cib_object);
        return;
    }

    // Carry the display flags over, since the working set is reset each round
    if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
        data_set_flags |= pe_flag_show_scores;
    }
    if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
        data_set_flags |= pe_flag_show_utilization;
    }

    /* The counter has the same type as repeat, so that a repeat count larger
     * than INT_MAX cannot overflow it.
     */
    for (long long i = 0; i < repeat; ++i) {
        // A single run schedules the parsed CIB itself; otherwise use a copy
        xmlNode *input = (repeat == 1)? cib_object : copy_xml(cib_object);

        data_set->input = input;
        set_effective_date(data_set, false, use_date);
        pcmk__schedule_actions(input, data_set_flags, data_set);
        pe_reset_working_set(data_set);
    }

    end = clock();
    out->message(out, "profile", xml_file, start, end);
}
/*!
 * \brief Profile every regular "*.xml" file in a directory
 *
 * Entries whose names start with '.' or do not end in ".xml" are skipped, as
 * are entries that are not regular files and entries whose full path does not
 * fit in the path buffer. Timings are printed in a list titled "Timings".
 *
 * \note \p data_set->priv must have been set to a valid \p pcmk__output_t
 *       object before this function is called.
 *
 * \param[in] dir       Directory containing the CIB files to profile
 * \param[in] repeat    Number of times to run each file
 * \param[in] data_set  Working set for the cluster
 * \param[in] use_date  The date to set the cluster's time to (may be NULL)
 */
void
pcmk__profile_dir(const char *dir, long long repeat, pe_working_set_t *data_set, char *use_date)
{
    pcmk__output_t *out = data_set->priv;
    struct dirent **namelist = NULL;
    int file_num = scandir(dir, &namelist, 0, alphasort);

    CRM_ASSERT(out != NULL);

    if (file_num < 0) {
        // scandir() failed, so there is nothing to profile or free
        return;
    }

    if (file_num == 0) {
        // The (empty) entry array may still have been allocated
        free(namelist);
        return;
    }

    out->begin_list(out, NULL, NULL, "Timings");

    // Entries are processed from last to first, freeing each one as we go
    while (file_num--) {
        struct dirent *entry = namelist[file_num];
        struct stat prop;
        char buffer[FILENAME_MAX];
        int len = 0;

        if (('.' == entry->d_name[0])
            || !pcmk__ends_with_ext(entry->d_name, ".xml")) {
            free(entry);
            continue;
        }

        len = snprintf(buffer, sizeof(buffer), "%s/%s", dir, entry->d_name);
        if ((len < 0) || ((size_t) len >= sizeof(buffer))) {
            // A truncated path would name a different (or no) file
            crm_warn("Skipping %s in %s: path is too long",
                     entry->d_name, dir);

        } else if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) {
            profile_file(buffer, repeat, data_set, use_date);
        }
        free(entry);
    }

    free(namelist);
    out->end_list(out);
}
/*!
 * \brief Choose the cluster's notion of "now" for a simulation
 *
 * An explicitly requested date takes precedence. Otherwise, a nonzero
 * "execution-date" read from the input CIB is used. If neither is available,
 * the working set's time is left untouched.
 *
 * \note \p data_set->priv must have been set to a valid \p pcmk__output_t
 *       object before this function is called.
 *
 * \param[in,out] data_set        Working set for the cluster
 * \param[in]     print_original  If \p true, the "execution-date" should
 *                                also be printed
 * \param[in]     use_date        The date to set the cluster's time to
 *                                (may be NULL)
 */
static void
set_effective_date(pe_working_set_t *data_set, bool print_original,
                   char *use_date)
{
    pcmk__output_t *out = data_set->priv;
    time_t original_date = 0;

    CRM_ASSERT(out != NULL);

    crm_element_value_epoch(data_set->input, "execution-date", &original_date);

    if (use_date != NULL) {
        data_set->now = crm_time_new(use_date);
        out->info(out, "Setting effective cluster time: %s", use_date);
        crm_time_log(LOG_NOTICE, "Pretending 'now' is", data_set->now,
                     crm_time_log_date | crm_time_log_timeofday);
        return;
    }

    if (original_date != 0) {
        data_set->now = crm_time_new(NULL);
        crm_time_set_timet(data_set->now, &original_date);

        if (print_original) {
            char *when = crm_time_as_string(data_set->now,
                                            crm_time_log_date
                                            |crm_time_log_timeofday);

            out->info(out, "Using the original execution date of: %s", when);
            free(when);
        }
    }
}
/*!
* \internal
* \brief Simulate successfully executing a pseudo-action in a graph
*
* The action is flagged as confirmed, an "inject-pseudo-action" message is
* sent to the file-level output object, and the graph is updated.
*
* \param[in] graph Graph to update with pseudo-action result
* \param[in] action Pseudo-action to simulate executing
*
* \return TRUE
*/
static gboolean
simulate_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
// Target node and task key, read from the action XML for display only
const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
- crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
out->message(out, "inject-pseudo-action", node, task);
pcmk__update_graph(graph, action);
return TRUE;
}
/*!
* \internal
* \brief Simulate executing a resource action in a graph
*
* Unless the action is one that needs no history entry, the action's node and
* resource history are injected into the fake CIB, a synthesized executor
* event is recorded as the action's result, and the CIB status section is
* updated. If the action matches an entry in the list of requested failures,
* the result code from that entry is used, the action is flagged as failed,
* and the graph's abort priority is set to INFINITY. Finally the action is
* flagged as confirmed and the graph is updated.
*
* \param[in] graph Graph to update with resource action result
* \param[in] action Resource action to simulate executing
*
* \return TRUE if action is validly specified, otherwise FALSE (the action
* XML has no resource child or resource ID, or resource history
* could not be injected)
*/
static gboolean
simulate_resource_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
int rc;
lrmd_event_data_t *op = NULL;
int target_outcome = PCMK_OCF_OK;
const char *rtype = NULL;
const char *rclass = NULL;
const char *resource = NULL;
const char *rprovider = NULL;
const char *resource_config_name = NULL;
const char *operation = crm_element_value(action->xml, "operation");
const char *target_rc_s = crm_meta_value(action->params,
XML_ATTR_TE_TARGET_RC);
// cib_node stays NULL on the early "goto done" paths, where it is still freed
xmlNode *cib_node = NULL;
xmlNode *cib_resource = NULL;
xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
// node is a copy owned by this function; every exit path below frees it
char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
char *uuid = NULL;
const char *router_node = crm_element_value(action->xml,
XML_LRM_ATTR_ROUTER_NODE);
// Certain actions don't need to be displayed or history entries
if (pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
crm_debug("No history injection for %s op on %s", operation, node);
goto done; // Confirm action and update graph
}
if (action_rsc == NULL) { // Shouldn't be possible
crm_log_xml_err(action->xml, "Bad");
free(node);
return FALSE;
}
/* A resource might be known by different names in the configuration and in
* the action (for example, a clone instance). Grab the configuration name
* (which is preferred when writing history), and if necessary, the instance
* name.
*/
resource_config_name = crm_element_value(action_rsc, XML_ATTR_ID);
if (resource_config_name == NULL) { // Shouldn't be possible
crm_log_xml_err(action->xml, "No ID");
free(node);
return FALSE;
}
resource = resource_config_name;
// Prefer the long ID only if it, and not the short ID, names a known resource
if (pe_find_resource(fake_resource_list, resource) == NULL) {
const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG);
if ((longname != NULL)
&& (pe_find_resource(fake_resource_list, longname) != NULL)) {
resource = longname;
}
}
// Certain actions need to be displayed but don't need history entries
if (pcmk__strcase_any_of(operation, "delete", RSC_METADATA, NULL)) {
out->message(out, "inject-rsc-action", resource, operation, node,
(guint) 0);
goto done; // Confirm action and update graph
}
rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS);
rtype = crm_element_value(action_rsc, XML_ATTR_TYPE);
rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER);
// The expected result comes from the action's target-rc meta-attribute
pcmk__scan_min_int(target_rc_s, &target_outcome, 0);
CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL,
cib_sync_call|cib_scope_local) == pcmk_ok);
// Ensure the action node is in the CIB
uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID);
// When a router node is given, the node name is passed in place of the UUID
cib_node = pcmk__inject_node(fake_cib, node,
((router_node == NULL)? uuid: node));
free(uuid);
CRM_ASSERT(cib_node != NULL);
// Add a history entry for the action
cib_resource = pcmk__inject_resource_history(out, cib_node, resource,
resource_config_name,
rclass, rtype, rprovider);
if (cib_resource == NULL) {
crm_err("Could not simulate action %d history for resource %s",
action->id, resource);
free(node);
free_xml(cib_node);
return FALSE;
}
// Simulate and display an executor event for the action result
op = pcmk__event_from_graph_action(cib_resource, action, PCMK_EXEC_DONE,
target_outcome, "User-injected result");
out->message(out, "inject-rsc-action", resource, op->op_type, node,
op->interval_ms);
// Check whether action is in a list of desired simulated failures
for (GList *iter = fake_op_fail_list; iter != NULL; iter = iter->next) {
char *spec = (char *) iter->data;
char *key = NULL;
const char *match_name = NULL;
// Allow user to specify anonymous clone with or without instance number
key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource, op->op_type,
op->interval_ms, node);
// The key is matched case-insensitively as a prefix of the failure spec
if (strncasecmp(key, spec, strlen(key)) == 0) {
match_name = resource;
}
free(key);
// If not found, try the resource's name in the configuration
if ((match_name == NULL)
&& (strcmp(resource, resource_config_name) != 0)) {
key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource_config_name,
op->op_type, op->interval_ms, node);
if (strncasecmp(key, spec, strlen(key)) == 0) {
match_name = resource_config_name;
}
free(key);
}
if (match_name == NULL) {
continue; // This failed action entry doesn't match
}
// ${match_name}_${task}_${interval_in_ms}@${node}=${rc}
// The integer after the first '=' replaces the event's result code
rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc);
if (rc != 1) {
out->err(out, "Invalid failed operation '%s' "
"(result code must be integer)", spec);
continue; // Keep checking other list entries
}
out->info(out, "Pretending action %d failed with rc=%d",
action->id, op->rc);
- crm__set_graph_action_flags(action, pcmk__graph_action_failed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
graph->abort_priority = INFINITY;
pcmk__inject_failcount(out, cib_node, match_name, op->op_type,
op->interval_ms, op->rc);
// Only the first matching, well-formed failure entry is applied
break;
}
pcmk__inject_action_result(cib_resource, op, target_outcome);
lrmd_free_event(op);
// Write the updated node state (including the new history) to the fake CIB
rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call|cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
// Common exit for all successful paths: clean up, confirm, update graph
done:
free(node);
free_xml(cib_node);
- crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(graph, action);
return TRUE;
}
/*!
* \internal
* \brief Simulate successfully executing a cluster action
*
* The action is flagged as confirmed, an "inject-cluster-action" message is
* sent to the file-level output object, and the graph is updated.
*
* \param[in] graph Graph to update with action result
* \param[in] action Cluster action to simulate
*
* \return TRUE
*/
static gboolean
simulate_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
// Target node, task, and resource child (if any) are read for display only
const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
- crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
out->message(out, "inject-cluster-action", node, task, rsc);
pcmk__update_graph(graph, action);
return TRUE;
}
/*!
 * \internal
 * \brief Simulate successfully executing a fencing action
 *
 * Reports the action through the "inject-fencing-action" output message.
 * Unless the fencing operation is "on", the target's node state in the
 * simulated CIB is replaced with an offline one, and the target's resource
 * history and transient attributes are removed. The action is then flagged
 * as confirmed and the graph is updated with it.
 *
 * \param[in] graph   Graph to update with action result
 * \param[in] action  Fencing action to simulate
 *
 * \return TRUE
 */
static gboolean
simulate_fencing_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
const char *op = crm_meta_value(action->params, "stonith_action");
// Copy of the target name owned by this function (freed before returning)
char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
out->message(out, "inject-fencing-action", target, op);
// Any operation other than "on" takes the target down in the simulated CIB
if (!pcmk__str_eq(op, "on", pcmk__str_casei)) {
int rc = pcmk_ok;
char xpath[STATUS_PATH_MAX];
// Set node state to offline
xmlNode *cib_node = pcmk__inject_node_state_change(fake_cib, target,
false);
CRM_ASSERT(cib_node != NULL);
// Record this function's name as the origin of the injected node state
crm_xml_add(cib_node, XML_ATTR_ORIGIN, __func__);
rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call|cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
// Simulate controller clearing node's resource history and attributes
// NOTE(review): neither snprintf() truncation nor the result of the
// remove() calls below is checked -- confirm best-effort is intended
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s",
target, XML_CIB_TAG_LRM);
fake_cib->cmds->remove(fake_cib, xpath, NULL,
cib_xpath|cib_sync_call|cib_scope_local);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s",
target, XML_TAG_TRANSIENT_NODEATTRS);
fake_cib->cmds->remove(fake_cib, xpath, NULL,
cib_xpath|cib_sync_call|cib_scope_local);
free_xml(cib_node);
}
// Flag the action as confirmed before the graph is updated with it
- crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
+ pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(graph, action);
free(target);
return TRUE;
}
/*!
 * \internal
 * \brief Simulate executing a working set's transition graph
 *
 * Installs the simulation callbacks, unpacks \p data_set->graph, and executes
 * the resulting transition until it is no longer active. Unless output is
 * quiet, the resulting CIB is then queried, the working set is reset, and the
 * queried CIB becomes the working set's new input (for later display).
 *
 * \param[in,out] data_set      Working set providing the graph, resource list,
 *                              and output object (taken from its priv member)
 * \param[in,out] cib           CIB connection used by the simulated actions
 * \param[in]     op_fail_list  Stored for the resource-action callback
 *                              (presumably the operations to treat as failed)
 *
 * \return Status returned by the final pcmk__execute_graph() call
 */
enum transition_status
pcmk__simulate_transition(pe_working_set_t *data_set, cib_t *cib,
                          GList *op_fail_list)
{
    /* Static storage duration: the table's address is passed to
     * pcmk__set_graph_functions(), which may keep it beyond this call, so it
     * must not live in this function's stack frame.
     */
    static pcmk__graph_functions_t simulation_fns = {
        simulate_pseudo_action,
        simulate_resource_action,
        simulate_cluster_action,
        simulate_fencing_action,
    };

    pcmk__graph_t *transition = NULL;
    enum transition_status graph_rc;

    // File-level state consulted by the simulate_*() callbacks
    out = data_set->priv;
    fake_cib = cib;
    fake_op_fail_list = op_fail_list;

    if (!out->is_quiet(out)) {
        out->begin_list(out, NULL, NULL, "Executing Cluster Transition");
    }

    pcmk__set_graph_functions(&simulation_fns);
    transition = pcmk__unpack_graph(data_set->graph, crm_system_name);
    pcmk__log_graph(LOG_DEBUG, transition);

    // The resource list is exposed to the callbacks only while executing
    fake_resource_list = data_set->resources;
    do {
        graph_rc = pcmk__execute_graph(transition);
    } while (graph_rc == transition_active);
    fake_resource_list = NULL;

    if (graph_rc != transition_complete) {
        out->err(out, "Transition failed: %s",
                 pcmk__graph_status2text(graph_rc));
        pcmk__log_graph(LOG_ERR, transition);
        out->err(out, "An invalid transition was produced");
    }
    pcmk__free_graph(transition);

    if (!out->is_quiet(out)) {
        // If not quiet, we'll need the resulting CIB for later display
        xmlNode *cib_object = NULL;
        int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object,
                                       cib_sync_call|cib_scope_local);

        CRM_ASSERT(rc == pcmk_ok);
        pe_reset_working_set(data_set);
        data_set->input = cib_object;
        out->end_list(out);
    }
    return graph_rc;
}
/*!
 * \internal
 * \brief Query the CIB, optionally inject changes, schedule, and simulate
 *
 * The steps, in order:
 * -# Query the CIB and compute the cluster status; unless quiet, print it.
 * -# If any injection was requested, apply the injections, re-query the CIB,
 *    and recompute the cluster status.
 * -# If \p input_file is set, write the scheduler input XML to it.
 * -# If the process or simulate flag is set, schedule actions, optionally
 *    write the graph and dot files, and (unless quiet) print the transition
 *    summary.
 * -# If the simulate flag is set, run the transition and, unless quiet, print
 *    the revised cluster status.
 *
 * \param[in,out] data_set      Working set to use
 * \param[in,out] out           Output object for results
 * \param[in]     injections    Requested injections (must not be NULL; its
 *                              members are dereferenced unconditionally)
 * \param[in]     flags         Group of pcmk_sim_* flags
 * \param[in]     section_opts  Passed through to print_cluster_status()
 * \param[in]     use_date      Passed to reset() and set_effective_date()
 *                              (presumably the date to simulate at)
 * \param[in]     input_file    If not NULL, file to write scheduler input to
 * \param[in]     graph_file    If not NULL, file to write transition graph to
 * \param[in]     dot_file      If not NULL, file to write dot graph to
 *
 * \return pcmk_rc_ok on success, otherwise the return code set by the step
 *         that failed
 */
int
pcmk__simulate(pe_working_set_t *data_set, pcmk__output_t *out,
pcmk_injections_t *injections, unsigned int flags,
uint32_t section_opts, char *use_date, char *input_file,
char *graph_file, char *dot_file)
{
// Tracks whether something was already printed, so spacers go between sections
int printed = pcmk_rc_no_output;
int rc = pcmk_rc_ok;
xmlNodePtr input = NULL;
cib_t *cib = NULL;
rc = cib__signon_query(&cib, &input);
if (rc != pcmk_rc_ok) {
goto simulate_done;
}
reset(data_set, input, out, use_date, flags);
cluster_status(data_set);
if (!out->is_quiet(out)) {
if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) {
printed = out->message(out, "maint-mode", data_set->flags);
}
if (data_set->disabled_resources || data_set->blocked_resources) {
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
printed = out->info(out,
"%d of %d resource instances DISABLED and "
"%d BLOCKED from further action due to failure",
data_set->disabled_resources,
data_set->ninstances,
data_set->blocked_resources);
}
/* Most formatted output headers use caps for each word, but this one
* only has the first word capitalized for compatibility with pcs.
*/
print_cluster_status(data_set,
pcmk_is_set(flags, pcmk_sim_show_pending)? pcmk_show_pending : 0,
section_opts, "Current cluster status",
(printed == pcmk_rc_ok));
printed = pcmk_rc_ok;
}
// If the user requested any injections, handle them
if ((injections->node_down != NULL)
|| (injections->node_fail != NULL)
|| (injections->node_up != NULL)
|| (injections->op_inject != NULL)
|| (injections->ticket_activate != NULL)
|| (injections->ticket_grant != NULL)
|| (injections->ticket_revoke != NULL)
|| (injections->ticket_standby != NULL)
|| (injections->watchdog != NULL)) {
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
pcmk__inject_scheduler_input(data_set, cib, injections);
printed = pcmk_rc_ok;
// Re-read the CIB so that scheduling sees the injected changes
rc = cib->cmds->query(cib, NULL, &input, cib_sync_call);
if (rc != pcmk_rc_ok) {
rc = pcmk_legacy2rc(rc);
goto simulate_done;
}
// Discard results computed from the pre-injection CIB and start over
cleanup_calculations(data_set);
reset(data_set, input, out, use_date, flags);
cluster_status(data_set);
}
if (input_file != NULL) {
rc = write_xml_file(input, input_file, FALSE);
// Only a negative result is treated as a failure here
if (rc < 0) {
rc = pcmk_legacy2rc(rc);
goto simulate_done;
}
}
if (pcmk_any_flags_set(flags, pcmk_sim_process | pcmk_sim_simulate)) {
pcmk__output_t *logger_out = NULL;
unsigned long long data_set_flags = pe_flag_no_compat;
// Carry the score/utilization display flags over to the scheduler run
if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
data_set_flags |= pe_flag_show_scores;
}
if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
data_set_flags |= pe_flag_show_utilization;
}
if (pcmk_all_flags_set(data_set->flags,
pe_flag_show_scores|pe_flag_show_utilization)) {
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
out->begin_list(out, NULL, NULL,
"Allocation Scores and Utilization Information");
printed = pcmk_rc_ok;
} else if (pcmk_is_set(data_set->flags, pe_flag_show_scores)) {
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
out->begin_list(out, NULL, NULL, "Allocation Scores");
printed = pcmk_rc_ok;
} else if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
out->begin_list(out, NULL, NULL, "Utilization Information");
printed = pcmk_rc_ok;
} else {
// Neither scores nor utilization requested: the working set's output
// object is temporarily swapped for a log-based one while scheduling
rc = pcmk__log_output_new(&logger_out);
if (rc != pcmk_rc_ok) {
goto simulate_done;
}
pe__register_messages(logger_out);
pcmk__register_lib_messages(logger_out);
data_set->priv = logger_out;
}
pcmk__schedule_actions(input, data_set_flags, data_set);
if (logger_out == NULL) {
// One of the lists above was opened on the caller's output; close it
out->end_list(out);
} else {
// Tear down the temporary logger and restore the caller's output
logger_out->finish(logger_out, CRM_EX_OK, true, NULL);
pcmk__output_free(logger_out);
data_set->priv = out;
}
input = NULL; /* Don't try and free it twice */
if (graph_file != NULL) {
rc = write_xml_file(data_set->graph, graph_file, FALSE);
if (rc < 0) {
rc = pcmk_rc_graph_error;
goto simulate_done;
}
}
if (dot_file != NULL) {
rc = write_sim_dotfile(data_set, dot_file,
pcmk_is_set(flags, pcmk_sim_all_actions),
pcmk_is_set(flags, pcmk_sim_verbose));
if (rc != pcmk_rc_ok) {
rc = pcmk_rc_dot_error;
goto simulate_done;
}
}
if (!out->is_quiet(out)) {
print_transition_summary(data_set, printed == pcmk_rc_ok);
}
}
// Everything so far succeeded; rc may still hold a non-negative value left
// by write_xml_file() (only rc < 0 was treated as an error above)
rc = pcmk_rc_ok;
if (!pcmk_is_set(flags, pcmk_sim_simulate)) {
goto simulate_done;
}
PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok);
// A transition that does not complete is reported but is not fatal here:
// the revised status is still printed below unless output is quiet
if (pcmk__simulate_transition(data_set, cib,
injections->op_fail) != transition_complete) {
rc = pcmk_rc_invalid_transition;
}
if (out->is_quiet(out)) {
goto simulate_done;
}
// Set the effective date and display flags again before recomputing status
// (presumably because pcmk__simulate_transition() resets the working set)
set_effective_date(data_set, true, use_date);
if (pcmk_is_set(flags, pcmk_sim_show_scores)) {
pe__set_working_set_flags(data_set, pe_flag_show_scores);
}
if (pcmk_is_set(flags, pcmk_sim_show_utilization)) {
pe__set_working_set_flags(data_set, pe_flag_show_utilization);
}
cluster_status(data_set);
print_cluster_status(data_set, 0, section_opts, "Revised Cluster Status",
true);
simulate_done:
// Single exit point: the CIB connection is cleaned up on every path
cib__clean_up_connection(&cib);
return rc;
}
/*!
 * \brief Run a simulation and deliver the results as XML
 *
 * Creates an XML output object for \p xml, registers the scheduler and
 * library messages on it, runs pcmk__simulate() with the remaining arguments
 * passed through unchanged, and then finishes the XML output.
 *
 * \param[in,out] xml           Passed to the XML output object's constructor
 *                              and finisher
 * \param[in,out] data_set      Working set to simulate with
 * \param[in]     injections    Requested injections
 * \param[in]     flags         Simulation flags
 * \param[in]     section_opts  Cluster status sections to show
 * \param[in]     use_date      Passed through to pcmk__simulate()
 * \param[in]     input_file    Passed through to pcmk__simulate()
 * \param[in]     graph_file    Passed through to pcmk__simulate()
 * \param[in]     dot_file      Passed through to pcmk__simulate()
 *
 * \return Return code of output creation if that fails, otherwise the return
 *         code of pcmk__simulate()
 */
int
pcmk_simulate(xmlNodePtr *xml, pe_working_set_t *data_set,
              pcmk_injections_t *injections, unsigned int flags,
              unsigned int section_opts, char *use_date, char *input_file,
              char *graph_file, char *dot_file)
{
    pcmk__output_t *xml_out = NULL;
    int result = pcmk__xml_output_new(&xml_out, xml);

    if (result == pcmk_rc_ok) {
        pe__register_messages(xml_out);
        pcmk__register_lib_messages(xml_out);

        result = pcmk__simulate(data_set, xml_out, injections, flags,
                                section_opts, use_date, input_file,
                                graph_file, dot_file);

        pcmk__xml_output_finish(xml_out, xml);
    }
    return result;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 5:59 PM (1 d, 2 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2001669
Default Alt Text
(168 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment